{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"npm-github-computer-use-mcp","slug":"github-computer-use-mcp","name":"@github/computer-use-mcp","type":"mcp","url":"https://github.com/github/computer-use-mcp#readme","page_url":"https://unfragile.ai/github-computer-use-mcp","categories":["mcp-servers"],"tags":[],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"npm-github-computer-use-mcp__cap_0","uri":"capability://image.visual.desktop.screenshot.capture.and.analysis","name":"desktop-screenshot-capture-and-analysis","description":"Captures full-screen or region-specific screenshots from the host desktop and returns pixel-perfect image data in base64 format, enabling AI agents to visually perceive and analyze the current UI state. Integrates with native OS screenshot APIs (macOS/Linux/Windows) through Node.js bindings, providing sub-100ms capture latency for real-time visual feedback loops in agent decision-making.","intents":["I need my AI agent to see what's currently on the screen before deciding what action to take next","I want to build a visual debugging tool that captures desktop state at each step of an automated workflow","I need to verify UI changes after programmatic interactions by comparing before/after screenshots"],"best_for":["AI agent developers building desktop automation workflows","Teams implementing visual RPA (Robotic Process Automation) solutions","Developers creating cross-platform UI testing frameworks with LLM perception"],"limitations":["Screenshot capture is blocking — high-frequency polling (>10 Hz) may degrade performance","No built-in image compression — full screenshots can be 2-5MB uncompressed, increasing token usage in LLM context","Region-based capture requires precise pixel coordinates; no automatic UI element detection","Wayland display server support on Linux may be limited depending on compositor implementation"],"requires":["Node.js 16+","MCP client supporting binary/base64 resource types","Desktop environment with graphics output (headless servers not supported)","Read permissions to display server (X11/Wayland on Linux, native APIs on macOS/Windows)"],"input_types":["region coordinates (optional: x, y, width, height)","format specification (PNG, JPEG)"],"output_types":["base64-encoded image data","image metadata (dimensions, format, capture timestamp)"],"categories":["image-visual","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_1","uri":"capability://tool.use.integration.mouse.cursor.movement.and.clicking","name":"mouse-cursor-movement-and-clicking","description":"Enables precise mouse cursor positioning and click operations (single-click, double-click, right-click) at specified screen coordinates, translating high-level agent intents into low-level input events. Uses native OS input APIs (Xdotool on Linux, CGEvent on macOS, SendInput on Windows) to simulate human-like mouse interactions with configurable timing and movement curves to avoid detection as automated input.","intents":["I need my agent to click on UI elements identified from screenshots to interact with applications","I want to automate mouse-driven workflows like form filling or menu navigation across different applications","I need to simulate human-like mouse movement patterns to interact with applications that detect rapid/unnatural input"],"best_for":["Desktop automation engineers building cross-platform RPA solutions","AI agent developers creating interactive workflow orchestrators","QA automation teams implementing visual regression testing with agent-driven interactions"],"limitations":["No built-in coordinate mapping — agent must translate visual element positions from screenshots to screen coordinates","Click timing is not synchronized with application event loops — rapid clicks may be missed if application is processing","No drag-and-drop support in base implementation — requires multiple move + click operations","Mouse movement curves are deterministic — sophisticated input detection systems may still identify as automated"],"requires":["Node.js 16+","MCP client with input event capability","Desktop environment with input device access","On Linux: Xdotool or similar input simulation tool installed","Appropriate user permissions (may require sudo on some systems)"],"input_types":["x coordinate (integer, pixels)","y coordinate (integer, pixels)","click type (left, right, double)","optional: movement duration (milliseconds)"],"output_types":["confirmation of click execution","error status if coordinates out of bounds"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_10","uri":"capability://automation.workflow.operation.logging.and.audit.trail","name":"operation-logging-and-audit-trail","description":"Maintains a detailed audit trail of all operations performed by agents, including operation type, parameters, timestamp, and result. Logs are stored locally and can be retrieved through MCP interface for debugging, compliance, or workflow analysis. Implements structured logging with configurable verbosity levels and optional sensitive data redaction for security-sensitive operations.","intents":["I need to audit what operations my agent performed for compliance or debugging purposes","I want to replay a failed workflow by examining the operation log","I need to redact sensitive data (passwords, API keys) from logs for security"],"best_for":["Enterprise automation systems requiring audit trails for compliance","Debugging complex automation workflows by examining operation history","Security-sensitive automation that requires sensitive data redaction"],"limitations":["Logging adds disk I/O overhead — high-frequency operations may impact performance","Log storage is unbounded — long-running agents may accumulate large log files","Sensitive data redaction is pattern-based — may miss some sensitive information","No built-in log rotation or archival — requires external log management"],"requires":["Node.js 16+","Disk space for log storage","MCP client that supports log retrieval"],"input_types":["log query parameters (time range, operation type, verbosity level)"],"output_types":["structured log entries (timestamp, operation, parameters, result)","log statistics (operation count, error count)"],"categories":["automation-workflow","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_2","uri":"capability://tool.use.integration.keyboard.input.simulation.with.hotkey.support","name":"keyboard-input-simulation-with-hotkey-support","description":"Simulates keyboard input including text typing, individual key presses, and multi-key hotkey combinations (Ctrl+C, Cmd+Z, etc.) at the OS level. Implements key event queuing with configurable inter-key delays to simulate human typing speed, and supports modifier key combinations for application shortcuts. Routes through native OS keyboard APIs to ensure compatibility with applications that validate input source.","intents":["I need my agent to type text into form fields and text editors identified from screenshots","I want to trigger application hotkeys and keyboard shortcuts (Ctrl+S, Cmd+Q) as part of automated workflows","I need to simulate realistic human typing speed with variable delays between keystrokes to avoid detection"],"best_for":["Desktop automation developers building form-filling and data-entry workflows","AI agents automating text-based application interactions","Cross-platform automation engineers needing unified keyboard input abstraction"],"limitations":["No support for IME (Input Method Editor) — cannot type non-ASCII characters in some applications without additional configuration","Keyboard layout is assumed to be QWERTY — special characters may map incorrectly on non-QWERTY layouts","No built-in validation that target application has focus — keystrokes may go to wrong window if focus changes","Hotkey combinations are limited to standard modifiers (Ctrl, Shift, Alt, Cmd) — application-specific key bindings not supported"],"requires":["Node.js 16+","MCP client with keyboard input capability","Desktop environment with keyboard input access","Target application window must have focus (agent responsible for ensuring focus)"],"input_types":["text string (for typing)","key name (for individual keys: 'Enter', 'Tab', 'Escape')","modifier array (for hotkeys: ['ctrl', 'shift', 'c'])","optional: typing speed (characters per second)"],"output_types":["confirmation of input execution","error if invalid key names or modifier combinations"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_3","uri":"capability://tool.use.integration.mcp.protocol.server.implementation","name":"mcp-protocol-server-implementation","description":"Implements a complete MCP (Model Context Protocol) server that exposes computer-use capabilities as standardized MCP resources and tools, enabling any MCP-compatible client (Claude, custom agents, etc.) to discover and invoke desktop automation functions. Uses JSON-RPC 2.0 transport over stdio or network sockets, with automatic capability advertisement through MCP's resource and tool schemas.","intents":["I want to connect my Claude instance or custom LLM agent to desktop automation capabilities via the standard MCP protocol","I need to expose computer-use tools to multiple AI clients without reimplementing the integration for each one","I want to build an agent that can discover available desktop automation capabilities at runtime through MCP introspection"],"best_for":["AI agent developers integrating desktop automation into Claude or other MCP-compatible LLMs","Teams building multi-agent systems where desktop automation is one capability among many","Developers creating standardized automation infrastructure that multiple clients need to access"],"limitations":["MCP protocol overhead adds ~50-100ms latency per tool invocation due to JSON-RPC serialization","No built-in authentication — MCP server assumes trusted client environment; requires external auth layer for untrusted networks","Stdio transport is blocking — high-frequency tool calls may create backpressure in the communication channel","No rate limiting or quota management — malicious clients could spam tool invocations"],"requires":["Node.js 16+","MCP client library (Claude SDK, custom MCP client, etc.)","Network connectivity if using socket transport (localhost sufficient for local agents)"],"input_types":["MCP tool call requests (JSON-RPC format)","MCP resource requests for capability discovery"],"output_types":["MCP tool results (JSON-RPC responses)","MCP resource descriptions (capability schemas)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_4","uri":"capability://tool.use.integration.multi.monitor.and.virtual.display.support","name":"multi-monitor-and-virtual-display-support","description":"Detects and handles multiple physical monitors and virtual display configurations, allowing agents to capture screenshots and perform interactions across the entire display landscape. Maintains a coordinate system that maps logical screen positions to physical monitor positions, enabling agents to work with multi-monitor setups without explicit monitor selection. Automatically detects display topology changes and updates coordinate mappings.","intents":["I need my agent to interact with applications spread across multiple monitors without manual monitor selection","I want to automate workflows that span multiple displays (e.g., reference document on one monitor, data entry on another)","I need to handle dynamic display configurations where monitors are connected/disconnected during agent execution"],"best_for":["Enterprise automation teams with multi-monitor workstations","Remote desktop automation scenarios with variable display configurations","AI agents managing complex workflows requiring simultaneous interaction with multiple applications"],"limitations":["Coordinate mapping assumes static display topology — dynamic monitor hotplug during execution may cause coordinate misalignment","No built-in display scaling awareness — DPI scaling on high-resolution monitors may cause coordinate offset errors","Virtual display detection is OS-specific — some virtual display drivers may not be recognized","Screenshot capture across monitors may have timing skew if monitors have different refresh rates"],"requires":["Node.js 16+","Multi-monitor display configuration (or virtual display driver)","OS-level display enumeration APIs (Xrandr on Linux, NSScreen on macOS, EnumDisplayMonitors on Windows)"],"input_types":["global screen coordinates (x, y)","optional: monitor index for explicit monitor selection"],"output_types":["display topology metadata (monitor count, resolutions, positions)","coordinate mapping information"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_5","uri":"capability://tool.use.integration.application.window.enumeration.and.focus.control","name":"application-window-enumeration-and-focus-control","description":"Enumerates open application windows on the desktop and provides window focus control, allowing agents to switch between applications and ensure keyboard/mouse input targets the correct window. Returns window metadata including title, process ID, window bounds, and focus state. Implements platform-specific window management (wmctrl on Linux, NSWindow API on macOS, Windows API on Windows) with a unified interface.","intents":["I need my agent to switch between multiple open applications as part of a multi-app workflow","I want to verify that keyboard input is going to the correct application window before typing","I need to enumerate available windows to dynamically select the target for the next interaction"],"best_for":["Multi-application automation workflows requiring window switching","AI agents that need to verify window focus before performing input operations","Desktop automation frameworks that manage complex application interactions"],"limitations":["Window enumeration is asynchronous — window list may be stale if applications open/close during enumeration","Focus control is not atomic — window may lose focus between focus command and subsequent input operation","Some applications (e.g., fullscreen games, privileged applications) may not be enumerable or focusable","Window titles are not unique — multiple windows may have identical titles, requiring PID-based disambiguation"],"requires":["Node.js 16+","Desktop environment with window manager (X11/Wayland on Linux, native on macOS/Windows)","On Linux: wmctrl or xdotool installed"],"input_types":["window selector (by title, PID, or index)","focus command (bring to foreground, activate)"],"output_types":["window list with metadata (title, PID, bounds, focus state)","confirmation of focus change"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_6","uri":"capability://tool.use.integration.clipboard.read.write.operations","name":"clipboard-read-write-operations","description":"Provides read and write access to the system clipboard, enabling agents to exchange text data with applications through copy/paste operations. Implements platform-specific clipboard APIs (xclip on Linux, NSPasteboard on macOS, Windows Clipboard API) with support for both text and rich text formats. Allows agents to retrieve clipboard contents for verification or use clipboard as a data exchange mechanism.","intents":["I need my agent to copy text from an application and paste it into another as part of a data transfer workflow","I want to use clipboard as a communication channel between my agent and applications","I need to verify clipboard contents after a copy operation to ensure data was captured correctly"],"best_for":["Data transfer workflows that leverage copy/paste as a mechanism","Agents that need to exchange data with applications lacking direct API access","Cross-application automation where clipboard is the primary data exchange method"],"limitations":["Clipboard is a shared resource — concurrent agents may overwrite each other's clipboard contents","No built-in clipboard history — only current clipboard contents are accessible","Rich text format support is limited — most operations use plain text","Clipboard access may be restricted in sandboxed or remote environments"],"requires":["Node.js 16+","System clipboard access (may require permissions on some systems)","On Linux: xclip or xsel installed"],"input_types":["text string (for write operations)","format specification (plain text, rich text)"],"output_types":["clipboard contents (text string)","format metadata"],"categories":["tool-use-integration","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_7","uri":"capability://tool.use.integration.system.information.and.environment.detection","name":"system-information-and-environment-detection","description":"Detects and reports system information including OS type/version, available displays, installed applications, and environment variables, enabling agents to adapt behavior based on system capabilities and configuration. Queries OS-level APIs to gather hardware information (CPU, memory, display resolution) and software environment (installed packages, PATH, environment variables). Provides this metadata to agents for capability negotiation and conditional execution.","intents":["I need my agent to detect the OS and available capabilities before attempting platform-specific operations","I want to gather system information to verify the environment meets requirements for a workflow","I need to detect installed applications to determine which tools are available for automation"],"best_for":["Cross-platform automation frameworks that need to adapt to different OS environments","Agents that need to verify system capabilities before executing workflows","Multi-tenant automation systems that need to report environment metadata"],"limitations":["Application detection is heuristic-based — not all installed applications may be detected","Environment variable access may be restricted in sandboxed environments","Hardware information is read-only — agents cannot modify system configuration","Some system information may require elevated privileges to access"],"requires":["Node.js 16+","Read access to OS system information APIs"],"input_types":["information type selector (os, displays, applications, environment)"],"output_types":["system metadata (OS type, version, architecture)","display information (count, resolutions, positions)","application list","environment variables"],"categories":["tool-use-integration","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_8","uri":"capability://automation.workflow.error.recovery.and.state.validation","name":"error-recovery-and-state-validation","description":"Implements error handling and recovery mechanisms for failed operations, including retry logic with exponential backoff, state validation after operations, and detailed error reporting. Validates that operations succeeded by comparing expected state (e.g., window focus, clipboard contents) with actual state, and provides detailed error messages including OS error codes and recovery suggestions. Enables agents to detect and recover from transient failures without explicit error handling logic.","intents":["I need my agent to automatically retry failed operations (e.g., click on element that wasn't ready) without explicit retry logic","I want detailed error information when operations fail so I can debug automation issues","I need my agent to validate that operations succeeded before proceeding to the next step"],"best_for":["Long-running automation workflows that need resilience to transient failures","Agents that need detailed error diagnostics for debugging","Automation systems that must handle variable application response times"],"limitations":["Retry logic is generic — application-specific retry strategies may not be optimal","State validation is limited to observable state — internal application state cannot be verified","Exponential backoff may be too aggressive for some applications — configurable backoff not exposed","Error recovery suggestions are heuristic-based and may not apply to all failure scenarios"],"requires":["Node.js 16+","MCP client that supports error handling and retry semantics"],"input_types":["operation to execute","optional: retry count, backoff strategy"],"output_types":["operation result or error with recovery suggestions","state validation results"],"categories":["automation-workflow","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-github-computer-use-mcp__cap_9","uri":"capability://automation.workflow.performance.monitoring.and.operation.timing","name":"performance-monitoring-and-operation-timing","description":"Tracks performance metrics for each operation including execution time, latency, and resource usage, enabling agents and developers to identify bottlenecks and optimize workflows. Records timing information for screenshot capture, input operations, and window management, and exposes metrics through MCP interface. Implements low-overhead instrumentation that doesn't significantly impact operation latency.","intents":["I need to identify which operations are slow in my automation workflow to optimize performance","I want to monitor resource usage (CPU, memory) of desktop automation operations","I need to track operation timing to detect performance regressions in my automation system"],"best_for":["Performance-sensitive automation workflows that need optimization","Teams monitoring automation system health and efficiency","Developers debugging slow automation workflows"],"limitations":["Instrumentation overhead is minimal but non-zero (~1-5ms per operation)","Resource usage metrics are OS-specific and may not be available on all platforms","No built-in performance profiling — detailed profiling requires external tools","Metrics are per-operation — end-to-end workflow timing requires agent-side aggregation"],"requires":["Node.js 16+","MCP client that supports metrics retrieval"],"input_types":["metrics type selector (timing, resource usage, operation count)"],"output_types":["performance metrics (operation timing, resource usage)","aggregated statistics (min, max, average, percentiles)"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":44,"verified":false,"data_access_risk":"high","permissions":["Node.js 16+","MCP client supporting binary/base64 resource types","Desktop environment with graphics output (headless servers not supported)","Read permissions to display server (X11/Wayland on Linux, native APIs on macOS/Windows)","MCP client with input event capability","Desktop environment with input device access","On Linux: Xdotool or similar input simulation tool installed","Appropriate user permissions (may require sudo on some systems)","Disk space for log storage","MCP client that supports log retrieval"],"failure_modes":["Screenshot capture is blocking — high-frequency polling (>10 Hz) may degrade performance","No built-in image compression — full screenshots can be 2-5MB uncompressed, increasing token usage in LLM context","Region-based capture requires precise pixel coordinates; no automatic UI element detection","Wayland display server support on Linux may be limited depending on compositor implementation","No built-in coordinate mapping — agent must translate visual element positions from screenshots to screen coordinates","Click timing is not synchronized with application event loops — rapid clicks may be missed if application is processing","No drag-and-drop support in base implementation — requires multiple move + click operations","Mouse movement curves are deterministic — sophisticated input detection systems may still identify as automated","Logging adds disk I/O overhead — high-frequency operations may impact performance","Log storage is unbounded — long-running agents may accumulate large log files","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.5908928850934951,"quality":0.32,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.25,"quality":0.25,"ecosystem":0.15,"match_graph":0.23,"freshness":0.12}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:23.902Z","last_scraped_at":"2026-05-03T14:04:47.472Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":45023,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=github-computer-use-mcp","compare_url":"https://unfragile.ai/compare?artifact=github-computer-use-mcp"}},"signature":"WsAMoWla8OVD2/0OvGCaKeAWsqmADr5PO4RjGo5zKu0qxMexuotGSnYdKlmW22cKAMC24YtKUHKKEA9q/MdBDA==","signedAt":"2026-06-20T20:14:46.308Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/github-computer-use-mcp","artifact":"https://unfragile.ai/github-computer-use-mcp","verify":"https://unfragile.ai/api/v1/verify?slug=github-computer-use-mcp","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}