{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"npm_npm-githubcomputer-use-mcp","slug":"npm-githubcomputer-use-mcp","name":"@github/computer-use-mcp","type":"mcp","url":"https://www.npmjs.com/package/@github/computer-use-mcp","page_url":"https://unfragile.ai/npm-githubcomputer-use-mcp","categories":["mcp-servers"],"tags":[],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"npm_npm-githubcomputer-use-mcp__cap_0","uri":"capability://tool.use.integration.gui.automation.via.standardized.mcp.protocol","name":"gui automation via standardized mcp protocol","description":"Exposes computer screen interaction (mouse, keyboard, screenshot capture) through the Model Context Protocol (MCP), enabling LLM agents to control desktop applications and web interfaces programmatically. Implements MCP server specification with tools for screenshot capture, mouse movement/clicking, and keyboard input, allowing any MCP-compatible client (Claude, custom agents) to orchestrate GUI interactions without direct OS-level bindings.","intents":["I want my LLM agent to interact with desktop applications that don't have APIs","I need to automate repetitive GUI workflows like form filling or data entry across multiple applications","I want to give Claude or another LLM the ability to see and control my screen to complete tasks"],"best_for":["AI agent developers building autonomous task automation systems","Teams integrating Claude with legacy or proprietary desktop software","Researchers prototyping LLM-driven UI automation without building custom integrations"],"limitations":["No built-in OCR — relies on LLM's vision capabilities to interpret screen content, limiting accuracy on complex layouts","Latency overhead from screenshot encoding/transmission per action cycle (typically 500ms-2s round-trip)","No native support for multi-monitor setups or window-specific targeting — operates on full screen coordinates only","Requires MCP client implementation; not directly usable as a standalone tool without wrapping in an agent framework"],"requires":["Node.js 16+ (MCP server runtime)","MCP-compatible client (Claude API with MCP support, or custom agent framework)","Display server access (X11/Wayland on Linux, native on macOS/Windows)","Appropriate OS permissions for input simulation and screenshot capture"],"input_types":["coordinate pairs (x, y for mouse)","keyboard input strings","screenshot request signals"],"output_types":["PNG/JPEG screenshot data (base64 encoded)","confirmation messages for input actions","structured metadata about screen state"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm_npm-githubcomputer-use-mcp__cap_1","uri":"capability://image.visual.screenshot.capture.with.llm.compatible.encoding","name":"screenshot capture with llm-compatible encoding","description":"Captures the current display state and encodes it as base64-encoded image data (PNG/JPEG) compatible with multimodal LLM vision APIs. Implements efficient screenshot serialization that balances image quality with token efficiency, allowing LLMs to analyze screen content for decision-making in automation loops.","intents":["I need my LLM agent to see what's currently on screen to decide what action to take next","I want to capture screen state at specific points in a workflow for debugging or logging","I need to feed visual context to a vision model to interpret UI elements or extract information"],"best_for":["LLM agent developers building perception-action loops","Automation engineers debugging GUI interaction failures","Researchers studying LLM reasoning over visual UI state"],"limitations":["No selective region capture — always captures full screen, increasing token usage for large displays","Encoding overhead adds 100-300ms per screenshot depending on resolution and compression","No built-in image optimization or downsampling — relies on client-side compression or LLM token budgeting","Vision accuracy depends entirely on LLM's visual understanding; no structured UI parsing or element detection"],"requires":["Node.js 16+","Display server with screenshot capability (X11, Wayland, native macOS/Windows)","MCP client with image/base64 support in tool responses"],"input_types":["screenshot request (no parameters)"],"output_types":["base64-encoded PNG or JPEG image data","image metadata (dimensions, format)"],"categories":["image-visual","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm_npm-githubcomputer-use-mcp__cap_2","uri":"capability://tool.use.integration.mouse.control.with.absolute.positioning","name":"mouse control with absolute positioning","description":"Enables LLM agents to move the mouse cursor to absolute screen coordinates and perform click actions (left, right, double-click). Implements coordinate-based input without relative motion or gesture support, requiring the agent to calculate target positions based on visual feedback from screenshots.","intents":["I want my agent to click on UI elements it identified in a screenshot","I need to move the mouse to a specific location before typing or performing another action","I want to simulate right-click context menus for accessing application features"],"best_for":["Automation developers building click-based workflows on web and desktop UIs","Teams automating data entry or form submission across applications","Researchers testing LLM spatial reasoning on screen coordinates"],"limitations":["Absolute positioning only — no relative motion, drag-and-drop, or gesture support (swipes, pinches)","No collision detection or validation — agent can click on invalid coordinates without feedback","Coordinate system is global screen space; no window-relative or element-relative targeting","No hover state tracking — agent cannot detect hover-triggered UI changes without taking another screenshot"],"requires":["Node.js 16+","OS input simulation permissions (may require elevated privileges on some systems)","MCP client capable of sending tool calls with numeric coordinate parameters"],"input_types":["x coordinate (integer, 0 to screen width)","y coordinate (integer, 0 to screen height)","click type (left, right, double)"],"output_types":["confirmation message (e.g., 'clicked at 512, 384')","error message if coordinates out of bounds"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm_npm-githubcomputer-use-mcp__cap_3","uri":"capability://tool.use.integration.keyboard.input.with.text.and.special.key.support","name":"keyboard input with text and special key support","description":"Allows LLM agents to send keyboard input including text strings and special keys (Enter, Tab, Escape, arrow keys, etc.) to the focused application. Implements key event simulation at the OS level, enabling agents to type into forms, navigate menus, and trigger keyboard shortcuts without requiring visual feedback between keystrokes.","intents":["I want my agent to type text into a form field or search box","I need to send keyboard shortcuts (Ctrl+C, Cmd+V) to interact with applications","I want to navigate UI menus using arrow keys and Enter"],"best_for":["Automation engineers building text-input workflows (form filling, search, code entry)","Teams automating keyboard-driven applications (terminals, IDEs, legacy software)","Researchers studying LLM text generation for UI interaction"],"limitations":["No keyboard state awareness — agent cannot detect if Caps Lock or Num Lock is active","No input validation or error recovery — typing into wrong field fails silently without feedback","Special key support depends on OS and keyboard layout; non-ASCII characters may not work reliably","No modifier key state tracking — agent must explicitly send Shift/Ctrl/Alt with each key combination"],"requires":["Node.js 16+","OS input simulation permissions","MCP client capable of sending tool calls with string and key parameters"],"input_types":["text string (arbitrary length)","special key name (Enter, Tab, Escape, ArrowUp, ArrowDown, ArrowLeft, ArrowRight, etc.)","modifier combinations (Ctrl+, Shift+, Alt+, Cmd+)"],"output_types":["confirmation message (e.g., 'typed 'hello world'')","error message if key not recognized"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm_npm-githubcomputer-use-mcp__cap_4","uri":"capability://tool.use.integration.mcp.server.lifecycle.and.tool.registration","name":"mcp server lifecycle and tool registration","description":"Implements the MCP server specification, registering screenshot, mouse, and keyboard tools as discoverable capabilities that MCP clients can invoke. Handles MCP protocol handshake, tool schema definition, and request/response serialization, enabling any MCP-compatible client to discover and call computer-use tools without hardcoding tool names.","intents":["I want to integrate computer-use capabilities into my MCP-compatible agent framework","I need my LLM client to discover available GUI automation tools dynamically","I want to build a custom agent that uses MCP tools for computer control"],"best_for":["MCP client developers (Claude API, custom agent frameworks) integrating computer use","Teams building standardized agent platforms that support multiple tool providers","Researchers prototyping LLM agent architectures with pluggable tool systems"],"limitations":["MCP protocol overhead adds latency to each tool invocation (typically 50-200ms per round-trip)","Tool schema must be statically defined at server startup — no dynamic tool registration based on runtime state","No built-in authentication or access control — assumes trusted client environment","Error handling relies on MCP error response format; no custom exception types or detailed diagnostics"],"requires":["Node.js 16+","MCP client library or framework (e.g., @anthropic-sdk/sdk with MCP support)","Understanding of MCP protocol and tool schema format"],"input_types":["MCP protocol messages (tool_call, resource_read, etc.)"],"output_types":["MCP tool definitions (schema, description, parameters)","MCP tool responses (result, error)"],"categories":["tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm_npm-githubcomputer-use-mcp__cap_5","uri":"capability://planning.reasoning.agent.driven.perception.action.loop.orchestration","name":"agent-driven perception-action loop orchestration","description":"Enables LLM agents to execute multi-step automation workflows by composing screenshot analysis with mouse/keyboard actions in tight feedback loops. The agent perceives screen state via screenshots, reasons about next actions, and executes them via mouse/keyboard tools, repeating until task completion. Supports iterative refinement where agents can correct mistakes by taking new screenshots and adjusting subsequent actions.","intents":["I want my agent to complete a multi-step task like filling a form, submitting it, and verifying the result","I need my agent to recover from mistakes by detecting failures in screenshots and retrying with different actions","I want to build a workflow that adapts to dynamic UI changes by re-analyzing the screen state"],"best_for":["Automation engineers building resilient, adaptive workflows for complex applications","Teams automating business processes that require visual feedback and error recovery","Researchers studying LLM reasoning and planning in interactive environments"],"limitations":["Latency compounds with loop iterations — each screenshot + action cycle adds 500ms-2s, making long workflows slow","No built-in state machine or workflow definition — agent must maintain task context and progress in its reasoning","No timeout or loop-break mechanisms — agent can get stuck in infinite loops if it misinterprets screen state","Vision-based error detection is unreliable — agent may not recognize failures or may misinterpret error messages"],"requires":["Node.js 16+","MCP-compatible LLM client with vision capabilities (Claude 3.5+)","Sufficient LLM context window to maintain task state across multiple action cycles"],"input_types":["task description (natural language)","initial screen state (screenshot)"],"output_types":["task completion status","sequence of actions taken","final screen state"],"categories":["planning-reasoning","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":40,"verified":false,"data_access_risk":"high","permissions":["Node.js 16+ (MCP server runtime)","MCP-compatible client (Claude API with MCP support, or custom agent framework)","Display server access (X11/Wayland on Linux, native on macOS/Windows)","Appropriate OS permissions for input simulation and screenshot capture","Node.js 16+","Display server with screenshot capability (X11, Wayland, native macOS/Windows)","MCP client with image/base64 support in tool responses","OS input simulation permissions (may require elevated privileges on some systems)","MCP client capable of sending tool calls with numeric coordinate parameters","OS input simulation permissions"],"failure_modes":["No built-in OCR — relies on LLM's vision capabilities to interpret screen content, limiting accuracy on complex layouts","Latency overhead from screenshot encoding/transmission per action cycle (typically 500ms-2s round-trip)","No native support for multi-monitor setups or window-specific targeting — operates on full screen coordinates only","Requires MCP client implementation; not directly usable as a standalone tool without wrapping in an agent framework","No selective region capture — always captures full screen, increasing token usage for large displays","Encoding overhead adds 100-300ms per screenshot depending on resolution and compression","No built-in image optimization or downsampling — relies on client-side compression or LLM token budgeting","Vision accuracy depends entirely on LLM's visual understanding; no structured UI parsing or element detection","Absolute positioning only — no relative motion, drag-and-drop, or gesture support (swipes, pinches)","No collision detection or validation — agent can click on invalid coordinates without feedback","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.5908928850934951,"quality":0.22,"ecosystem":0.3,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.25,"quality":0.25,"ecosystem":0.15,"match_graph":0.23,"freshness":0.12}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:23.903Z","last_scraped_at":"2026-05-03T14:23:34.421Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":45023,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=npm-githubcomputer-use-mcp","compare_url":"https://unfragile.ai/compare?artifact=npm-githubcomputer-use-mcp"}},"signature":"MiL4XlT+JviYCR4m4Aj4AYTdFg3v7YtnJbHwu8ingHDk8+suWE0Lhxz8C2Jh/Jg36oIEeDsntxZZHVp5kzyvCg==","signedAt":"2026-06-19T22:09:38.470Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/npm-githubcomputer-use-mcp","artifact":"https://unfragile.ai/npm-githubcomputer-use-mcp","verify":"https://unfragile.ai/api/v1/verify?slug=npm-githubcomputer-use-mcp","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}