{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"github-microsoft--ufo","slug":"microsoft--ufo","name":"UFO","type":"repo","url":"https://microsoft.github.io/UFO/","page_url":"https://unfragile.ai/microsoft--ufo","categories":["ai-agents"],"tags":["agent","automation","copilot","gui","llm","windows"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"github-microsoft--ufo__cap_0","uri":"capability://automation.workflow.gui.based.desktop.automation.via.visual.understanding.and.ui.control","name":"gui-based desktop automation via visual understanding and ui control","description":"UFO² captures Windows desktop screenshots, annotates UI elements with bounding boxes and semantic labels, and executes actions (clicks, text input, keyboard commands) by mapping LLM-generated action descriptions to concrete UI coordinates. The system uses OCR and UI inspection APIs (COM-based Windows Automation Framework) to build a semantic representation of the screen state, enabling the agent to interact with any Windows application without requiring native API bindings or application-specific integrations.","intents":["Automate repetitive Windows desktop tasks like form filling, data entry, or multi-app workflows without writing scripts","Build agents that can interact with legacy or proprietary Windows applications that lack APIs","Enable non-technical users to define automation through natural language rather than code"],"best_for":["Enterprise automation teams managing Windows-heavy workflows","RPA practitioners replacing UiPath or Blue Prism with open-source alternatives","Developers building copilots for Windows desktop applications"],"limitations":["Windows-only — no native support for macOS or Linux desktop automation","Screenshot-based perception introduces latency (~500ms per perception cycle) and can fail on dynamic or rapidly changing UIs","Coordinate-based clicking is fragile to screen resolution changes; requires annotation system to remain synchronized","No built-in handling of modal dialogs, overlays, or off-screen UI elements"],"requires":["Windows 10 or later with COM automation support enabled","Python 3.9+","LLM API key (OpenAI, Anthropic, or local Ollama instance)","Administrator privileges for some UI inspection operations"],"input_types":["natural language task descriptions","desktop screenshots (PNG/JPEG)","UI element annotations (bounding boxes, labels)"],"output_types":["action sequences (click, type, keyboard commands)","execution logs with screenshots and action traces"],"categories":["automation-workflow","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_1","uri":"capability://planning.reasoning.multi.device.task.orchestration.via.constellation.agent.and.galaxy.framework","name":"multi-device task orchestration via constellation agent and galaxy framework","description":"UFO³ Galaxy enables a Constellation Agent to decompose high-level tasks into subtasks, distribute them across multiple registered Windows devices, and coordinate execution through an Agent Interaction Protocol (AIP). The system maintains device lifecycle state (registration, heartbeat, availability), routes tasks to appropriate devices based on capability matching, and aggregates results. Task Constellation manages task dependencies and execution order across heterogeneous devices in a network.","intents":["Orchestrate complex workflows that span multiple Windows machines (e.g., data processing on Device A, report generation on Device B)","Build distributed automation systems where different devices handle different application domains","Scale automation from single-device to multi-device deployments without rewriting task logic"],"best_for":["Enterprise teams automating workflows across multiple Windows workstations or servers","Distributed RPA deployments requiring centralized task management","Organizations building multi-tenant automation platforms"],"limitations":["Requires network connectivity and stable device registration; device failures can cascade to dependent tasks","Task decomposition is LLM-driven and may not always produce optimal device assignments or task granularity","No built-in load balancing or task prioritization — relies on simple capability matching","Cross-device state synchronization adds latency; eventual consistency model may cause ordering issues in tightly-coupled workflows"],"requires":["UFO² agents running on each target Windows device","Network connectivity between Galaxy orchestrator and all devices","Device registration via Galaxy Web UI or API","LLM with structured output support (for task decomposition)"],"input_types":["natural language task descriptions","device capability declarations (e.g., 'has Excel', 'has SAP access')","task dependency graphs (optional)"],"output_types":["task execution plan with device assignments","aggregated results from all devices","execution logs with per-device traces"],"categories":["planning-reasoning","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_10","uri":"capability://automation.workflow.galaxy.web.ui.for.task.submission.monitoring.and.device.management","name":"galaxy web ui for task submission, monitoring, and device management","description":"UFO³ provides a web-based interface for submitting automation tasks, monitoring execution progress, viewing device status, and managing device registrations. The Web UI communicates with the Galaxy orchestrator via REST APIs, displays real-time execution logs and screenshots, and allows users to pause/resume/cancel tasks. Supports role-based access control for multi-user environments.","intents":["Allow non-technical users to submit automation tasks without command-line access","Monitor automation execution in real-time with visual feedback (screenshots, logs)","Manage device registrations and capabilities through a user-friendly interface"],"best_for":["Non-technical business users who need to submit and monitor automation tasks","Operations teams managing multiple automation deployments","Organizations requiring audit trails and execution visibility"],"limitations":["Web UI is read-only for execution logs; cannot modify running tasks beyond pause/resume/cancel","Real-time updates rely on polling or WebSocket; high-frequency updates can overload the server","No built-in support for task scheduling or recurring automation; requires external scheduler","Role-based access control is basic; no fine-grained permission model"],"requires":["Web browser (Chrome, Firefox, Safari, Edge)","Network access to Galaxy orchestrator","REST API endpoints exposed by Galaxy"],"input_types":["task descriptions (text input)","device selections (dropdown)","task parameters (form fields)"],"output_types":["task execution status (running, completed, failed)","execution logs (text)","screenshots (images)","device status (online, offline, busy)"],"categories":["automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_11","uri":"capability://automation.workflow.configuration.system.with.agent.device.and.llm.settings","name":"configuration system with agent, device, and llm settings","description":"UFO³ uses a hierarchical configuration system (YAML/JSON files) to define agent behavior, device capabilities, LLM provider settings, and knowledge base sources. Configuration files are organized by scope: agent-level (model selection, prompt templates), device-level (capabilities, resource constraints), and system-level (Galaxy settings, database connections). The system supports configuration inheritance and environment variable substitution, enabling flexible deployment across development, staging, and production environments.","intents":["Configure agent behavior without code changes (model selection, prompt templates, tool availability)","Declare device capabilities and constraints for task routing","Manage LLM provider credentials and settings across multiple environments"],"best_for":["Teams deploying UFO³ across multiple environments (dev, staging, prod)","Organizations with strict credential management requirements","Scenarios requiring frequent configuration changes without code redeployment"],"limitations":["Configuration validation is minimal; invalid settings may only be detected at runtime","No built-in configuration versioning or rollback; changes are immediate","Configuration inheritance can be confusing; unclear which setting takes precedence","Environment variable substitution is basic; no support for complex templating"],"requires":["Configuration files (YAML or JSON)","Environment variables for sensitive data (API keys, credentials)","Python 3.9+"],"input_types":["YAML/JSON configuration files","environment variables"],"output_types":["parsed configuration objects","validation errors"],"categories":["automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_12","uri":"capability://automation.workflow.user.interaction.module.for.human.in.the.loop.automation","name":"user interaction module for human-in-the-loop automation","description":"UFO² includes a User Interaction Module that pauses automation and requests human input when the agent encounters ambiguous situations or needs confirmation. The module can display screenshots with annotations, ask multiple-choice questions, or request free-form text input. Responses are injected back into the agent's context, allowing it to continue with human guidance. Supports both synchronous (blocking) and asynchronous (non-blocking) interaction patterns.","intents":["Handle edge cases and ambiguous situations by requesting human clarification","Implement approval workflows where humans must confirm actions before execution","Reduce automation failures by allowing humans to guide agents through unexpected scenarios"],"best_for":["Workflows with approval requirements (e.g., financial transactions, sensitive data changes)","Scenarios with high failure rates due to UI variability or edge cases","Organizations requiring human oversight for compliance or risk management"],"limitations":["Blocking on human input introduces latency; long wait times can cause session timeouts","No built-in timeout mechanism; if human doesn't respond, automation hangs indefinitely","Asynchronous interaction is complex to implement; requires external notification system","Human responses are not validated; invalid responses can cause automation failures"],"requires":["User interface for displaying prompts and collecting responses (CLI, Web UI, or custom)","Notification system for asynchronous interaction (email, Slack, etc.)","Python 3.9+"],"input_types":["screenshots (for context)","question text","response options (for multiple-choice)"],"output_types":["human responses (text, selections)","confirmation decisions"],"categories":["automation-workflow","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_13","uri":"capability://data.processing.analysis.execution.logging.and.dataflow.tracking.with.lam.data.collection","name":"execution logging and dataflow tracking with lam data collection","description":"UFO³ logs all execution details (actions, observations, LLM responses, tool results) to structured logs that can be analyzed for debugging and improvement. The system captures LAM (Learning from Automation Metrics) data including action success rates, LLM reasoning quality, and tool call patterns. Logs include screenshots, action traces, and full context at each step, enabling post-mortem analysis of failures. Supports log export in multiple formats (JSON, CSV) and integration with external analytics platforms.","intents":["Debug automation failures by reviewing detailed execution logs with screenshots and action traces","Analyze agent behavior patterns to identify improvement opportunities","Collect metrics for monitoring automation reliability and performance"],"best_for":["Teams debugging complex automation failures","Organizations collecting metrics for continuous improvement","Scenarios requiring audit trails for compliance"],"limitations":["Logging adds significant overhead (~50-100ms per round) and storage requirements (~10-50MB per hour of automation)","Screenshots in logs consume large amounts of storage; requires compression or selective logging","Log analysis is manual; no built-in anomaly detection or automated failure diagnosis","LAM data collection requires careful privacy considerations (screenshots may contain sensitive data)"],"requires":["Persistent storage for logs (filesystem, database, or cloud storage)","Log analysis tools (built-in or external)","Python 3.9+"],"input_types":["execution events (actions, observations, responses)","screenshots","context information"],"output_types":["structured logs (JSON, CSV)","execution traces","metrics (success rates, latencies, etc.)"],"categories":["data-processing-analysis","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_2","uri":"capability://tool.use.integration.hybrid.action.execution.combining.llm.reasoning.with.deterministic.automation","name":"hybrid action execution combining llm reasoning with deterministic automation","description":"UFO² supports both LLM-generated actions (click, type, navigate) and deterministic automation actions (MCP tool calls, COM API invocations, PowerShell scripts). The system routes actions through an Automation Framework that dispatches to appropriate executors: GUI actions go to the screenshot-annotation-action loop, while tool calls invoke registered MCP servers or COM Application Receivers. This hybrid approach allows agents to use LLM reasoning for complex UI navigation while offloading structured tasks (data extraction, API calls) to deterministic tools.","intents":["Combine visual automation (clicking UI elements) with programmatic automation (API calls, database queries) in a single workflow","Reduce hallucination risk by using deterministic tools for well-defined operations while using LLM for unstructured UI navigation","Integrate with existing enterprise tools (SAP, Salesforce, custom APIs) without rewriting automation logic"],"best_for":["Teams with mixed automation needs (some tasks require UI interaction, others require API/database access)","Organizations with existing MCP servers or COM components that should be reused","Developers building agents that need both flexibility (LLM reasoning) and reliability (deterministic tools)"],"limitations":["Requires explicit registration of MCP servers or COM components; no auto-discovery","Context switching between LLM actions and tool calls adds latency and complexity to error handling","Tool schemas must be manually defined; no automatic schema inference from COM interfaces","Debugging hybrid workflows is harder because failures can occur in either the LLM reasoning or the tool execution layer"],"requires":["MCP servers running and accessible (for tool-use actions)","COM components registered on Windows (for COM Application Receivers)","Tool schema definitions in UFO configuration","Python 3.9+"],"input_types":["natural language task descriptions","tool schemas (JSON or Python type hints)","desktop screenshots (for GUI actions)"],"output_types":["action sequences (mixed GUI and tool actions)","structured tool results (JSON, CSV, etc.)","execution traces with action-level details"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_3","uri":"capability://image.visual.multi.modal.prompt.construction.with.screenshots.ocr.and.ui.annotations","name":"multi-modal prompt construction with screenshots, ocr, and ui annotations","description":"UFO² builds prompts that include desktop screenshots, extracted text (via OCR), and semantic UI annotations (element labels, bounding boxes, hierarchy). The Prompt System constructs multi-modal inputs by combining these modalities with task context and memory, then sends them to LLMs that support vision (GPT-4V, Claude 3.5). The system maintains a Prompt Component library that allows customization of how screenshots, OCR, and annotations are formatted and prioritized based on agent strategy.","intents":["Enable LLMs to understand complex desktop UIs by providing visual context alongside text descriptions","Reduce token usage by selectively including only relevant UI regions rather than full screenshots","Support agents in reasoning about UI state changes and element relationships"],"best_for":["Teams using vision-capable LLMs (GPT-4V, Claude 3.5, Gemini) for desktop automation","Scenarios where UI complexity requires visual reasoning (e.g., spreadsheets, design tools)","Organizations optimizing for token efficiency in long-running automation sessions"],"limitations":["Vision LLM API costs are significantly higher than text-only models (~10-20x per request)","OCR quality degrades on low-resolution screenshots, small fonts, or non-English text","Annotation system can become out-of-sync with actual UI if elements move or change rapidly","No built-in support for video or temporal sequences; each screenshot is treated independently"],"requires":["Vision-capable LLM API (OpenAI GPT-4V, Anthropic Claude 3.5, or compatible)","OCR engine (built-in or external service)","UI annotation system (part of UFO² core)"],"input_types":["desktop screenshots (PNG, JPEG)","task descriptions (natural language)","UI element metadata (bounding boxes, labels, hierarchy)"],"output_types":["multi-modal prompts (text + image + structured annotations)","LLM responses with action descriptions"],"categories":["image-visual","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_4","uri":"capability://planning.reasoning.agent.state.machine.management.with.session.and.round.lifecycle","name":"agent state machine management with session and round lifecycle","description":"UFO² implements explicit state machines for both Host Agent (window/app selection state) and App Agent (UI interaction state). Sessions represent continuous automation contexts (e.g., 'automate Excel workbook'), while Rounds represent individual LLM reasoning cycles within a session. The system tracks state transitions, maintains context across rounds, and enforces valid state progressions. Session Pool manages multiple concurrent sessions, enabling parallel automation across different applications.","intents":["Maintain consistent agent behavior across multiple reasoning cycles by explicitly managing state","Enable recovery from failures by checkpointing session state and resuming from known points","Support concurrent automation of multiple applications without cross-contamination of context"],"best_for":["Complex automation workflows requiring multi-step reasoning and state consistency","Long-running automation sessions that need to survive transient failures","Scenarios with concurrent automation of multiple applications"],"limitations":["State machine complexity increases with number of valid state transitions; hard to reason about for complex workflows","Session checkpointing adds overhead (~100-200ms per round) and requires persistent storage","No built-in distributed session management; sessions are tied to a single UFO² instance","State transitions are synchronous; no support for asynchronous or event-driven state changes"],"requires":["Python 3.9+","Persistent storage for session checkpoints (local filesystem or database)","LLM with structured output support (for state transition decisions)"],"input_types":["task descriptions","current session state","round context (previous actions, observations)"],"output_types":["state transition decisions","action sequences","session checkpoints"],"categories":["planning-reasoning","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_5","uri":"capability://memory.knowledge.knowledge.base.integration.via.rag.system.with.vector.embeddings","name":"knowledge base integration via rag system with vector embeddings","description":"UFO³ includes a RAG (Retrieval-Augmented Generation) system that allows agents to query knowledge bases (documents, FAQs, process guides) using semantic search. The system embeds documents into a vector database, retrieves relevant context based on task descriptions, and injects retrieved knowledge into prompts. Supports multiple vector database backends and allows custom knowledge creation through document ingestion pipelines.","intents":["Provide agents with access to domain-specific knowledge (e.g., company process guides, API documentation) without fine-tuning","Reduce hallucination by grounding agent reasoning in retrieved facts","Enable non-technical users to update automation knowledge by adding documents to the knowledge base"],"best_for":["Organizations with extensive process documentation that should guide automation","Scenarios where agents need to reference external knowledge (APIs, regulations, company policies)","Teams building domain-specific automation copilots"],"limitations":["RAG quality depends on document quality and relevance; poor documents lead to poor retrievals","Semantic search can fail on domain-specific terminology or acronyms not well-represented in embeddings","Vector database synchronization adds latency (~200-500ms per retrieval) and requires maintenance","No built-in handling of document versioning or temporal knowledge (e.g., 'this process changed on date X')"],"requires":["Vector database (Chroma, Weaviate, Pinecone, or compatible)","Embedding model (OpenAI, Hugging Face, or local)","Document ingestion pipeline (built-in or custom)","Knowledge base documents in supported formats (PDF, TXT, Markdown)"],"input_types":["task descriptions (for semantic search)","documents (for knowledge base ingestion)","query strings"],"output_types":["retrieved document chunks","augmented prompts with knowledge context","relevance scores"],"categories":["memory-knowledge","search-retrieval"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_6","uri":"capability://tool.use.integration.llm.provider.abstraction.with.support.for.multiple.models.and.custom.integrations","name":"llm provider abstraction with support for multiple models and custom integrations","description":"UFO³ abstracts LLM interactions through a Service Architecture that supports OpenAI, Anthropic, Azure OpenAI, and local Ollama instances. The system handles model-specific differences (function calling schemas, vision capabilities, structured output formats) through adapter patterns. Agents can specify preferred LLM providers in configuration, and the system routes requests accordingly. Supports custom model integration through a plugin interface.","intents":["Switch between LLM providers (OpenAI, Anthropic, local Ollama) without changing agent code","Use different models for different tasks (e.g., GPT-4V for visual reasoning, GPT-3.5 for simple decisions)","Integrate proprietary or fine-tuned models into the automation framework"],"best_for":["Organizations with multi-cloud or hybrid LLM strategies","Teams optimizing for cost by using cheaper models for simple tasks and expensive models for complex reasoning","Enterprises with on-premises LLM requirements (Ollama, vLLM)"],"limitations":["Model-specific features (function calling, vision, structured output) may not be available across all providers; requires fallback logic","Response format differences between providers can cause parsing errors; requires normalization layer","Rate limiting and quota management are provider-specific and not abstracted","Custom model integration requires implementing adapter interface; no auto-detection"],"requires":["API keys for selected LLM providers (OpenAI, Anthropic, Azure, etc.)","Configuration specifying preferred providers and models","Python 3.9+"],"input_types":["prompts (text and multi-modal)","function schemas (for tool calling)","configuration specifying provider and model"],"output_types":["LLM responses (text, structured JSON, function calls)","normalized response objects"],"categories":["tool-use-integration","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_7","uri":"capability://data.processing.analysis.structured.output.and.response.parsing.with.schema.validation","name":"structured output and response parsing with schema validation","description":"UFO³ uses structured output formats (JSON schemas, Pydantic models) to constrain LLM responses and enable reliable parsing. The system defines schemas for agent actions (click, type, navigate), task decomposition results, and tool call parameters. LLMs that support structured output (OpenAI JSON mode, Anthropic structured output) are used to generate responses matching these schemas. Responses are validated against schemas before execution, preventing malformed actions.","intents":["Ensure LLM-generated actions are well-formed and executable without manual validation","Enable reliable parsing of complex responses (multi-step action sequences, task decompositions)","Reduce hallucination by constraining LLM output to valid action spaces"],"best_for":["Automation workflows requiring high reliability and low error rates","Scenarios with complex action sequences that need to be parsed and validated","Teams building production automation systems where malformed actions are costly"],"limitations":["Schema constraints may limit LLM expressiveness; some valid reasoning patterns may not fit schema","Not all LLM providers support structured output; fallback to regex parsing is less reliable","Schema evolution is difficult; changing schemas requires revalidating existing workflows","Validation errors provide limited debugging information; hard to understand why LLM response didn't match schema"],"requires":["LLM with structured output support (OpenAI, Anthropic, or compatible)","Schema definitions (JSON Schema or Pydantic models)","Validation library (built-in or external)"],"input_types":["prompts","schema definitions"],"output_types":["validated structured responses (JSON, Pydantic objects)","validation errors with details"],"categories":["data-processing-analysis","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_8","uri":"capability://tool.use.integration.mcp.model.context.protocol.server.integration.for.tool.calling","name":"mcp (model context protocol) server integration for tool calling","description":"UFO³ integrates with MCP servers to extend agent capabilities beyond built-in actions. Agents can discover available tools from registered MCP servers, call them with structured parameters, and receive results. The system handles MCP protocol details (request/response serialization, error handling) transparently. MCP servers can be local (same machine) or remote (over HTTP/WebSocket), enabling integration with external services and tools.","intents":["Extend agent capabilities by integrating with external tools and services via MCP","Enable agents to call APIs, databases, and custom services without hardcoding integrations","Build composable automation by combining multiple MCP servers"],"best_for":["Teams with existing MCP servers that should be integrated into automation","Scenarios requiring integration with external APIs or services","Organizations building extensible automation platforms"],"limitations":["Requires MCP servers to be running and accessible; no built-in server lifecycle management","Tool discovery and schema inference must be done manually; no auto-discovery from MCP servers","Error handling is limited to MCP protocol errors; application-level errors must be handled by server","No built-in caching or memoization of tool results; repeated calls incur full latency"],"requires":["MCP servers running and accessible (local or remote)","MCP server registration in UFO configuration","Tool schema definitions","Network connectivity (for remote MCP servers)"],"input_types":["tool names and parameters","MCP server endpoints"],"output_types":["tool results (JSON, text, or custom formats)","error messages"],"categories":["tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-microsoft--ufo__cap_9","uri":"capability://automation.workflow.device.lifecycle.management.and.capability.based.task.routing","name":"device lifecycle management and capability-based task routing","description":"UFO³ Galaxy maintains a registry of connected Windows devices with their capabilities (installed applications, available tools, resource constraints). Devices register with the Galaxy orchestrator via a registration protocol, send periodic heartbeats to signal availability, and report their capabilities. The Constellation Agent uses this capability information to route tasks to appropriate devices (e.g., 'route to device with Excel' or 'route to device with SAP access'). Device failures are detected via heartbeat timeouts, and tasks can be rerouted to healthy devices.","intents":["Distribute automation tasks across multiple devices based on application availability","Automatically detect and handle device failures without manual intervention","Scale automation by adding new devices to the Galaxy without reconfiguring tasks"],"best_for":["Enterprise environments with multiple Windows workstations or servers","Distributed automation deployments requiring automatic device discovery and failover","Organizations scaling automation from single-device to multi-device setups"],"limitations":["Device capability declarations are static; dynamic capability changes (e.g., app installed at runtime) require manual updates","Heartbeat-based failure detection has inherent latency (typically 30-60 seconds); transient network issues can cause false positives","No built-in load balancing; capability-based routing may result in uneven device utilization","Device registration requires manual configuration or API calls; no auto-discovery mechanism"],"requires":["UFO² agents running on each target device","Network connectivity between devices and Galaxy orchestrator","Device registration via Galaxy Web UI or API","Capability declarations for each device"],"input_types":["device registration requests (device ID, capabilities, resource info)","heartbeat signals","capability updates"],"output_types":["device registry (device ID, status, capabilities)","task routing decisions","failure notifications"],"categories":["automation-workflow","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":46,"verified":false,"data_access_risk":"high","permissions":["Windows 10 or later with COM automation support enabled","Python 3.9+","LLM API key (OpenAI, Anthropic, or local Ollama instance)","Administrator privileges for some UI inspection operations","UFO² agents running on each target Windows device","Network connectivity between Galaxy orchestrator and all devices","Device registration via Galaxy Web UI or API","LLM with structured output support (for task decomposition)","Web browser (Chrome, Firefox, Safari, Edge)","Network access to Galaxy orchestrator"],"failure_modes":["Windows-only — no native support for macOS or Linux desktop automation","Screenshot-based perception introduces latency (~500ms per perception cycle) and can fail on dynamic or rapidly changing UIs","Coordinate-based clicking is fragile to screen resolution changes; requires annotation system to remain synchronized","No built-in handling of modal dialogs, overlays, or off-screen UI elements","Requires network connectivity and stable device registration; device failures can cascade to dependent tasks","Task decomposition is LLM-driven and may not always produce optimal device assignments or task granularity","No built-in load balancing or task prioritization — relies on simple capability matching","Cross-device state synchronization adds latency; eventual consistency model may cause ordering issues in tightly-coupled workflows","Web UI is read-only for execution logs; cannot modify running tasks beyond pause/resume/cancel","Real-time updates rely on polling or WebSocket; high-frequency updates can overload the server","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.6499796730969731,"quality":0.35,"ecosystem":0.5800000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.3,"quality":0.2,"ecosystem":0.15,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.062Z","last_scraped_at":"2026-05-03T13:58:39.623Z","last_commit":"2026-04-29T04:39:38Z"},"community":{"stars":8518,"forks":1015,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=microsoft--ufo","compare_url":"https://unfragile.ai/compare?artifact=microsoft--ufo"}},"signature":"0bhswC2g5uD+oIj8Q9KWyVRiOHbOljYMWAC3abVD1viuv8O71CfkawTGHFiofNBHgepSx1P2sYCzKndkmRneBg==","signedAt":"2026-06-22T18:14:43.043Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/microsoft--ufo","artifact":"https://unfragile.ai/microsoft--ufo","verify":"https://unfragile.ai/api/v1/verify?slug=microsoft--ufo","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}