{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"github-khoj-ai--khoj","slug":"khoj-ai--khoj","name":"khoj","type":"agent","url":"https://khoj.dev","page_url":"https://unfragile.ai/khoj-ai--khoj","categories":["ai-agents"],"tags":["agent","ai","assistant","chat","chatgpt","emacs","image-generation","llama3","llamacpp","llm","obsidian","obsidian-md","offline-llm","productivity","rag","research","self-hosted","semantic-search","stt","whatsapp-ai"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"github-khoj-ai--khoj__cap_0","uri":"capability://search.retrieval.semantic.search.over.personal.documents","name":"semantic-search-over-personal-documents","description":"Indexes user documents (markdown, PDFs, web pages) into PostgreSQL with vector embeddings, enabling semantic search via cosine similarity matching. Uses a content processing pipeline that extracts, chunks, and embeds documents through configurable embedding models, then retrieves contextually relevant passages to augment chat responses. The search engine supports multiple content sources (local files, web URLs, Obsidian vaults) with unified indexing through database adapters.","intents":["I want to search my personal knowledge base by meaning, not just keywords","I need to find relevant context from my notes to answer a question","I want to index my Obsidian vault and search it semantically","I need to add web pages to my searchable knowledge base"],"best_for":["knowledge workers maintaining large personal document collections","researchers building custom knowledge bases","teams migrating from keyword search to semantic retrieval"],"limitations":["Embedding quality depends on chosen model; local embeddings slower than cloud alternatives","Vector search latency increases with corpus size (no built-in sharding)","Requires PostgreSQL with pgvector extension for vector operations","Chunking strategy is fixed; no dynamic chunk size optimization per document type"],"requires":["PostgreSQL 12+ with pgvector extension","Embedding model API key (OpenAI, Hugging Face, or local model)","Python 3.9+","Supported document formats: markdown, PDF, HTML, plain text"],"input_types":["markdown files","PDF documents","HTML/web pages","plain text","Obsidian vault exports"],"output_types":["ranked list of relevant document passages","structured metadata (source, timestamp, relevance score)","augmented chat context with citations"],"categories":["search-retrieval","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_1","uri":"capability://text.generation.language.multi.provider.llm.chat.with.context.augmentation","name":"multi-provider-llm-chat-with-context-augmentation","description":"Routes chat requests through a provider-agnostic conversation pipeline that supports OpenAI (GPT), Anthropic (Claude), Google Gemini, and local LLMs (Llama, Qwen, Mistral via Ollama/LlamaCPP). The chat processor retrieves relevant context from the semantic search index, constructs a system prompt with retrieved passages, and streams responses back to clients. Implements conversation history management via Django ORM with per-user conversation threads and message persistence.","intents":["I want to chat with my documents using my preferred LLM provider","I need to switch between different LLM providers without changing my workflow","I want to run a local LLM for privacy without cloud API calls","I need conversation history preserved across sessions"],"best_for":["developers building multi-provider LLM applications","privacy-conscious teams requiring on-premise LLM inference","organizations with existing LLM provider contracts (OpenAI, Anthropic, Google)"],"limitations":["Context window limited by chosen LLM; no automatic context compression","Local LLM inference requires significant GPU memory (8GB+ for Llama 7B)","Provider-specific prompt engineering needed for optimal results per model","No built-in fallback mechanism if primary provider fails","Streaming response latency varies significantly by provider (OpenAI ~200ms, local ~500ms+)"],"requires":["API key for at least one provider: OpenAI, Anthropic, Google Gemini, or Ollama/LlamaCPP endpoint","Python 3.9+","PostgreSQL for conversation history storage","For local LLMs: Ollama or LlamaCPP server running locally"],"input_types":["natural language text queries","conversation history (JSON)","system prompts (text)"],"output_types":["streaming text responses","structured conversation metadata (tokens used, model, timestamp)","conversation thread exports (JSON, markdown)"],"categories":["text-generation-language","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_10","uri":"capability://memory.knowledge.obsidian.vault.integration.with.live.sync","name":"obsidian-vault-integration-with-live-sync","description":"Provides an Obsidian plugin that indexes the user's vault into Khoj's knowledge base and enables semantic search within Obsidian. The plugin watches for file changes and incrementally updates the index, supporting live synchronization of new notes. Implements bidirectional integration: users can search their vault from Khoj chat, and Khoj can suggest related notes from the vault. The plugin uses Obsidian's API for file access and the Khoj backend API for indexing and search.","intents":["I want to search my Obsidian vault semantically from Khoj","I need my Obsidian notes automatically indexed and searchable","I want to see related notes from my vault when chatting with Khoj","I need live sync so new notes are immediately searchable"],"best_for":["Obsidian users building AI-augmented note-taking workflows","researchers maintaining large Obsidian vaults with semantic search needs","teams using Obsidian as their knowledge management system"],"limitations":["Obsidian plugin requires Obsidian 1.0+; older versions not supported","Live sync depends on file system watchers; may miss rapid changes","Large vaults (10,000+ notes) may take several minutes to index","Plugin requires network connectivity to Khoj backend","No conflict resolution for simultaneous edits in Obsidian and Khoj"],"requires":["Obsidian 1.0+","Khoj backend running and accessible","Network connectivity between Obsidian and Khoj backend"],"input_types":["Obsidian vault files (markdown)","file change events (from Obsidian API)"],"output_types":["indexed vault content in Khoj backend","search results (relevant notes with snippets)","related notes suggestions"],"categories":["memory-knowledge","search-retrieval"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_11","uri":"capability://text.generation.language.emacs.integration.with.inline.chat","name":"emacs-integration-with-inline-chat","description":"Provides an Emacs plugin that enables inline chat and search within Emacs buffers. Users can select text, ask Khoj questions about it, and receive responses inline. The plugin supports semantic search of indexed documents and integrates with Emacs' completion and buffer management systems. Implements streaming response rendering in Emacs buffers with syntax highlighting for code blocks.","intents":["I want to chat with Khoj without leaving Emacs","I need to search my knowledge base from within Emacs","I want to ask questions about selected text in my buffer","I need streaming responses rendered directly in Emacs"],"best_for":["Emacs power users integrating AI into their workflow","developers using Emacs as their primary editor","teams with Emacs-based development environments"],"limitations":["Emacs plugin requires Emacs 27+; older versions not supported","Streaming response rendering may be slow for large responses in Emacs","No built-in syntax highlighting for all code languages","Plugin requires network connectivity to Khoj backend","Limited UI customization compared to web client"],"requires":["Emacs 27+","Khoj backend running and accessible","Network connectivity between Emacs and Khoj backend"],"input_types":["selected text from Emacs buffer","natural language queries","search terms"],"output_types":["inline responses in Emacs buffer","search results with snippets","formatted code blocks with syntax highlighting"],"categories":["text-generation-language","search-retrieval"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_12","uri":"capability://automation.workflow.self.hosted.deployment.with.docker.and.configuration.management","name":"self-hosted-deployment-with-docker-and-configuration-management","description":"Provides Docker and Docker Compose configurations for self-hosted deployment of the full Khoj stack (backend, PostgreSQL, frontend). Includes environment-based configuration management through .env files and Django settings, supporting customization of LLM providers, embedding models, search engines, and other services. The deployment supports both development (docker-compose.yml) and production (prod.Dockerfile) configurations with Gunicorn WSGI server for production.","intents":["I want to deploy Khoj on my own infrastructure","I need to customize Khoj for my specific LLM providers and services","I want to run Khoj completely offline without cloud dependencies","I need to scale Khoj across multiple servers"],"best_for":["organizations with on-premise infrastructure requirements","teams needing full control over data and deployment","developers building custom Khoj deployments"],"limitations":["Docker deployment requires Docker 20.10+ and Docker Compose 2.0+","PostgreSQL setup requires manual configuration for production (backups, replication)","Scaling beyond single server requires load balancer and database replication setup","No built-in monitoring or alerting; requires external tools (Prometheus, Grafana)","Configuration management is environment-based; no UI for runtime configuration changes"],"requires":["Docker 20.10+","Docker Compose 2.0+","PostgreSQL 12+ (can be containerized)","Minimum 4GB RAM for backend + 2GB for PostgreSQL","For production: Gunicorn, Nginx reverse proxy, SSL certificates"],"input_types":[".env configuration files","Docker Compose YAML","Django settings modules"],"output_types":["running Khoj services (backend, frontend, database)","deployment logs","configuration validation reports"],"categories":["automation-workflow","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_13","uri":"capability://data.processing.analysis.content.type.agnostic.indexing.with.pluggable.extractors","name":"content-type-agnostic-indexing-with-pluggable-extractors","description":"Implements a content processing pipeline with pluggable extractors for different file types (PDF, markdown, HTML, plain text, Obsidian). Each extractor converts the source format to normalized text, which is then chunked and embedded. The pipeline supports custom extractors through a plugin interface, allowing users to add support for new file types. Chunking strategies are configurable (fixed size, semantic, sliding window) with metadata preservation (source, timestamp, section).","intents":["I want to index documents in multiple formats (PDF, markdown, HTML)","I need to add support for custom file types without modifying Khoj core","I want to preserve document structure and metadata during indexing","I need to re-index documents when they change"],"best_for":["organizations with heterogeneous document sources","developers building custom content extractors","teams needing flexible document indexing pipelines"],"limitations":["PDF extraction quality depends on PDF structure; scanned PDFs require OCR (not built-in)","Custom extractor development requires Python knowledge","Chunking strategy is global; no per-document-type customization","Large documents (>100MB) may timeout during extraction","No incremental indexing; full re-indexing required for updates"],"requires":["Python 3.9+","For PDF: PyPDF2 or pdfplumber library","For HTML: BeautifulSoup or similar parser","Embedding model API or local model"],"input_types":["PDF files","markdown files","HTML files","plain text files","Obsidian vault exports"],"output_types":["normalized text chunks","embeddings (vectors)","metadata (source, timestamp, section)"],"categories":["data-processing-analysis","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_14","uri":"capability://text.generation.language.streaming.response.delivery.with.websocket.support","name":"streaming-response-delivery-with-websocket-support","description":"Implements streaming response delivery through both HTTP Server-Sent Events (SSE) and WebSocket protocols, enabling real-time response rendering on clients. The streaming processor chunks LLM responses and sends them incrementally, reducing perceived latency and enabling progressive rendering. Supports streaming for chat responses, search results, and agent execution logs. Clients can subscribe to response streams and render content as it arrives.","intents":["I want to see responses appear in real-time as the AI generates them","I need to stream agent execution logs to see tool calls and results","I want to cancel long-running requests mid-stream","I need low-latency response delivery for interactive applications"],"best_for":["developers building real-time AI chat interfaces","teams needing low-latency response delivery","applications requiring progressive response rendering"],"limitations":["WebSocket connections require persistent network; may fail on unstable networks","Streaming adds complexity to error handling; partial responses may be incomplete","SSE has browser compatibility issues in older browsers","No built-in backpressure handling; fast clients may overwhelm slow networks","Streaming responses cannot be easily cached or indexed"],"requires":["FastAPI with WebSocket support","Client support for SSE or WebSocket","Python 3.9+"],"input_types":["chat requests","search queries","agent task definitions"],"output_types":["streamed response chunks (text)","streaming metadata (token count, model)","stream termination signals"],"categories":["text-generation-language","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_2","uri":"capability://planning.reasoning.agent.based.task.automation.with.tool.execution","name":"agent-based-task-automation-with-tool-execution","description":"Implements an agent system that decomposes user requests into subtasks, selects appropriate tools (web search, code execution, image generation, MCP servers), and executes them in sequence with result aggregation. The agent uses the LLM to reason about tool selection via function-calling APIs (OpenAI, Anthropic native support) or prompt-based tool selection for other providers. Tool execution is sandboxed through subprocess isolation for code execution and API-based execution for external tools, with results fed back into the agent loop for iterative refinement.","intents":["I want the AI to autonomously research a topic by searching the web and synthesizing results","I need to execute code snippets and see results without leaving the chat","I want to generate images or diagrams as part of a larger task","I need to integrate custom tools via MCP (Model Context Protocol) servers"],"best_for":["teams building autonomous research assistants","developers creating custom AI workflows with tool integration","organizations needing sandboxed code execution within AI systems"],"limitations":["Agent reasoning quality depends on LLM capability; weaker models struggle with multi-step planning","Tool execution latency compounds with each step; no parallel tool execution","Code execution sandbox is subprocess-based; no true isolation (use containers for production)","MCP server integration requires manual configuration; no auto-discovery","No built-in cost tracking for multi-step agent runs across providers"],"requires":["LLM provider with function-calling support (OpenAI, Anthropic) or prompt-based tool selection","Python 3.9+","For code execution: Python environment with restricted permissions","For web search: API key (Brave Search, Google Custom Search, or Bing)","For image generation: API key (OpenAI DALL-E, Hugging Face, or local Stable Diffusion)","Optional: MCP server endpoints for custom tools"],"input_types":["natural language task descriptions","tool definitions (JSON schema)","MCP server configurations"],"output_types":["structured agent execution logs","tool results (text, images, code output)","final synthesized response with citations"],"categories":["planning-reasoning","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_3","uri":"capability://planning.reasoning.research.mode.with.iterative.web.search.and.synthesis","name":"research-mode-with-iterative-web-search-and-synthesis","description":"Provides a specialized research workflow that iteratively searches the web, retrieves results, synthesizes findings, and generates follow-up queries based on gaps in knowledge. The research mode uses the agent system to orchestrate multiple web searches with semantic deduplication of results, then aggregates findings into a structured research report. Implements a loop that continues searching until confidence threshold is met or iteration limit reached, with each iteration refining the search query based on previous results.","intents":["I want to deeply research a topic with multiple web searches and synthesis","I need to generate a comprehensive research report with citations","I want the AI to identify gaps in knowledge and search for missing information","I need to track the research process and see what sources were consulted"],"best_for":["researchers and analysts conducting deep topic investigations","content creators building comprehensive articles with sourced information","teams needing audit trails of research methodology and sources"],"limitations":["Research quality depends on web search API quality and coverage","Iterative searching increases latency; typical research takes 30-60 seconds","No built-in fact-checking or source credibility assessment","Web search results may be outdated or biased depending on search engine","Requires web search API quota; costs scale with research depth"],"requires":["Web search API key (Brave Search, Google Custom Search, or Bing)","LLM provider with function-calling support for iterative query refinement","Python 3.9+","PostgreSQL for storing research sessions and results"],"input_types":["research topic (natural language)","research parameters (iteration limit, confidence threshold)","optional: initial search queries"],"output_types":["structured research report (markdown, JSON)","list of sources with citations","research execution log with search queries and results","confidence metrics per finding"],"categories":["planning-reasoning","search-retrieval"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_4","uri":"capability://image.visual.image.generation.and.diagram.creation","name":"image-generation-and-diagram-creation","description":"Integrates image generation capabilities through OpenAI DALL-E, Hugging Face Stable Diffusion, and local image generation models. The image processor accepts natural language prompts from chat or agent tasks, generates images through the selected provider, and returns URLs or base64-encoded images. Supports diagram generation through specialized prompts that guide the LLM to create structured image descriptions suitable for visualization tools.","intents":["I want to generate images from text descriptions within a chat","I need to create diagrams or visualizations as part of a research task","I want to use local image generation for privacy without cloud APIs","I need to batch generate multiple images from a list of prompts"],"best_for":["content creators generating visual assets","teams building creative AI workflows","organizations with privacy requirements preventing cloud image generation"],"limitations":["Cloud image generation (DALL-E) has usage quotas and costs per image","Local image generation requires significant GPU memory (6GB+ for Stable Diffusion)","Image quality varies significantly by provider; DALL-E generally superior to local models","No built-in image editing or refinement; regeneration required for modifications","Diagram generation relies on LLM prompt engineering; results may be inconsistent"],"requires":["Image generation API key (OpenAI DALL-E, Hugging Face, or local Stable Diffusion server)","Python 3.9+","For local generation: GPU with 6GB+ VRAM and Stable Diffusion server running"],"input_types":["natural language image descriptions","diagram specifications (text)","batch prompt lists (JSON)"],"output_types":["image URLs (cloud providers)","base64-encoded images (local providers)","image metadata (generation time, model, prompt)"],"categories":["image-visual","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_5","uri":"capability://code.generation.editing.code.execution.and.result.streaming","name":"code-execution-and-result-streaming","description":"Executes Python code snippets in a sandboxed subprocess environment with output capture and error handling. The code executor accepts code strings from the agent or chat, runs them with restricted permissions, captures stdout/stderr, and returns results to the agent loop. Implements timeout protection (default 30 seconds) and resource limits to prevent runaway execution. Results are streamed back to clients for real-time feedback.","intents":["I want to execute Python code and see results without leaving the chat","I need the AI to write and run code to solve a problem","I want to verify code correctness by running it in a sandboxed environment","I need to execute data analysis or transformation code with immediate feedback"],"best_for":["developers testing code snippets interactively","data analysts running quick transformations","teams building AI-assisted coding workflows"],"limitations":["Subprocess isolation is not true sandboxing; use containers for untrusted code","No network access from executed code (security measure)","File system access limited to temporary directories","Execution timeout is fixed at 30 seconds; no per-request customization","Large output (>1MB) may cause memory issues; no streaming output chunking","Python-only; no support for other languages"],"requires":["Python 3.9+ with subprocess module","Sufficient disk space for temporary execution files","No external network access required"],"input_types":["Python code strings","execution parameters (timeout, environment variables)"],"output_types":["stdout/stderr output (text)","execution status (success/error/timeout)","execution metadata (duration, memory used)"],"categories":["code-generation-editing","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_6","uri":"capability://tool.use.integration.multi.client.interface.support.with.unified.backend","name":"multi-client-interface-support-with-unified-backend","description":"Provides multiple client interfaces (Next.js web app, Emacs plugin, Obsidian plugin, desktop/mobile apps) that all connect to a unified FastAPI backend through REST APIs. Each client implements its own UI/UX while sharing the same backend services (chat, search, agents, settings). The backend exposes REST endpoints for all operations, with WebSocket support for streaming responses. Authentication is handled centrally through the backend with token-based auth (JWT) and multi-method support (password, OAuth).","intents":["I want to use Khoj from my preferred editor (VS Code, Emacs, Obsidian)","I need the same knowledge base and chat history across all my devices","I want to deploy a single backend and connect multiple client applications","I need to build a custom client that integrates with Khoj backend"],"best_for":["teams deploying Khoj across heterogeneous client environments","developers building custom clients on top of Khoj backend","organizations requiring editor-native AI integration (Emacs, Obsidian)"],"limitations":["Client-server latency adds ~100-200ms per request vs local-only solutions","WebSocket streaming requires persistent connections; may fail on unstable networks","Each client must implement its own UI; no shared component library","Authentication token management required on each client","Offline mode not supported; requires backend connectivity"],"requires":["Khoj backend running (Python 3.9+, PostgreSQL, FastAPI)","For web client: Node.js 18+, Next.js 13+","For Emacs: Emacs 27+","For Obsidian: Obsidian 1.0+","Network connectivity to backend server"],"input_types":["REST API requests (JSON)","WebSocket messages (JSON)","file uploads (multipart/form-data)"],"output_types":["REST API responses (JSON)","WebSocket streaming responses (JSON)","file downloads (PDF, markdown, etc.)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_7","uri":"capability://tool.use.integration.model.context.protocol.tool.integration","name":"model-context-protocol-tool-integration","description":"Integrates with MCP (Model Context Protocol) servers to extend the agent's tool capabilities beyond built-in tools (web search, code execution, image generation). The MCP processor discovers available tools from registered MCP servers, converts them to function-calling schemas compatible with LLM providers, and executes them through the agent loop. Supports both local MCP servers and remote endpoints with automatic schema translation and error handling.","intents":["I want to extend Khoj with custom tools via MCP servers","I need to integrate third-party services (Slack, GitHub, databases) as agent tools","I want to use existing MCP servers without writing custom code","I need to build a custom MCP server and connect it to Khoj"],"best_for":["developers building extensible AI agent systems","teams integrating Khoj with existing MCP server ecosystems","organizations needing custom tool integration without modifying Khoj core"],"limitations":["MCP server discovery is manual; no auto-discovery mechanism","Schema translation may lose provider-specific features (e.g., streaming)","MCP server availability directly impacts agent reliability; no fallback mechanisms","Debugging MCP tool failures requires access to server logs","No built-in rate limiting or quota management for MCP tools"],"requires":["MCP server endpoint (local or remote)","MCP server configuration in Khoj settings (URL, authentication)","Python 3.9+","LLM provider with function-calling support (OpenAI, Anthropic)"],"input_types":["MCP server configurations (JSON)","tool invocation requests (natural language from agent)"],"output_types":["tool results (JSON, text, or binary)","tool execution logs","error messages with debugging context"],"categories":["tool-use-integration","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_8","uri":"capability://memory.knowledge.conversation.history.management.with.persistence","name":"conversation-history-management-with-persistence","description":"Manages conversation threads and message history through Django ORM models (Conversation, Message) stored in PostgreSQL. Each user has isolated conversation threads with full message history, metadata (timestamps, token counts, model used), and optional titles. The conversation manager supports retrieving conversation context for augmentation, archiving old conversations, and exporting conversation history. Implements efficient context window management by truncating older messages when approaching token limits.","intents":["I want to maintain conversation history across sessions","I need to retrieve previous conversations and continue from them","I want to export my conversations for backup or analysis","I need to manage conversation memory efficiently without exceeding token limits"],"best_for":["users building long-term AI assistants with persistent memory","teams needing conversation audit trails and compliance records","developers implementing conversation-based workflows"],"limitations":["Full conversation history retrieval can be slow for conversations with 1000+ messages","Token counting is approximate; actual token usage may vary by model","No automatic conversation summarization; old messages are truncated, not summarized","Conversation export is manual; no scheduled backup mechanism","No built-in conversation search; requires full-text search index for large histories"],"requires":["PostgreSQL 12+","Django ORM configured and migrated","Python 3.9+"],"input_types":["user messages (text)","system metadata (model, tokens, timestamp)"],"output_types":["conversation history (JSON, markdown)","conversation metadata (title, created_at, message_count)","exported conversation files (markdown, JSON)"],"categories":["memory-knowledge","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-khoj-ai--khoj__cap_9","uri":"capability://safety.moderation.multi.method.authentication.and.authorization","name":"multi-method-authentication-and-authorization","description":"Implements authentication through multiple methods: password-based login, OAuth (Google, GitHub), and API key authentication. Uses JWT tokens for session management with configurable expiration. Authorization is role-based (user, admin) with per-user resource isolation (conversations, settings, indexed documents). The authentication backend (UserAuthenticationBackend) integrates with Django ORM for user management and supports both web clients (cookie-based) and API clients (token-based).","intents":["I want to authenticate users with passwords or OAuth","I need API key authentication for programmatic access","I want to isolate user data and prevent cross-user access","I need admin controls for user management and system settings"],"best_for":["teams deploying multi-user Khoj instances","organizations requiring OAuth integration with existing identity providers","developers building API clients that need token-based authentication"],"limitations":["OAuth configuration requires external provider setup (Google, GitHub)","JWT token expiration is fixed; no refresh token mechanism","No built-in multi-factor authentication (MFA)","API key rotation is manual; no automatic key expiration","Role-based access control is basic (user/admin); no fine-grained permissions"],"requires":["PostgreSQL for user storage","Django authentication backend configured","For OAuth: OAuth provider credentials (Google, GitHub)","Python 3.9+"],"input_types":["username/password credentials","OAuth tokens","API keys"],"output_types":["JWT tokens","user profile information","authorization status (allowed/denied)"],"categories":["safety-moderation","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":54,"verified":false,"data_access_risk":"high","permissions":["PostgreSQL 12+ with pgvector extension","Embedding model API key (OpenAI, Hugging Face, or local model)","Python 3.9+","Supported document formats: markdown, PDF, HTML, plain text","API key for at least one provider: OpenAI, Anthropic, Google Gemini, or Ollama/LlamaCPP endpoint","PostgreSQL for conversation history storage","For local LLMs: Ollama or LlamaCPP server running locally","Obsidian 1.0+","Khoj backend running and accessible","Network connectivity between Obsidian and Khoj backend"],"failure_modes":["Embedding quality depends on chosen model; local embeddings slower than cloud alternatives","Vector search latency increases with corpus size (no built-in sharding)","Requires PostgreSQL with pgvector extension for vector operations","Chunking strategy is fixed; no dynamic chunk size optimization per document type","Context window limited by chosen LLM; no automatic context compression","Local LLM inference requires significant GPU memory (8GB+ for Llama 7B)","Provider-specific prompt engineering needed for optimal results per model","No built-in fallback mechanism if primary provider fails","Streaming response latency varies significantly by provider (OpenAI ~200ms, local ~500ms+)","Obsidian plugin requires Obsidian 1.0+; older versions not supported","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7735106324196392,"quality":0.5,"ecosystem":0.6000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.25,"quality":0.25,"ecosystem":0.1,"match_graph":0.28,"freshness":0.12}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:21.550Z","last_scraped_at":"2026-05-03T13:58:26.976Z","last_commit":"2026-03-26T03:35:43Z"},"community":{"stars":34370,"forks":2182,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=khoj-ai--khoj","compare_url":"https://unfragile.ai/compare?artifact=khoj-ai--khoj"}},"signature":"z7bjUf6jJVaPObKWaChPA59kS2e91SGIMCUqXE/8I0g2z0mP9ssvRWqqeN8mfXjgfqQkeu8I5yahfAHm90I0CQ==","signedAt":"2026-06-20T08:36:05.735Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/khoj-ai--khoj","artifact":"https://unfragile.ai/khoj-ai--khoj","verify":"https://unfragile.ai/api/v1/verify?slug=khoj-ai--khoj","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}