{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"awesome-apache-doris","slug":"apache-doris","name":"Apache Doris","type":"mcp","url":"https://github.com/apache/doris-mcp-server","page_url":"https://unfragile.ai/apache-doris","categories":["mcp-servers"],"tags":[],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"awesome-apache-doris__cap_0","uri":"capability://tool.use.integration.mcp.protocol.compliant.sql.query.execution.with.connection.pooling","name":"mcp protocol-compliant sql query execution with connection pooling","description":"Executes SQL queries against Apache Doris through a standardized MCP protocol interface, leveraging a connection pooling layer (DorisConnectionManager) that maintains persistent database connections with health monitoring and token-bound configuration. Queries flow through a QueryExecutor component that handles result serialization and error propagation back to MCP clients via stdio or HTTP transports.","intents":["Execute ad-hoc SQL queries against Doris from an AI assistant or LLM agent","Run parameterized queries with result streaming for large datasets","Integrate Doris query execution into multi-step reasoning workflows"],"best_for":["AI agents and LLM applications needing real-time data warehouse access","Teams building data-driven decision systems on top of Doris","Developers integrating Doris into MCP-compatible tools (Claude, etc.)"],"limitations":["Query results are serialized through MCP protocol, adding latency for very large result sets (>100K rows)","No built-in query caching — each execution hits the database directly","Connection pool size is fixed at initialization; dynamic scaling requires server restart","Timeout behavior depends on underlying Doris query timeout configuration"],"requires":["Apache Doris 2.0+ instance with network accessibility","Python 3.9+","Valid Doris database credentials (username/password or token)","MCP client implementation (e.g., Claude desktop, custom MCP client)"],"input_types":["SQL query string (SELECT, INSERT, UPDATE, DELETE)","Query parameters (optional, for parameterized queries)","Database/schema context"],"output_types":["Structured result set (rows as JSON objects)","Query metadata (column names, types, row count)","Error messages with SQL validation details"],"categories":["tool-use-integration","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_1","uri":"capability://data.processing.analysis.database.schema.and.metadata.extraction.with.caching","name":"database schema and metadata extraction with caching","description":"Extracts and caches database schema information (tables, columns, data types, constraints) through a SchemaExtractor component that queries Doris system catalogs and materializes results for fast retrieval by AI agents. Metadata is exposed as MCP resources, enabling LLMs to understand data structure without executing discovery queries repeatedly.","intents":["Provide LLM agents with current schema context for intelligent query generation","Enable semantic understanding of available tables and columns for natural language to SQL translation","Support schema-aware code completion and validation in query builders"],"best_for":["LLM-powered SQL generation tools that need schema awareness","Data exploration interfaces for non-technical users","Teams building semantic layers on top of Doris"],"limitations":["Schema cache is in-memory only; changes to database schema require server restart or manual cache invalidation","Extraction of large schemas (>10K tables) may cause initial startup latency","Does not track schema change history or versioning","Column-level statistics (cardinality, null counts) are not extracted by default"],"requires":["Apache Doris 2.0+ with accessible system catalogs","Database user with SELECT permissions on information_schema","Python 3.9+"],"input_types":["Database/schema name","Optional table name filter"],"output_types":["JSON schema definition (tables, columns, types, nullable flags)","MCP resource URIs for schema access","Structured metadata for prompt injection"],"categories":["data-processing-analysis","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_10","uri":"capability://automation.workflow.health.check.and.connection.pool.monitoring","name":"health check and connection pool monitoring","description":"Monitors connection pool health through DorisConnectionManager, which periodically tests connections and removes stale or failed connections. Health check results are exposed as MCP resources and can trigger alerts. Connection pool statistics (size, utilization, wait time) are tracked and available for monitoring dashboards.","intents":["Detect and recover from database connectivity issues automatically","Monitor connection pool utilization to identify capacity issues","Provide visibility into connection health for operational dashboards"],"best_for":["Production deployments requiring high availability","Teams monitoring Doris connectivity and performance","Operations teams managing connection pool capacity"],"limitations":["Health checks add latency to connection acquisition; check interval is fixed at startup","Failed health checks block new queries until pool recovers; no graceful degradation","Connection pool statistics are in-memory only; historical data is lost on restart","No automatic pool resizing; capacity must be configured statically"],"requires":["Apache Doris 2.0+ instance","Python 3.9+"],"input_types":["Health check interval (seconds)","Connection pool size configuration"],"output_types":["Health status (healthy, degraded, unhealthy)","Connection pool statistics (size, utilization, wait time)","Failed connection details"],"categories":["automation-workflow","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_2","uri":"capability://safety.moderation.sql.security.validation.and.data.masking","name":"sql security validation and data masking","description":"Validates incoming SQL queries against a security policy engine (DorisSecurityManager) that checks for dangerous operations (DROP, TRUNCATE, unauthorized schema access) and applies data masking rules before query execution. Masking policies are defined per column and enforced at the result serialization layer, preventing sensitive data exposure to LLM agents.","intents":["Prevent LLM agents from executing destructive SQL operations","Mask sensitive columns (PII, financial data) in query results","Enforce role-based access control on database objects"],"best_for":["Enterprise deployments where LLM agents access production Doris instances","Regulated industries (healthcare, finance) requiring data masking compliance","Teams building multi-tenant AI applications with data isolation requirements"],"limitations":["Masking rules are static and defined at server initialization; runtime policy changes require restart","Masking is applied at result serialization layer only — does not prevent data exfiltration via side-channel queries (e.g., COUNT(*) on masked columns)","No audit logging of masked data access by default","Performance overhead of masking increases with result set size"],"requires":["Security policy configuration (YAML or environment variables)","Doris user with appropriate permissions for the policies being enforced","Python 3.9+"],"input_types":["SQL query string","User/agent identity (for role-based checks)","Query result set (for masking application)"],"output_types":["Validation pass/fail with reason","Masked result set (with sensitive columns redacted or hashed)","Security audit log entries"],"categories":["safety-moderation","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_3","uri":"capability://safety.moderation.token.based.authentication.with.multi.provider.support","name":"token-based authentication with multi-provider support","description":"Manages authentication to Doris through a TokenManager component that supports multiple credential types (username/password, API tokens, JWT) and binds tokens to connection pool entries. Tokens are refreshed automatically based on TTL, and authentication state is tracked per connection, enabling secure multi-agent access without credential sharing.","intents":["Authenticate multiple LLM agents to Doris using separate tokens","Rotate credentials automatically without server restart","Enforce per-agent audit trails by binding tokens to connections"],"best_for":["Multi-tenant SaaS platforms using Doris as a shared data warehouse","Enterprise deployments requiring credential rotation and audit trails","Teams integrating Doris with identity providers (OAuth, LDAP)"],"limitations":["Token refresh logic is synchronous; expired tokens block queries until refresh completes","No built-in token revocation — revoked tokens remain valid until TTL expiration","JWT validation is basic; complex claim validation requires custom extensions","Token storage is in-memory; tokens are lost on server restart"],"requires":["Doris instance with authentication enabled","Token credentials (username/password, JWT, or API token)","Python 3.9+"],"input_types":["Credential type (password, jwt, api_token)","Credential value","Token TTL/expiration time"],"output_types":["Authenticated connection handle","Token refresh status","Authentication error messages"],"categories":["safety-moderation","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_4","uri":"capability://tool.use.integration.multi.transport.protocol.support.stdio.http.adbc","name":"multi-transport protocol support (stdio, http, adbc)","description":"Supports three transport mechanisms for different deployment scenarios: stdio for direct process-to-process MCP integration, HTTP for REST-based access, and ADBC for Arrow-based data interchange. Transport selection is configured at startup, with each mode using dedicated initialization paths (initialize_for_stdio_mode, start_http, ADBC integration) that abstract protocol differences from the core query execution layer.","intents":["Deploy Doris MCP server as a subprocess for Claude desktop or other MCP clients","Expose Doris queries via REST API for web applications and integrations","Enable Arrow-based data transfer for high-performance analytics clients"],"best_for":["Teams using Claude desktop or other MCP-native tools","Web applications needing REST access to Doris","Data science workflows using Arrow/Pandas for analysis"],"limitations":["Only one transport mode can be active per server instance; multi-transport requires multiple server processes","HTTP transport adds network latency vs. stdio; suitable for remote access but not low-latency local integration","ADBC integration requires Arrow-compatible client libraries; not all Doris clients support ADBC","Stdio transport is blocking; concurrent requests require multiple server instances or async wrapper"],"requires":["Python 3.9+","For HTTP: Flask or similar WSGI server","For ADBC: PyArrow 10.0+","For stdio: MCP client implementation"],"input_types":["Transport mode selection (stdio, http, adbc)","Protocol-specific configuration (port for HTTP, etc.)"],"output_types":["Protocol-specific response format (JSON for HTTP/stdio, Arrow for ADBC)","Transport-specific error messages"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_5","uri":"capability://data.processing.analysis.query.analysis.and.performance.metrics.collection","name":"query analysis and performance metrics collection","description":"Collects query execution metrics (latency, rows processed, memory usage) through AnalysisTools component and exposes them as MCP resources. Metrics are aggregated per query and per user, enabling performance monitoring and optimization recommendations. Integration with Doris query profiling provides detailed execution plan analysis.","intents":["Monitor query performance and identify slow queries for optimization","Provide LLM agents with execution metrics for query cost estimation","Generate performance reports and optimization recommendations"],"best_for":["Data teams optimizing Doris query performance","LLM agents making cost-aware query decisions","Operations teams monitoring data warehouse health"],"limitations":["Metrics collection adds ~5-10% overhead per query","Metrics are in-memory only; historical data is lost on server restart","No built-in alerting on performance thresholds","Execution plan analysis requires Doris 2.0+ with profiling enabled"],"requires":["Apache Doris 2.0+ with query profiling enabled","Python 3.9+"],"input_types":["Query execution context","Query result metadata"],"output_types":["JSON metrics object (latency, rows, memory, etc.)","Execution plan analysis","Performance recommendations"],"categories":["data-processing-analysis","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_6","uri":"capability://tool.use.integration.dynamic.tool.registration.and.prompt.template.injection","name":"dynamic tool registration and prompt template injection","description":"Registers SQL query tools and analysis functions dynamically through DorisToolsManager, which exposes them as MCP tools with schema-based function signatures. Prompt templates are managed by DorisPromptsManager and injected into LLM context, providing domain-specific guidance for query generation and data exploration.","intents":["Enable LLM agents to discover available query tools and their schemas","Inject domain-specific prompts to guide natural language to SQL translation","Register custom analysis functions as callable tools for agents"],"best_for":["Teams building LLM-powered data exploration interfaces","AI agents needing domain-specific query guidance","Custom tool development for specialized Doris use cases"],"limitations":["Tool registration is static at server initialization; dynamic tool addition requires restart","Prompt templates are text-based; no built-in template versioning or A/B testing","Tool schemas are inferred from function signatures; complex types require manual schema definition","No built-in tool usage tracking or analytics"],"requires":["Python 3.9+","Tool function definitions with type hints"],"input_types":["Tool function (Python callable)","Tool schema (JSON schema or inferred from type hints)","Prompt template (text with variable placeholders)"],"output_types":["MCP tool definition (name, description, schema)","Injected prompt text","Tool invocation results"],"categories":["tool-use-integration","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_7","uri":"capability://automation.workflow.multi.worker.horizontal.scaling.with.request.distribution","name":"multi-worker horizontal scaling with request distribution","description":"Supports multi-worker deployment mode (multiworker_app.py) where multiple server instances are coordinated behind a load balancer or request router. Each worker maintains its own connection pool and processes requests independently, with optional request distribution based on query complexity or user affinity. Worker health is monitored and unhealthy workers are automatically excluded from routing.","intents":["Scale Doris MCP server horizontally to handle high request volume","Distribute query load across multiple workers for improved throughput","Enable zero-downtime deployments by gracefully draining workers"],"best_for":["High-traffic AI applications with many concurrent agents","Production deployments requiring fault tolerance and load distribution","Teams needing to scale query throughput beyond single-worker limits"],"limitations":["Multi-worker mode requires external load balancer or request router; no built-in coordination","Connection pools are per-worker; no connection sharing across workers, increasing total Doris connection count","Worker health checks add latency; unhealthy worker detection has ~30-60 second lag","Request distribution is simple (round-robin or health-based); no query-aware routing"],"requires":["Python 3.9+","External load balancer (nginx, HAProxy, etc.) or request router","Multiple server instances (typically 2-4 workers)"],"input_types":["Worker configuration (port, health check interval)","Load balancer routing rules"],"output_types":["Worker health status","Request distribution metrics","Per-worker query statistics"],"categories":["automation-workflow","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_8","uri":"capability://automation.workflow.structured.logging.and.observability.with.context.propagation","name":"structured logging and observability with context propagation","description":"Implements structured logging through DorisLoggerManager that captures request context (user, query, execution time) and propagates it across the request lifecycle. Logs are emitted in JSON format with correlation IDs, enabling distributed tracing and debugging. Integration with standard Python logging allows output to files, syslog, or observability platforms.","intents":["Debug query execution issues by tracing request flow across components","Monitor server health and performance through structured metrics","Audit user actions and query execution for compliance"],"best_for":["Production deployments requiring observability and debugging","Teams using centralized logging platforms (ELK, Datadog, etc.)","Regulated environments requiring audit trails"],"limitations":["Structured logging adds ~2-5% overhead per request","Log volume can be high for verbose logging levels; requires log rotation/archival","Correlation ID propagation is manual; requires explicit context passing","No built-in log sampling or filtering; high-volume queries can flood logs"],"requires":["Python 3.9+","Optional: centralized logging platform (ELK, Datadog, etc.)"],"input_types":["Log level (DEBUG, INFO, WARNING, ERROR)","Log message and context data","Correlation ID (optional)"],"output_types":["JSON log entries with context","Structured metrics (latency, error rates, etc.)","Audit trail entries"],"categories":["automation-workflow","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"awesome-apache-doris__cap_9","uri":"capability://automation.workflow.configuration.management.with.environment.variable.override","name":"configuration management with environment variable override","description":"Manages server configuration through DorisConfig component that supports YAML files, environment variables, and programmatic overrides. Configuration is validated at startup and cached in memory. Environment variables take precedence over YAML, enabling deployment-specific customization without code changes. Configuration changes require server restart.","intents":["Configure Doris connection parameters for different environments (dev, staging, prod)","Override security policies and logging levels via environment variables","Manage multi-worker deployment configuration centrally"],"best_for":["Teams deploying to multiple environments with different configurations","Container-based deployments (Docker, Kubernetes) using environment variables","Infrastructure-as-code workflows requiring configuration as code"],"limitations":["Configuration changes require server restart; no hot-reload capability","YAML schema validation is basic; complex configurations may have runtime errors","Environment variable naming conventions must be followed; typos silently fall back to defaults","No configuration versioning or rollback; changes are immediate and permanent"],"requires":["Python 3.9+","YAML configuration file (optional) or environment variables"],"input_types":["YAML configuration file","Environment variables (DORIS_* prefix)","Programmatic config overrides"],"output_types":["Validated configuration object","Configuration validation errors","Active configuration state"],"categories":["automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":31,"verified":false,"data_access_risk":"high","permissions":["Apache Doris 2.0+ instance with network accessibility","Python 3.9+","Valid Doris database credentials (username/password or token)","MCP client implementation (e.g., Claude desktop, custom MCP client)","Apache Doris 2.0+ with accessible system catalogs","Database user with SELECT permissions on information_schema","Apache Doris 2.0+ instance","Security policy configuration (YAML or environment variables)","Doris user with appropriate permissions for the policies being enforced","Doris instance with authentication enabled"],"failure_modes":["Query results are serialized through MCP protocol, adding latency for very large result sets (>100K rows)","No built-in query caching — each execution hits the database directly","Connection pool size is fixed at initialization; dynamic scaling requires server restart","Timeout behavior depends on underlying Doris query timeout configuration","Schema cache is in-memory only; changes to database schema require server restart or manual cache invalidation","Extraction of large schemas (>10K tables) may cause initial startup latency","Does not track schema change history or versioning","Column-level statistics (cardinality, null counts) are not extracted by default","Health checks add latency to connection acquisition; check interval is fixed at startup","Failed health checks block new queries until pool recovers; no graceful degradation","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.05,"quality":0.47,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.52,"weights":{"adoption":0.25,"quality":0.25,"ecosystem":0.15,"match_graph":0.23,"freshness":0.12}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-06-17T09:51:02.370Z","last_scraped_at":"2026-05-03T14:00:15.503Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=apache-doris","compare_url":"https://unfragile.ai/compare?artifact=apache-doris"}},"signature":"bBHjVAFssvmvKV94vCuNYqlUVwv6vj85iVO/qiaJo6vxT1kYeR2W+fhlZyzSNZW/nNj1wVC027JUWvgnunJ0BA==","signedAt":"2026-06-21T08:46:20.571Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/apache-doris","artifact":"https://unfragile.ai/apache-doris","verify":"https://unfragile.ai/api/v1/verify?slug=apache-doris","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}