{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"ragas","slug":"ragas","name":"Ragas","type":"benchmark","url":"https://github.com/explodinggradients/ragas","page_url":"https://unfragile.ai/ragas","categories":["testing-quality","rag-knowledge"],"tags":[],"pricing":{"model":"free","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"ragas__cap_0","uri":"capability://data.processing.analysis.llm.based.rag.evaluation.with.multi.metric.synthesis","name":"llm-based rag evaluation with multi-metric synthesis","description":"Evaluates RAG pipeline quality by orchestrating multiple LLM-based metrics (faithfulness, answer relevancy, context precision/recall) through a unified evaluation pipeline that accepts only questions and ground-truth answers as input. Uses PydanticPrompt architecture with structured output parsing via Instructor adapter pattern to extract metric scores from LLM responses, with built-in retry logic and async execution via Executor pattern for batch processing.","intents":["Measure whether RAG system answers are grounded in retrieved context without manual annotation","Compare RAG pipeline quality across different retrieval strategies or LLM backends","Identify failure modes in RAG systems by decomposing evaluation into orthogonal metrics","Automate quality gates for RAG deployments with minimal labeled data"],"best_for":["Teams building production RAG systems who need automated quality measurement","Researchers comparing RAG architectures and retrieval strategies","ML engineers optimizing retrieval-augmented generation pipelines"],"limitations":["Metric quality depends on underlying LLM capability — weaker models produce less reliable scores","Requires API access to LLM provider (OpenAI, Anthropic, etc.) or local model deployment","No built-in human-in-the-loop validation — scores are LLM-generated, not ground truth","Evaluation latency scales linearly with number of samples and metrics (typically 5-30s per sample)"],"requires":["Python 3.9+","API key for OpenAI, Anthropic, or compatible LLM provider","Dataset with question-answer pairs (ground truth answers optional for some metrics)","Retrieved context documents for each question"],"input_types":["structured dataset (questions, ground_truth_answers, contexts)","JSON/CSV with evaluation samples","Python Dataset objects via HuggingFace integration"],"output_types":["metric scores (0-1 floats)","aggregated statistics (mean, std, percentiles)","detailed results with per-sample breakdowns","cost tracking and token usage analytics"],"categories":["data-processing-analysis","rag-evaluation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_1","uri":"capability://data.processing.analysis.metric.composition.and.custom.criteria.evaluation","name":"metric composition and custom criteria evaluation","description":"Provides extensible metric system with base classes (Metric, SingleTurnMetric) supporting both built-in metrics and user-defined custom criteria via rubric-based evaluation. Metrics are composable into evaluation sets and execute through a unified pipeline with configurable LLM backends, prompt templates, and output parsing via PydanticPrompt architecture with error recovery mechanisms.","intents":["Define domain-specific evaluation criteria beyond standard RAG metrics","Compose multiple metrics into a single evaluation run with shared LLM context","Swap LLM backends (OpenAI → Anthropic → local Ollama) without changing metric code","Build rubric-based scoring for subjective quality dimensions (tone, clarity, domain accuracy)"],"best_for":["Teams with custom evaluation requirements beyond faithfulness/relevancy","Researchers experimenting with different metric definitions and LLM prompts","Organizations needing to evaluate domain-specific RAG outputs (legal, medical, financial)"],"limitations":["Custom metrics require Python code — no low-code metric builder UI","Metric training/alignment requires labeled data and iterative prompt tuning","Output parsing failures fall back to error recovery but may lose structured data","No built-in metric versioning — prompt changes require manual tracking"],"requires":["Python 3.9+","Understanding of Ragas Metric base class interface","LLM provider configuration (API key or local model)","Labeled evaluation data for metric validation"],"input_types":["metric definitions (Python classes inheriting from Metric)","rubric specifications (structured text or JSON)","evaluation samples with question/answer/context"],"output_types":["metric scores (numeric or categorical)","structured metric metadata (name, description, range)","evaluation results with per-metric breakdowns"],"categories":["data-processing-analysis","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_10","uri":"capability://automation.workflow.configuration.and.runtime.control.via.runconfig","name":"configuration and runtime control via runconfig","description":"Centralizes evaluation configuration via RunConfig system managing LLM selection, embedding models, timeout settings, retry policies, and cost tracking parameters. Enables per-evaluation customization without code changes, with support for environment variable overrides and configuration files. RunConfig propagates settings through evaluation pipeline to all metrics and LLM calls.","intents":["Configure evaluation parameters (LLM, embedding model, timeouts) without code changes","Override configuration via environment variables for CI/CD integration","Track and limit evaluation costs via RunConfig cost parameters","Enable reproducible evaluation by capturing full configuration"],"best_for":["Teams running evaluation in different environments (dev, staging, prod)","ML engineers integrating evaluation into CI/CD pipelines","Organizations needing cost control and budget tracking"],"limitations":["Configuration is centralized — may be inflexible for per-metric customization","Environment variable overrides require careful naming to avoid conflicts","No built-in configuration validation — invalid settings may fail at runtime","Cost tracking is approximate — actual API costs may vary"],"requires":["Python 3.9+","RunConfig object with LLM and embedding model configuration"],"input_types":["RunConfig object with parameters","environment variables (optional overrides)","configuration files (YAML, JSON)"],"output_types":["resolved configuration used for evaluation","cost estimates and tracking"],"categories":["automation-workflow","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_11","uri":"capability://data.processing.analysis.multi.turn.conversation.and.agent.evaluation","name":"multi-turn conversation and agent evaluation","description":"Extends evaluation beyond single-turn RAG to support multi-turn conversations and agent traces via specialized metric types (MultiTurnMetric, AgentMetric) and sample schemas. Handles message history, tool calls, and agent actions as evaluation context, enabling assessment of conversational coherence, tool use correctness, and multi-step reasoning. Metrics can access full conversation history for context-aware scoring.","intents":["Evaluate conversational RAG systems that maintain context across turns","Assess agent tool use correctness and reasoning quality","Measure conversation coherence and consistency across multiple turns","Debug multi-turn failures by inspecting conversation traces"],"best_for":["Teams building conversational AI and agent systems","Researchers studying multi-turn evaluation metrics","Organizations evaluating complex agentic workflows"],"limitations":["Multi-turn evaluation is more complex — metrics must handle variable conversation lengths","Agent evaluation requires understanding tool schemas and execution traces","No built-in conversation quality metrics — requires custom metric implementation","Conversation context can be large — may exceed LLM context windows"],"requires":["Python 3.9+","MultiTurnSample or AgentSample schema with message history","LLM provider with sufficient context window for conversation history"],"input_types":["message history (list of role/content pairs)","tool calls and execution results (for agent evaluation)","agent actions and state transitions"],"output_types":["multi-turn metric scores","per-turn breakdowns","conversation quality assessments"],"categories":["data-processing-analysis","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_12","uri":"capability://automation.workflow.integration.with.observability.platforms.for.tracing.and.monitoring","name":"integration with observability platforms for tracing and monitoring","description":"Integrates with observability platforms (Langfuse, etc.) via a tracing adapter pattern that logs evaluation events (metric computations, LLM calls, results) to external systems. Metrics can emit structured events that are automatically captured and sent to configured observability backends. Enables real-time monitoring of evaluation runs, cost tracking across multiple evaluations, and debugging of metric behavior through detailed trace logs. Integration is optional and transparent — evaluation works without observability configuration.","intents":["Monitor evaluation runs in real-time across distributed systems","Debug metric behavior by examining detailed trace logs","Track evaluation costs and performance metrics over time","Correlate evaluation results with production RAG performance"],"best_for":["Teams running evaluation in production or CI/CD pipelines","Organizations with observability infrastructure (Langfuse, etc.)","Debugging complex evaluation failures across multiple metrics"],"limitations":["Observability integration adds latency to evaluation (network calls to external systems)","Tracing data can be verbose — may incur significant storage costs for large evaluations","Not all observability platforms are supported — requires custom adapter implementation","No built-in sampling — all events are traced, which can be expensive at scale"],"requires":["Python 3.9+","Observability platform account (Langfuse, etc.)","API key for observability platform"],"input_types":["evaluation_run (EvaluationResults object)","observability_config (dict with platform credentials)"],"output_types":["trace_events (structured logs sent to observability platform)","trace_url (link to view traces in observability UI)"],"categories":["automation-workflow","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_2","uri":"capability://automation.workflow.async.batch.evaluation.pipeline.with.cost.tracking","name":"async batch evaluation pipeline with cost tracking","description":"Executes evaluation across large datasets using async/await pattern via Executor abstraction, supporting parallel metric computation with configurable concurrency limits. Integrates cost tracking via RunConfig system that logs token usage and API costs per metric, with callback hooks for real-time progress monitoring and results persistence. Supports both sync (evaluate) and async (aevaluate) entry points with identical semantics.","intents":["Evaluate large RAG datasets (1000+ samples) without blocking or timeout issues","Track evaluation costs to understand LLM API spend per metric and dataset","Monitor evaluation progress in real-time with callbacks for logging/alerting","Integrate evaluation into CI/CD pipelines with async-friendly execution"],"best_for":["Teams evaluating production RAG systems with thousands of samples","Cost-conscious organizations needing visibility into LLM evaluation spend","ML engineers building automated evaluation workflows in async frameworks"],"limitations":["Async execution adds complexity — requires event loop management and async-aware code","Cost tracking is approximate (based on token counts) — actual API costs may vary","Callback system is fire-and-forget — no guarantee of callback execution order or completion","Executor pattern abstracts away provider-specific rate limiting — may hit API throttles"],"requires":["Python 3.9+ with asyncio support","RunConfig object with LLM provider and cost model configuration","Executor implementation (default: ThreadPoolExecutor or AsyncExecutor)","Callback handlers for progress tracking (optional but recommended)"],"input_types":["evaluation dataset (list of samples)","metric set (list of Metric objects)","RunConfig with LLM and cost parameters"],"output_types":["EvaluationResults with per-sample and aggregated scores","cost breakdown by metric and sample","callback events (start, progress, complete, error)"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_3","uri":"capability://tool.use.integration.multi.provider.llm.integration.with.adapter.pattern","name":"multi-provider llm integration with adapter pattern","description":"Abstracts LLM provider differences through LLM factory and adapter pattern, supporting OpenAI, Anthropic, Ollama, and custom providers via litellm integration. Adapters (Instructor, litellm) handle provider-specific structured output formats and API conventions, with unified interface for message passing, streaming, and error handling. Supports both sync and async LLM calls with built-in retry logic and caching.","intents":["Switch between LLM providers (OpenAI → Anthropic) without changing metric code","Use local models (Ollama) for evaluation without cloud API dependencies","Ensure structured output from LLMs via Instructor adapter with Pydantic validation","Cache LLM responses to reduce API costs and latency in iterative evaluation"],"best_for":["Teams evaluating with multiple LLM providers to compare metric quality","Organizations with privacy constraints requiring local model evaluation","Cost-optimizing teams wanting to use cheaper models for evaluation"],"limitations":["Adapter pattern adds abstraction overhead — provider-specific optimizations may be hidden","Structured output support varies by provider — some require fallback parsing","Caching is in-memory only — no persistent cache across evaluation runs","Retry logic uses exponential backoff — may add significant latency on API failures"],"requires":["Python 3.9+","API key for chosen LLM provider (OpenAI, Anthropic, etc.) OR local Ollama instance","litellm library for provider abstraction","Instructor library for structured output (if using Instructor adapter)"],"input_types":["LLM provider configuration (model name, API key, base URL)","message list with role/content pairs","output schema (Pydantic model for structured extraction)"],"output_types":["LLM response text","structured output (Pydantic model instance)","token usage (input/output counts)","cost estimate"],"categories":["tool-use-integration","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_4","uri":"capability://data.processing.analysis.synthetic.test.data.generation.for.rag.evaluation","name":"synthetic test data generation for rag evaluation","description":"Generates synthetic evaluation datasets (questions, answers, contexts) from source documents using TestsetGenerator with configurable synthesizers and transformations. Uses LLM-based generation with knowledge graph construction to ensure diversity and coverage, supporting both single-turn and multi-turn conversation synthesis. Integrates with test data validation to filter low-quality synthetic samples.","intents":["Create evaluation datasets without manual annotation when ground truth is unavailable","Generate diverse question-answer pairs covering different document aspects","Produce multi-turn conversations for evaluating conversational RAG systems","Validate synthetic data quality before using in evaluation pipelines"],"best_for":["Teams bootstrapping evaluation datasets for new RAG systems","Researchers studying RAG evaluation with synthetic vs. real data","Organizations lacking labeled evaluation data for domain-specific documents"],"limitations":["Synthetic data quality depends on source document quality and LLM capability","Generation is computationally expensive — can take hours for large document sets","No guarantee of distribution matching real user queries","Knowledge graph construction requires document preprocessing and may miss implicit relationships"],"requires":["Python 3.9+","Source documents (text, PDF, or structured format)","LLM provider configuration for synthesis","Embedding model for knowledge graph construction"],"input_types":["document collection (text files, PDFs, or structured documents)","TestsetGenerator configuration (number of samples, synthesizer types)","transformation specifications (filtering, augmentation rules)"],"output_types":["Testset with questions, ground_truth_answers, contexts","metadata (source document, generation method, quality scores)","knowledge graph representation of documents"],"categories":["data-processing-analysis","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_5","uri":"capability://text.generation.language.prompt.management.and.adaptation.system","name":"prompt management and adaptation system","description":"Centralizes prompt templates via PydanticPrompt architecture with PromptMixin for dynamic prompt management across metrics. Supports prompt adaptation (localization, parameter substitution) and version control, with built-in output parsing and error recovery for malformed LLM responses. Prompts are composable and reusable across different metrics and evaluation contexts.","intents":["Manage evaluation prompts centrally without hardcoding in metric classes","Adapt prompts for different languages or evaluation contexts","Version and track prompt changes for reproducibility","Debug metric behavior by inspecting and modifying prompts without code changes"],"best_for":["Teams iterating on metric prompts to improve evaluation quality","Multilingual evaluation requiring prompt localization","Researchers studying impact of prompt wording on metric scores"],"limitations":["Prompt management adds abstraction — harder to understand what LLM actually sees","Output parsing is heuristic-based — may fail on unexpected LLM formats","No built-in A/B testing framework for prompt comparison","Prompt versioning requires manual tracking — no automatic version control"],"requires":["Python 3.9+","Pydantic models for prompt schema definition","LLM provider configuration for prompt execution"],"input_types":["prompt template (Pydantic model with template variables)","context data (question, answer, context, etc.)","output schema (expected LLM response format)"],"output_types":["rendered prompt string","parsed LLM response (structured or text)","error recovery information"],"categories":["text-generation-language","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_6","uri":"capability://memory.knowledge.embedding.model.integration.for.semantic.evaluation","name":"embedding model integration for semantic evaluation","description":"Abstracts embedding model selection via embedding_factory supporting multiple providers (OpenAI, HuggingFace, local models). Embeddings are used for semantic similarity calculations in metrics like context precision/recall and for knowledge graph construction in test data generation. Supports both sync and async embedding computation with caching and batch processing.","intents":["Compute semantic similarity between questions, answers, and contexts","Use embeddings for knowledge graph construction in test data generation","Swap embedding models without changing metric code","Cache embeddings to reduce computation in iterative evaluation"],"best_for":["Teams evaluating semantic relevance of RAG outputs","Researchers comparing embedding models' impact on metric quality","Cost-optimizing teams wanting to use cheaper embedding models"],"limitations":["Embedding quality varies significantly by model — no universal best choice","Caching is in-memory only — no persistent cache across runs","Batch embedding computation requires careful memory management for large datasets","Semantic similarity is task-dependent — generic embeddings may not capture domain nuances"],"requires":["Python 3.9+","Embedding model provider (OpenAI, HuggingFace, local Ollama)","API key for cloud-based embedding models (optional for local models)"],"input_types":["text strings (questions, answers, contexts)","embedding model configuration (model name, provider)"],"output_types":["embedding vectors (float arrays)","similarity scores (cosine similarity between embeddings)"],"categories":["memory-knowledge","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_7","uri":"capability://data.processing.analysis.dataset.schema.validation.and.transformation","name":"dataset schema validation and transformation","description":"Defines and validates evaluation dataset structure via Pydantic-based schemas (EvaluationDataset, Sample types) supporting different evaluation contexts (single-turn RAG, multi-turn conversations, agent traces). Provides data format conversion (JSON, CSV, HuggingFace datasets) with validation and error reporting. Supports schema evolution and backward compatibility.","intents":["Validate evaluation datasets before running metrics to catch data quality issues early","Convert between different dataset formats (JSON, CSV, HuggingFace) without manual parsing","Support different evaluation contexts (RAG, agents, multi-turn) with unified schema","Ensure dataset compatibility with metrics and evaluation pipeline"],"best_for":["Teams managing multiple evaluation datasets in different formats","Data engineers building evaluation data pipelines","Researchers comparing evaluation across different dataset formats"],"limitations":["Schema validation is strict — may reject valid data with minor format differences","Format conversion may lose metadata or context-specific information","No built-in data cleaning — validation fails on malformed data without recovery","Schema evolution requires manual migration — no automatic schema versioning"],"requires":["Python 3.9+","Pydantic for schema definition and validation","Dataset files in supported formats (JSON, CSV, HuggingFace)"],"input_types":["dataset files (JSON, CSV, Parquet)","HuggingFace Dataset objects","Python dictionaries or lists"],"output_types":["validated EvaluationDataset objects","converted dataset in target format","validation error reports"],"categories":["data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_8","uri":"capability://data.processing.analysis.human.feedback.annotation.and.alignment","name":"human feedback annotation and alignment","description":"Provides annotation system for collecting human judgments on evaluation samples, supporting different annotation types (binary, rating, ranking, free-text). Integrates with metric training/alignment workflows to calibrate LLM-based metrics against human judgments using labeled data. Supports annotation workflows with quality control and inter-annotator agreement metrics.","intents":["Collect human judgments to validate LLM-based metric quality","Train custom metrics to align with human preferences","Measure inter-annotator agreement to assess annotation quality","Build ground truth datasets for metric evaluation"],"best_for":["Teams validating metric quality against human judgment","Researchers studying metric-human alignment in RAG evaluation","Organizations building domain-specific evaluation metrics"],"limitations":["Annotation is manual and expensive — requires human time and expertise","Inter-annotator agreement may be low for subjective dimensions","No built-in annotation platform — requires external tools or custom UI","Metric alignment requires sufficient labeled data — typically 100+ samples"],"requires":["Python 3.9+","Annotated evaluation samples with human judgments","Annotation schema definition (rating scale, categories, etc.)"],"input_types":["evaluation samples (question, answer, context)","human annotations (scores, labels, rankings)","annotator metadata (annotator ID, timestamp)"],"output_types":["annotation statistics (agreement, distribution)","aligned metric models","quality reports"],"categories":["data-processing-analysis","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__cap_9","uri":"capability://automation.workflow.observability.and.tracing.integration","name":"observability and tracing integration","description":"Integrates with observability platforms (Langfuse, custom tracing) via callback system to log evaluation traces, metrics, and costs. Provides structured logging of LLM calls, metric computations, and evaluation results with full context for debugging and monitoring. Supports real-time trace visualization and cost analytics.","intents":["Debug metric behavior by inspecting LLM prompts and responses","Monitor evaluation costs and performance in production","Trace evaluation pipeline execution for performance optimization","Integrate evaluation into observability dashboards"],"best_for":["Teams running evaluation in production with monitoring requirements","ML engineers debugging metric failures and unexpected scores","Organizations tracking evaluation costs across teams"],"limitations":["Tracing adds overhead — may increase evaluation latency by 5-10%","Trace storage requires external platform (Langfuse, custom backend)","Sensitive data (prompts, responses) may be logged — requires privacy consideration","Callback system is fire-and-forget — no guarantee of trace delivery"],"requires":["Python 3.9+","Observability platform (Langfuse) or custom tracing backend","API key for observability platform (if using cloud service)"],"input_types":["evaluation events (start, metric_compute, complete)","LLM calls with prompts and responses","metric scores and metadata"],"output_types":["structured traces in observability platform","cost analytics and performance metrics","debugging information"],"categories":["automation-workflow","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"ragas__headline","uri":"capability://testing.quality.rag.evaluation.framework","name":"rag evaluation framework","description":"Ragas is an open-source evaluation framework designed specifically for assessing RAG pipelines, focusing on metrics like faithfulness, answer relevancy, and context precision, making it essential for quality measurement in LLM applications.","intents":["best RAG evaluation framework","RAG quality measurement tool","how to evaluate RAG pipelines","top frameworks for assessing LLM outputs","RAG metrics evaluation solutions"],"best_for":["developers working with RAG systems","researchers in LLM evaluation"],"limitations":[],"requires":[],"input_types":["questions","ground truth answers"],"output_types":["evaluation metrics","quality reports"],"categories":["testing-quality","rag-knowledge"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":64,"verified":false,"data_access_risk":"low","permissions":["Python 3.9+","API key for OpenAI, Anthropic, or compatible LLM provider","Dataset with question-answer pairs (ground truth answers optional for some metrics)","Retrieved context documents for each question","Understanding of Ragas Metric base class interface","LLM provider configuration (API key or local model)","Labeled evaluation data for metric validation","RunConfig object with LLM and embedding model configuration","MultiTurnSample or AgentSample schema with message history","LLM provider with sufficient context window for conversation history"],"failure_modes":["Metric quality depends on underlying LLM capability — weaker models produce less reliable scores","Requires API access to LLM provider (OpenAI, Anthropic, etc.) or local model deployment","No built-in human-in-the-loop validation — scores are LLM-generated, not ground truth","Evaluation latency scales linearly with number of samples and metrics (typically 5-30s per sample)","Custom metrics require Python code — no low-code metric builder UI","Metric training/alignment requires labeled data and iterative prompt tuning","Output parsing failures fall back to error recovery but may lose structured data","No built-in metric versioning — prompt changes require manual tracking","Configuration is centralized — may be inflexible for per-metric customization","Environment variable overrides require careful naming to avoid conflicts","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7,"quality":0.9,"ecosystem":0.49999999999999994,"match_graph":0.25,"freshness":0.52,"weights":{"adoption":0.25,"quality":0.35,"ecosystem":0.15,"match_graph":0.2,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-06-17T09:51:05.295Z","last_scraped_at":null,"last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=ragas","compare_url":"https://unfragile.ai/compare?artifact=ragas"}},"signature":"KVKGTZGK7mM29/Y2JqkGqq+8ThPQxIXr+IA0MnEXhdk1M0PUyXRPA/QwOPMAhfCAYPVSqnSHkTawpqy0iVMADA==","signedAt":"2026-06-22T07:51:58.580Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/ragas","artifact":"https://unfragile.ai/ragas","verify":"https://unfragile.ai/api/v1/verify?slug=ragas","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}