{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-model-zai-org--cogvideox-5b","slug":"zai-org--cogvideox-5b","name":"CogVideoX-5b","type":"model","url":"https://huggingface.co/zai-org/CogVideoX-5b","page_url":"https://unfragile.ai/zai-org--cogvideox-5b","categories":["video-generation"],"tags":["diffusers","safetensors","cogvideox","video-generation","thudm","text-to-video","en","arxiv:2408.06072","license:other","diffusers:CogVideoXPipeline","region:us"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-model-zai-org--cogvideox-5b__cap_0","uri":"capability://image.visual.text.to.video.generation.with.diffusion.based.synthesis","name":"text-to-video generation with diffusion-based synthesis","description":"Generates short-form videos (typically 4-8 seconds) from natural language text prompts using a latent diffusion architecture. The model operates in a compressed latent space rather than pixel space, reducing computational overhead by ~8-16x compared to pixel-space diffusion. It employs a multi-stage denoising process where noise is iteratively removed from random latent tensors conditioned on text embeddings, producing coherent video frames with temporal consistency across the sequence.","intents":["generate short promotional or social media videos from text descriptions without manual filming","create visual storyboards or concept videos for creative projects based on written narratives","prototype video content ideas quickly for testing before investing in production","automate video asset creation for e-commerce, marketing, or educational content at scale"],"best_for":["content creators and marketers needing rapid video prototyping without production infrastructure","AI application developers building video generation features into larger platforms","researchers experimenting with diffusion-based video synthesis and temporal coherence"],"limitations":["Output limited to ~4-8 second videos due to memory constraints and training data; longer sequences require stitching or external composition","Temporal consistency degrades with complex multi-object interactions or rapid scene changes; single-subject or slow-motion prompts perform better","Inference latency typically 2-5 minutes on consumer GPUs (RTX 4090) or 10-30 minutes on CPU, making real-time or batch processing of large volumes impractical without distributed infrastructure","Quality sensitive to prompt engineering; vague or overly complex descriptions produce incoherent or distorted outputs","No built-in support for video editing, frame interpolation, or post-processing; output is raw diffusion result without refinement"],"requires":["Python 3.8+","PyTorch 2.0+ with CUDA 11.8+ for GPU acceleration (NVIDIA GPU with 8GB+ VRAM recommended; 24GB+ for optimal inference speed)","Hugging Face Transformers library (4.30+) for text tokenization and embedding","Diffusers library (0.24.0+) with CogVideoXPipeline implementation","~15-20GB disk space for model weights (safetensors format)","Hugging Face API token for model access (free tier available)"],"input_types":["text (natural language prompts, 10-200 tokens optimal)","optional: negative prompts (text) to guide generation away from unwanted content"],"output_types":["video (MP4 or WebM format, 1024x576 or 768x512 resolution, 8 fps, ~4-8 second duration)","raw tensor output (optional, for downstream processing or frame extraction)"],"categories":["image-visual","video-generation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_1","uri":"capability://image.visual.prompt.conditioned.video.generation.with.text.embedding.alignment","name":"prompt-conditioned video generation with text embedding alignment","description":"Encodes natural language prompts into high-dimensional embeddings using a frozen CLIP or T5 text encoder, then conditions the diffusion process on these embeddings through cross-attention layers. The model learns to align semantic meaning from text with visual features in the latent video space, allowing fine-grained control over video content, style, and composition through prompt variation. This approach decouples language understanding from video synthesis, enabling transfer learning from large text-image datasets.","intents":["control video generation output by iterating on text prompts without retraining or fine-tuning the model","generate multiple video variations from a single prompt by sampling different random seeds","combine multiple concepts or styles in a single prompt (e.g., 'cinematic shot of a cat dancing in a cyberpunk city')","integrate video generation into chatbot or conversational AI systems where users describe desired videos in natural language"],"best_for":["non-technical content creators who prefer text-based control over technical parameters","product teams building user-facing video generation features with intuitive interfaces","researchers studying prompt-to-video alignment and semantic grounding in generative models"],"limitations":["Prompt understanding limited by text encoder's training data; domain-specific or highly technical descriptions may be misinterpreted","No explicit control over camera movement, object placement, or timing; these emerge implicitly from prompt semantics","Prompt sensitivity creates reproducibility challenges; minor wording changes can produce drastically different outputs","No support for image-to-video or multi-modal conditioning; text is the only input modality"],"requires":["Text encoder weights (CLIP or T5) loaded alongside video model; adds ~500MB-2GB memory overhead","Tokenizer compatible with chosen text encoder (typically included in Transformers library)","Prompt engineering knowledge or user education to achieve desired outputs"],"input_types":["text (English language prompts, 10-200 tokens)"],"output_types":["video (conditioned on prompt semantics)"],"categories":["image-visual","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_10","uri":"capability://image.visual.negative.prompt.conditioning.for.artifact.avoidance","name":"negative prompt conditioning for artifact avoidance","description":"Allows users to specify negative prompts (undesired content) that guide generation away from certain visual elements or styles. The model encodes negative prompts similarly to positive prompts and uses them during classifier-free guidance to suppress unwanted features. This is implemented by computing predictions conditioned on both positive and negative prompts, then interpolating in a direction that increases positive prompt alignment while decreasing negative prompt alignment.","intents":["avoid common artifacts or undesired visual elements (e.g., 'avoid distorted faces, avoid blurry motion')","exclude specific styles or aesthetics from generation (e.g., 'avoid cartoon style, avoid watermarks')","improve generation quality by explicitly specifying what not to generate"],"best_for":["content creators who know what they don't want and can articulate it clearly","quality-critical applications where artifact avoidance is important","iterative refinement workflows where negative prompts help converge to desired output"],"limitations":["Negative prompt effectiveness depends on prompt clarity and model's understanding of concepts; vague negative prompts may have minimal effect","Negative prompts add computational overhead (similar to guidance scale); each negative prompt requires additional forward pass","No guarantee that negative prompt will be respected; model may still generate unwanted content if it's strongly implied by positive prompt","Negative prompt semantics can be counterintuitive; 'avoid blurry' may not work as expected if model interprets it differently"],"requires":["Negative prompt text (string)","Model support for negative prompt conditioning (typically built into CogVideoXPipeline)"],"input_types":["negative_prompt parameter (string)"],"output_types":["video with suppressed negative prompt features"],"categories":["image-visual","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_2","uri":"capability://image.visual.latent.space.video.diffusion.with.iterative.denoising","name":"latent space video diffusion with iterative denoising","description":"Performs iterative denoising in a compressed latent space (typically 4-8x compression vs pixel space) using a U-Net or Transformer-based denoiser that predicts noise to subtract at each timestep. The process starts with random Gaussian noise and progressively refines it over 20-50 denoising steps, with each step conditioned on text embeddings and previous frame context. This approach reduces memory usage and computation time while maintaining visual quality through learned latent representations that capture semantic video structure.","intents":["generate videos efficiently on resource-constrained hardware (consumer GPUs, edge devices) compared to pixel-space diffusion","control generation quality vs speed tradeoff by adjusting number of denoising steps (fewer steps = faster but lower quality)","integrate video generation into real-time or near-real-time applications where inference latency is critical"],"best_for":["developers building video generation features with strict latency or cost constraints","teams deploying models on edge devices or serverless infrastructure with limited GPU memory","researchers optimizing diffusion efficiency through latent space design and step scheduling"],"limitations":["Latent space compression introduces artifacts or loss of fine details; some visual quality is traded for efficiency","Denoising step count (typically 20-50) creates linear latency scaling; reducing steps below 15 produces visible quality degradation","Latent space is model-specific and not directly interpretable; debugging or fine-tuning requires understanding learned representations","No adaptive step scheduling; same number of steps used regardless of prompt complexity or desired output quality"],"requires":["GPU with 8GB+ VRAM for inference (16GB+ recommended for batch processing)","Understanding of diffusion process and timestep scheduling for advanced tuning","Scheduler implementation (e.g., DDPM, DPM-Solver) compatible with model architecture"],"input_types":["noise tensor (random Gaussian, shape matching latent video dimensions)","text embeddings (from text encoder)","timestep index (integer, 0-999 or model-specific range)"],"output_types":["denoised latent tensor (same shape as input noise)","decoded video (after VAE decoding from latent to pixel space)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_3","uri":"capability://image.visual.temporal.consistency.modeling.with.frame.to.frame.attention","name":"temporal consistency modeling with frame-to-frame attention","description":"Maintains visual coherence across video frames by incorporating temporal attention mechanisms that allow each frame's generation to depend on previously generated frames. The model uses causal masking in attention layers to ensure frames are generated in sequence, with each frame conditioned on the accumulated context of prior frames. This prevents temporal flickering, jitter, and inconsistent object appearance across the video duration, producing smooth, coherent motion.","intents":["generate videos with smooth, natural motion and consistent object appearance across frames","avoid common artifacts like flickering, jitter, or sudden object teleportation between frames","enable autoregressive video extension where generated frames can be used as context for generating additional frames"],"best_for":["applications requiring high-quality, flicker-free video output (marketing, professional content)","researchers studying temporal coherence in generative models and video synthesis","developers building video extension or frame interpolation features"],"limitations":["Temporal consistency degrades with complex multi-object interactions or rapid scene changes; model performs better on single-subject or slow-motion content","Causal masking prevents bidirectional temporal context; future frames cannot influence past frames, limiting some editing use cases","Temporal attention adds computational overhead (~20-30% vs spatial-only attention); longer videos require more memory and computation","No explicit control over motion speed or direction; temporal dynamics emerge implicitly from prompt and learned patterns"],"requires":["Attention mechanism implementation supporting causal masking (typically built into Transformers library)","Sufficient GPU memory to store attention matrices for full video sequence (scales quadratically with frame count)"],"input_types":["latent video tensor (sequence of frame latents)","text embeddings (applied to all frames uniformly)"],"output_types":["temporally-coherent video (with smooth motion and consistent object appearance)"],"categories":["image-visual","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_4","uri":"capability://image.visual.multi.resolution.video.generation.with.adaptive.latent.scaling","name":"multi-resolution video generation with adaptive latent scaling","description":"Generates videos at multiple resolutions (e.g., 768x512, 1024x576) by adapting the latent space dimensions and decoder output size without retraining the core diffusion model. The model uses resolution-aware embeddings or positional encodings to condition generation on target resolution, allowing a single model to produce outputs at different quality/speed tradeoffs. Lower resolutions generate faster with lower memory overhead, while higher resolutions produce more detailed outputs.","intents":["generate videos at different resolutions for different use cases (thumbnails, social media, high-quality exports) without multiple models","balance quality vs latency by selecting appropriate resolution for deployment constraints","support variable output formats (16:9, 9:16, 1:1 aspect ratios) from a single model"],"best_for":["platforms serving diverse user needs (mobile vs desktop, social media vs broadcast)","cost-conscious deployments where model size is constrained but flexibility is needed","developers building adaptive video generation systems that adjust resolution based on available resources"],"limitations":["Quality at lower resolutions may be noticeably degraded; model trained primarily at higher resolution","Aspect ratio support limited to those seen during training; unusual aspect ratios may produce distorted outputs","Resolution scaling adds complexity to model architecture and training; may introduce resolution-specific artifacts","No explicit control over aspect ratio or resolution; must be specified at inference time and affects generation quality"],"requires":["Resolution-aware model variant or checkpoint supporting target resolution","Decoder capable of upsampling/downsampling latent space to target resolution"],"input_types":["target resolution specification (e.g., 768x512, 1024x576)","text prompt","optional: aspect ratio hint"],"output_types":["video at specified resolution"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_5","uri":"capability://image.visual.batch.video.generation.with.parallel.inference","name":"batch video generation with parallel inference","description":"Processes multiple text prompts simultaneously through the diffusion pipeline, leveraging GPU parallelization to generate multiple videos in a single forward pass. The model batches prompts into a single tensor, processes them through the text encoder and diffusion denoiser in parallel, and decodes the resulting latents into separate videos. This approach reduces per-video overhead and enables efficient large-scale video generation for content platforms or batch processing workflows.","intents":["generate multiple videos from different prompts efficiently for content platforms or marketing campaigns","process large batches of video generation requests with lower per-video latency than sequential generation","maximize GPU utilization by keeping hardware busy with multiple concurrent generations"],"best_for":["content platforms or services generating videos at scale (e.g., e-commerce, marketing automation)","batch processing workflows where latency is less critical than throughput","teams with access to high-end GPUs (A100, H100) where batch processing is cost-effective"],"limitations":["Batch size limited by GPU memory; typical batch size 1-4 on consumer GPUs (RTX 4090), 4-8 on enterprise GPUs (A100)","All videos in batch must use same resolution and duration; mixed-resolution batches require padding or separate passes","Batch processing adds latency for first video (waiting for batch to fill) vs single-video generation; not suitable for real-time, single-request scenarios","Memory overhead scales linearly with batch size; OOM errors possible if batch size exceeds GPU capacity"],"requires":["GPU with sufficient VRAM for batch size (8GB per video + overhead; e.g., 24GB+ for batch size 2-3)","Batch processing implementation in inference pipeline (typically handled by Diffusers library)","Prompt list or queue management system for collecting batch requests"],"input_types":["list of text prompts (batch size 1-8 typical)","optional: batch-level parameters (resolution, duration, seed)"],"output_types":["list of videos (one per prompt in batch)"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_6","uri":"capability://data.processing.analysis.safetensors.model.format.loading.with.memory.mapped.inference","name":"safetensors model format loading with memory-mapped inference","description":"Loads model weights from the safetensors format (a safer, faster alternative to pickle-based PyTorch checkpoints) using memory-mapped file access, enabling efficient loading and inference without loading entire model into memory upfront. Safetensors provides type safety, faster deserialization, and protection against arbitrary code execution compared to traditional PyTorch format. Memory mapping allows GPU to access weights on-demand, reducing peak memory usage during model loading.","intents":["load large model weights (15-20GB) safely and efficiently without security risks from pickle deserialization","reduce model loading time from minutes to seconds through optimized safetensors format","enable inference on memory-constrained systems by avoiding full model materialization in RAM"],"best_for":["security-conscious deployments where arbitrary code execution during model loading is a concern","production systems where model loading time impacts user experience or service availability","edge deployments with limited RAM where memory-mapped access is necessary"],"limitations":["Safetensors format requires explicit conversion from PyTorch checkpoints; not all models available in this format","Memory mapping adds slight latency for first access to each weight tensor; not suitable for ultra-low-latency inference","Requires Hugging Face safetensors library (0.3.1+); not compatible with older PyTorch versions","Memory-mapped weights may cause page faults during inference, introducing unpredictable latency spikes"],"requires":["safetensors library (0.3.1+)","Model weights in safetensors format (.safetensors file)","Sufficient disk space for model file (15-20GB for CogVideoX-5b)"],"input_types":["safetensors model file path"],"output_types":["loaded model weights (in GPU or CPU memory as needed)"],"categories":["data-processing-analysis","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_7","uri":"capability://tool.use.integration.diffusers.pipeline.integration.with.standardized.inference.api","name":"diffusers pipeline integration with standardized inference api","description":"Implements the CogVideoXPipeline class within the Hugging Face Diffusers library, providing a standardized, high-level API for video generation that abstracts away low-level diffusion details. The pipeline handles text encoding, noise scheduling, denoising loop, VAE decoding, and output formatting in a single unified interface. This integration enables seamless composition with other Diffusers components (schedulers, safety filters, memory optimizations) and ensures compatibility with the broader Hugging Face ecosystem.","intents":["use video generation with minimal boilerplate code through high-level pipeline API","swap schedulers, safety filters, or other components without modifying core generation logic","integrate video generation into existing Hugging Face workflows and applications"],"best_for":["developers familiar with Hugging Face Diffusers ecosystem who want minimal learning curve","teams building multi-modal applications combining text, image, and video generation","researchers experimenting with different diffusion schedulers or safety mechanisms"],"limitations":["Pipeline abstraction adds ~50-100ms overhead per generation vs direct model calls; not suitable for ultra-low-latency applications","Limited customization of intermediate steps; advanced use cases may require subclassing or direct model access","Pipeline API stability depends on Diffusers library versioning; breaking changes possible across major versions","Documentation and examples may lag behind core Diffusers development"],"requires":["Diffusers library (0.24.0+)","Transformers library (4.30+) for text encoding","PyTorch 2.0+ with CUDA support"],"input_types":["text prompt (string)","optional: negative prompt, num_inference_steps, guidance_scale, height, width, num_frames"],"output_types":["video tensor or PIL Image list (depending on output_type parameter)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_8","uri":"capability://image.visual.guidance.scaled.conditional.generation.with.classifier.free.guidance","name":"guidance-scaled conditional generation with classifier-free guidance","description":"Implements classifier-free guidance (CFG) to strengthen the influence of text conditioning on video generation by interpolating between unconditional and conditional denoising predictions. During inference, the model generates predictions both with and without text conditioning, then blends them using a guidance scale parameter (typically 7.5-15.0). Higher guidance scales produce videos more closely aligned to the prompt but may reduce diversity and introduce artifacts; lower scales produce more creative but less controlled outputs.","intents":["control the strength of prompt adherence vs creative variation through guidance scale parameter","improve video quality and prompt alignment by increasing guidance scale for important generations","reduce computational cost by lowering guidance scale for exploratory or draft generations"],"best_for":["applications requiring fine-grained control over prompt adherence vs creativity","iterative content creation workflows where users adjust guidance scale based on results","research into prompt-to-video alignment and the role of guidance in generative models"],"limitations":["Guidance scale is a hyperparameter requiring tuning; optimal value varies by prompt and desired output style","High guidance scales (>15) can produce artifacts, oversaturation, or unnatural motion; requires careful tuning","Classifier-free guidance requires generating both conditional and unconditional predictions, doubling inference cost","No principled way to select guidance scale; users must experiment or rely on heuristics"],"requires":["Model trained with classifier-free guidance (unconditional predictions must be available)","Guidance scale parameter (float, typically 7.5-15.0)"],"input_types":["text prompt","guidance_scale parameter (float)"],"output_types":["video (with guidance-scaled conditioning)"],"categories":["image-visual","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-zai-org--cogvideox-5b__cap_9","uri":"capability://automation.workflow.seed.based.reproducible.generation.with.deterministic.sampling","name":"seed-based reproducible generation with deterministic sampling","description":"Enables reproducible video generation by seeding the random number generator with a fixed value, ensuring identical videos are produced for the same prompt and seed. The implementation uses PyTorch's random seed management to control noise initialization and all stochastic operations during diffusion. This allows users to reproduce specific videos, compare variations across different parameters, and debug generation issues deterministically.","intents":["reproduce specific video outputs for testing, debugging, or sharing with collaborators","generate multiple variations of a prompt by iterating seed values while keeping other parameters fixed","ensure consistent results across different hardware or software versions for reproducibility"],"best_for":["research and development workflows requiring reproducible results","quality assurance and testing where specific outputs must be verified","collaborative workflows where team members need to reproduce each other's results"],"limitations":["Reproducibility only guaranteed within same PyTorch version, CUDA version, and hardware; different environments may produce slightly different results due to floating-point non-determinism","Seed-based reproducibility requires disabling all non-deterministic operations; some optimizations (e.g., cuDNN benchmarking) must be disabled","Seed space is large (2^32 or 2^64) but finite; seed collisions are theoretically possible but practically negligible","No guarantee of reproducibility across different model versions or checkpoints"],"requires":["Seed parameter (integer, typically 0-2^32-1)","PyTorch with deterministic mode enabled (torch.use_deterministic_algorithms(True))","CUDA 11.0+ for deterministic GPU operations"],"input_types":["seed value (integer)"],"output_types":["deterministically-generated video (identical for same seed and prompt)"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":41,"verified":false,"data_access_risk":"low","permissions":["Python 3.8+","PyTorch 2.0+ with CUDA 11.8+ for GPU acceleration (NVIDIA GPU with 8GB+ VRAM recommended; 24GB+ for optimal inference speed)","Hugging Face Transformers library (4.30+) for text tokenization and embedding","Diffusers library (0.24.0+) with CogVideoXPipeline implementation","~15-20GB disk space for model weights (safetensors format)","Hugging Face API token for model access (free tier available)","Text encoder weights (CLIP or T5) loaded alongside video model; adds ~500MB-2GB memory overhead","Tokenizer compatible with chosen text encoder (typically included in Transformers library)","Prompt engineering knowledge or user education to achieve desired outputs","Negative prompt text (string)"],"failure_modes":["Output limited to ~4-8 second videos due to memory constraints and training data; longer sequences require stitching or external composition","Temporal consistency degrades with complex multi-object interactions or rapid scene changes; single-subject or slow-motion prompts perform better","Inference latency typically 2-5 minutes on consumer GPUs (RTX 4090) or 10-30 minutes on CPU, making real-time or batch processing of large volumes impractical without distributed infrastructure","Quality sensitive to prompt engineering; vague or overly complex descriptions produce incoherent or distorted outputs","No built-in support for video editing, frame interpolation, or post-processing; output is raw diffusion result without refinement","Prompt understanding limited by text encoder's training data; domain-specific or highly technical descriptions may be misinterpreted","No explicit control over camera movement, object placement, or timing; these emerge implicitly from prompt semantics","Prompt sensitivity creates reproducibility challenges; minor wording changes can produce drastically different outputs","No support for image-to-video or multi-modal conditioning; text is the only input modality","Negative prompt effectiveness depends on prompt clarity and model's understanding of concepts; vague negative prompts may have minimal effect","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.5218473463536445,"quality":0.32,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.766Z","last_scraped_at":"2026-05-03T14:22:52.093Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":39484,"model_likes":672}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=zai-org--cogvideox-5b","compare_url":"https://unfragile.ai/compare?artifact=zai-org--cogvideox-5b"}},"signature":"s8mCiIDsFl5IZRBdER4lY8qBYLlSz07vgUMmb5xwKRQXZOLfWy2PvQEJlEF1wuQbl71koSj8j3AvwkfOzrdBBQ==","signedAt":"2026-06-21T09:19:19.837Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/zai-org--cogvideox-5b","artifact":"https://unfragile.ai/zai-org--cogvideox-5b","verify":"https://unfragile.ai/api/v1/verify?slug=zai-org--cogvideox-5b","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}