{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"deepspeed","slug":"deepspeed","name":"DeepSpeed","type":"framework","url":"https://github.com/microsoft/DeepSpeed","page_url":"https://unfragile.ai/deepspeed","categories":["model-training"],"tags":[],"pricing":{"model":"free","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"deepspeed__cap_0","uri":"capability://automation.workflow.zero.optimizer.with.multi.stage.memory.partitioning","name":"zero optimizer with multi-stage memory partitioning","description":"Implements three-stage memory optimization (ZeRO-1, ZeRO-2, ZeRO-3) that partitions optimizer states, gradients, and model parameters across distributed GPUs/TPUs, reducing per-device memory footprint by 4-8x. Uses gradient checkpointing and activation partitioning to enable training of trillion-parameter models on commodity hardware clusters without model parallelism overhead.","intents":["Train models larger than single GPU memory by distributing state across multiple devices","Reduce per-GPU memory consumption to fit larger batch sizes and longer sequences","Scale training from 8 GPUs to thousands without rewriting model code","Achieve near-linear scaling efficiency on distributed clusters"],"best_for":["ML teams training large language models (7B+ parameters) on multi-GPU clusters","Researchers optimizing memory efficiency for constrained hardware budgets","Organizations scaling from single-node to distributed training without architectural refactoring"],"limitations":["ZeRO-3 introduces 10-20% training throughput overhead vs ZeRO-2 due to all-gather communication for parameter reconstruction","Requires NCCL 2.8+ and specific GPU interconnect topology (NVLink preferred for <100ms latency)","Communication overhead scales with cluster size; diminishing returns beyond 512 GPUs without gradient accumulation tuning","Incompatible with some custom CUDA kernels that assume contiguous parameter tensors"],"requires":["PyTorch 1.8+","CUDA 11.0+ or ROCm 4.0+","NCCL 2.8+ for multi-node communication","Distributed training harness (torch.distributed or Hugging Face Accelerate)","2+ GPUs (single-GPU training not supported)"],"input_types":["PyTorch model definitions (nn.Module)","Training loops with backward() calls","Optimizer state dictionaries"],"output_types":["Distributed training checkpoints with partitioned state","Memory usage metrics and communication profiles","Trained model weights (reconstructed on single device for inference)"],"categories":["automation-workflow","distributed-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_1","uri":"capability://automation.workflow.deepspeed.inference.with.kernel.fusion.and.quantization","name":"deepspeed-inference with kernel fusion and quantization","description":"Optimizes inference serving through kernel fusion (combining attention, MLP, normalization into single CUDA kernels), INT8/FP16 quantization with calibration, and batch scheduling. Reduces latency by 2-10x and memory by 4-8x compared to standard PyTorch inference through operator-level optimization and graph-level transformations.","intents":["Deploy large models for real-time inference with sub-100ms latency requirements","Reduce inference memory footprint to fit multiple model replicas on single GPU","Serve models with dynamic batch sizes while maintaining throughput SLAs","Quantize models to INT8 without retraining while preserving accuracy"],"best_for":["Production ML teams serving LLMs with strict latency SLAs (<100ms p99)","Cost-conscious organizations optimizing GPU utilization per inference request","Edge deployment scenarios requiring memory-constrained inference"],"limitations":["Kernel fusion optimizations are GPU-architecture-specific (A100, H100, V100); limited support for older GPUs","Quantization calibration requires representative dataset; accuracy degradation of 1-3% typical for INT8 on large models","Dynamic shape inference not supported; requires fixed batch sizes or padding overhead","No built-in batching scheduler; requires external orchestration (vLLM, Ray Serve) for production workloads"],"requires":["CUDA 11.0+ with compute capability 7.0+ (V100 or newer)","PyTorch 1.8+","Triton or custom CUDA kernels for target model architecture","Calibration dataset for quantization (100-1000 samples typical)"],"input_types":["PyTorch model checkpoints","Quantization calibration data (text or embeddings)","Inference request batches (token sequences)"],"output_types":["Optimized inference engine (compiled CUDA kernels)","Quantized model weights (INT8 or FP16)","Latency/throughput metrics and memory profiles"],"categories":["automation-workflow","model-optimization"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_10","uri":"capability://automation.workflow.training.profiling.and.performance.analysis","name":"training profiling and performance analysis","description":"Provides built-in profiling tools to analyze training performance including computation time, communication overhead, memory usage, and I/O bottlenecks. Generates detailed reports identifying optimization opportunities and bottlenecks in distributed training.","intents":["Identify performance bottlenecks in distributed training (compute vs communication vs I/O)","Analyze GPU utilization and memory efficiency","Optimize hyperparameters based on profiling data"],"best_for":["ML engineers optimizing training performance","Teams debugging slow training or poor scaling efficiency","Organizations analyzing cost per training step"],"limitations":["Profiling overhead adds 5-10% to training time; not suitable for production inference","Detailed profiling requires synchronization across distributed devices; can hide communication overlap","Reports are complex; requires expertise to interpret and act on findings","Profiling data can be large for long training runs; requires efficient storage"],"requires":["PyTorch 1.8+","NVIDIA Nsight Systems or similar profiling tools (optional)","Distributed training setup"],"input_types":["Training loop with profiling enabled","Configuration for profiling scope and granularity"],"output_types":["Profiling reports with timing breakdown","Memory usage analysis","Communication overhead metrics","Optimization recommendations"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_11","uri":"capability://automation.workflow.model.compression.through.pruning.and.distillation","name":"model compression through pruning and distillation","description":"Implements structured and unstructured pruning strategies to remove redundant weights, and knowledge distillation to transfer knowledge from large teacher models to smaller student models. Reduces model size by 2-10x and inference latency by 2-5x with minimal accuracy loss.","intents":["Reduce model size for deployment on resource-constrained devices","Accelerate inference by removing redundant parameters","Transfer knowledge from large models to smaller models for deployment"],"best_for":["Teams deploying models on edge devices or mobile","Organizations optimizing inference cost and latency","Research groups exploring model compression techniques"],"limitations":["Pruning requires fine-tuning to recover accuracy; adds training overhead","Unstructured pruning requires specialized hardware (sparse tensor support) for speedup; dense hardware sees minimal benefit","Distillation requires access to teacher model; adds training cost","Accuracy degradation of 1-5% typical; task-dependent and requires validation"],"requires":["PyTorch 1.8+","Pretrained model for pruning or teacher model for distillation","Fine-tuning dataset for accuracy recovery"],"input_types":["Pretrained model weights","Pruning configuration (sparsity ratio, layer selection)","Fine-tuning data for distillation"],"output_types":["Pruned model weights","Distilled student model","Accuracy and latency metrics"],"categories":["automation-workflow","model-optimization"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_12","uri":"capability://automation.workflow.multi.gpu.training.with.automatic.device.placement","name":"multi-gpu training with automatic device placement","description":"Automatically places model layers and operations on appropriate GPUs based on memory and compute constraints. Handles device synchronization, gradient aggregation, and communication scheduling transparently to enable multi-GPU training with minimal code changes.","intents":["Train models on multiple GPUs without manual device placement","Automatically balance load across GPUs","Synchronize gradients and parameters across devices"],"best_for":["Teams new to distributed training seeking simple multi-GPU setup","Organizations with heterogeneous GPU clusters","Researchers prototyping models quickly"],"limitations":["Automatic placement may not be optimal for all models; manual placement often outperforms","Requires all GPUs to have similar memory; heterogeneous clusters may have suboptimal placement","Communication overhead scales with number of GPUs; diminishing returns beyond 8 GPUs without optimization","Debugging is harder due to automatic placement; errors can be difficult to trace"],"requires":["PyTorch 1.8+","2+ GPUs with CUDA support","torch.nn.DataParallel or torch.nn.parallel.DistributedDataParallel"],"input_types":["PyTorch model definition","Training data","GPU configuration"],"output_types":["Trained model weights","Training metrics"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_2","uri":"capability://automation.workflow.deepspeed.chat.with.rlhf.pipeline.orchestration","name":"deepspeed-chat with rlhf pipeline orchestration","description":"Implements end-to-end Reinforcement Learning from Human Feedback (RLHF) training pipeline with actor-critic architecture, reward model training, and policy optimization. Orchestrates four-model training loop (actor, critic, reward model, reference) with ZeRO optimization and automatic gradient accumulation scheduling to fit on limited GPU memory.","intents":["Fine-tune language models using human feedback without building custom RLHF infrastructure","Train reward models to score model outputs based on human preferences","Optimize policy models with PPO (Proximal Policy Optimization) while maintaining training stability","Scale RLHF training from single GPU to multi-node clusters"],"best_for":["ML teams building instruction-tuned models (ChatGPT-style) with limited RLHF expertise","Organizations with human feedback datasets (preference pairs) ready for training","Research groups experimenting with alignment techniques without infrastructure overhead"],"limitations":["Requires simultaneous training of 4 models (actor, critic, reward, reference); memory overhead 3-4x vs supervised fine-tuning","Reward model quality directly impacts policy optimization; poor reward models lead to reward hacking","PPO training is inherently unstable; requires careful hyperparameter tuning (learning rate, KL penalty) per dataset","Preference data collection and annotation is expensive; no built-in data collection or labeling tools"],"requires":["PyTorch 1.8+","Base language model (7B+ parameters recommended)","Human preference dataset (10k+ preference pairs typical)","8+ GPUs for reasonable training speed (single GPU training extremely slow)","Hugging Face Transformers library for model loading"],"input_types":["Pretrained language model checkpoint","Preference dataset (prompt, chosen_response, rejected_response tuples)","Hyperparameter configuration (learning rates, KL penalty, batch sizes)"],"output_types":["Fine-tuned actor model (policy)","Trained reward model weights","Training logs with reward/loss curves and KL divergence metrics"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_3","uri":"capability://automation.workflow.distributed.training.with.automatic.mixed.precision.and.gradient.accumulation","name":"distributed training with automatic mixed precision and gradient accumulation","description":"Provides automatic mixed precision (AMP) training with FP16 forward/backward passes and FP32 master weights, combined with gradient accumulation scheduling across distributed devices. Handles loss scaling, gradient clipping, and synchronization automatically to prevent numerical instability while reducing memory and compute by 2-3x.","intents":["Train models faster by using FP16 computation while maintaining FP32 numerical stability","Reduce per-GPU memory footprint by 40-50% through mixed precision without accuracy loss","Accumulate gradients across multiple batches to simulate larger effective batch sizes","Automatically synchronize gradients across distributed GPUs with minimal code changes"],"best_for":["Teams training large models on limited GPU memory budgets","Organizations seeking 2-3x training speedup with minimal code refactoring","Researchers experimenting with different batch sizes and learning rates"],"limitations":["FP16 training can cause loss spikes or divergence if loss scaling not tuned correctly; requires monitoring","Gradient accumulation increases training time per step by 10-20% due to synchronization overhead","Not all operations support FP16; some layers (normalization, loss computation) must stay in FP32","Requires GPU with Tensor Cores (V100+); older GPUs see minimal speedup"],"requires":["PyTorch 1.6+","NVIDIA GPU with compute capability 7.0+ (V100 or newer)","APEX library or native PyTorch AMP (torch.cuda.amp)","Distributed training harness (torch.distributed)"],"input_types":["PyTorch training loop with loss.backward() calls","Model parameters and optimizer state"],"output_types":["Trained model weights in FP32","Training metrics (loss, throughput, memory usage)"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_4","uri":"capability://automation.workflow.activation.checkpointing.with.selective.layer.recomputation","name":"activation checkpointing with selective layer recomputation","description":"Trades compute for memory by selectively recomputing activations during backward pass instead of storing them. Implements layer-wise checkpointing strategy that recomputes only expensive layers (attention, MLP) while keeping normalization activations in memory, reducing memory by 30-50% with <10% compute overhead.","intents":["Increase sequence length or batch size by reducing activation memory footprint","Train models with longer context windows without increasing GPU memory","Fit larger models on single GPU by trading compute for memory"],"best_for":["Teams training transformer models with long sequences (2K+ tokens)","Researchers experimenting with larger batch sizes on memory-constrained hardware","Organizations optimizing cost per training step"],"limitations":["Recomputation adds 5-15% training time overhead depending on layer selection","Requires careful tuning of which layers to checkpoint; wrong selection can increase memory usage","Not compatible with some custom CUDA kernels that assume persistent activations","Debugging is harder due to activations not being available during backward pass"],"requires":["PyTorch 1.8+","Understanding of model architecture to select checkpointing strategy","Sufficient compute to handle recomputation overhead"],"input_types":["PyTorch model with nn.Module layers","Training loop with backward() calls"],"output_types":["Memory usage reduction metrics","Training time overhead measurements"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_5","uri":"capability://automation.workflow.pipeline.parallelism.with.gpipe.style.stage.scheduling","name":"pipeline parallelism with gpipe-style stage scheduling","description":"Implements pipeline parallelism by splitting model layers across multiple GPUs and scheduling forward/backward passes in stages to maximize GPU utilization. Uses micro-batching and bubble minimization to reduce idle time, enabling training of models too large for single GPU with better scaling than naive pipeline approaches.","intents":["Train models larger than single GPU memory by splitting layers across devices","Achieve better GPU utilization than data parallelism alone for very large models","Reduce per-GPU memory footprint for models with billions of parameters"],"best_for":["Teams training extremely large models (100B+ parameters) requiring layer-level parallelism","Organizations with high-bandwidth GPU interconnects (NVLink) for efficient communication","Research groups exploring model parallelism strategies"],"limitations":["Pipeline bubbles reduce GPU utilization; typical utilization 60-80% vs 90%+ with data parallelism","Requires careful load balancing across stages; unbalanced stages create bottlenecks","Communication overhead between stages can dominate for small models or slow interconnects","Debugging is complex due to distributed execution across multiple GPUs","Incompatible with dynamic control flow (if statements, loops) in model forward pass"],"requires":["PyTorch 1.8+","4+ GPUs with high-bandwidth interconnect (NVLink preferred)","Model architecture amenable to layer-wise splitting (sequential layers)"],"input_types":["PyTorch model with sequential layer structure","Training configuration (number of pipeline stages, micro-batch size)"],"output_types":["Distributed model split across stages","Training metrics with pipeline utilization and bubble analysis"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_6","uri":"capability://automation.workflow.automatic.model.partitioning.and.load.balancing","name":"automatic model partitioning and load balancing","description":"Analyzes model architecture and computational graph to automatically partition layers across available GPUs, balancing compute and memory load. Uses heuristics based on layer FLOPs and parameter counts to minimize communication overhead while ensuring no single GPU becomes a bottleneck.","intents":["Automatically split large models across GPUs without manual partitioning","Balance computational load to prevent GPU bottlenecks","Minimize communication overhead between pipeline stages"],"best_for":["Teams without expertise in manual model partitioning","Organizations with heterogeneous GPU clusters requiring dynamic load balancing","Researchers prototyping different model architectures"],"limitations":["Heuristics may not be optimal for all model architectures; custom partitioning often outperforms automatic","Requires model to be expressible as computational graph; dynamic models not supported","Load balancing assumes uniform GPU performance; heterogeneous clusters may have suboptimal partitions","Rebalancing requires recompilation; not suitable for dynamic workloads"],"requires":["PyTorch 1.8+","Model architecture definition in standard format","Computational graph analysis tools"],"input_types":["PyTorch model definition","GPU cluster configuration (number of GPUs, memory per GPU)"],"output_types":["Partitioning strategy (which layers on which GPUs)","Load balancing metrics and communication analysis"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_7","uri":"capability://automation.workflow.gradient.compression.and.communication.optimization","name":"gradient compression and communication optimization","description":"Reduces communication overhead in distributed training through gradient compression (top-k sparsification, quantization), overlapping communication with computation, and hierarchical gradient aggregation. Reduces communication volume by 10-100x depending on compression ratio while maintaining convergence.","intents":["Reduce communication bottleneck in distributed training on slow interconnects","Train on multi-node clusters with limited bandwidth without sacrificing convergence","Overlap gradient communication with backward computation to hide latency"],"best_for":["Teams training on multi-node clusters with limited bandwidth (<100 Gbps)","Organizations optimizing training cost on cloud infrastructure with expensive bandwidth","Research groups exploring communication-efficient distributed training"],"limitations":["Gradient compression introduces quantization error; convergence may require learning rate adjustment","Top-k sparsification adds overhead for sparse gradient aggregation; benefits only visible at scale (8+ nodes)","Compression/decompression adds CPU overhead; GPU-based compression kernels required for efficiency","Convergence guarantees depend on compression ratio; aggressive compression (>100x) may cause divergence"],"requires":["PyTorch 1.8+","Multi-node distributed training setup","NCCL 2.8+ for efficient collective operations"],"input_types":["Gradient tensors from backward pass","Compression configuration (ratio, algorithm)"],"output_types":["Compressed gradients for communication","Communication volume reduction metrics"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_8","uri":"capability://automation.workflow.checkpoint.management.with.distributed.state.saving","name":"checkpoint management with distributed state saving","description":"Handles distributed checkpoint saving/loading for models trained with ZeRO, pipeline parallelism, or other distributed strategies. Automatically consolidates partitioned state across devices, manages checkpoint versioning, and supports incremental checkpointing to reduce I/O overhead.","intents":["Save and resume training for distributed models without manual state consolidation","Manage multiple checkpoint versions for experiment tracking","Reduce checkpoint I/O overhead through incremental saving"],"best_for":["Teams training large models requiring frequent checkpointing","Organizations managing long-running training jobs with fault tolerance requirements","Research groups experimenting with different training strategies"],"limitations":["Checkpoint consolidation requires temporary disk space equal to model size; can be bottleneck for very large models","Loading distributed checkpoints requires same number of GPUs as training; no automatic repartitioning","Incremental checkpointing adds complexity; requires careful management of checkpoint deltas","No built-in cloud storage integration; requires external tools for S3/GCS backup"],"requires":["PyTorch 1.8+","Sufficient disk space for checkpoint consolidation","Distributed training harness with state management"],"input_types":["Model state (parameters, optimizer state, training state)","Checkpoint configuration (save frequency, retention policy)"],"output_types":["Consolidated checkpoint files","Checkpoint metadata (timestamp, training step, metrics)"],"categories":["automation-workflow","model-training"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__cap_9","uri":"capability://code.generation.editing.custom.cuda.kernel.integration.and.optimization","name":"custom cuda kernel integration and optimization","description":"Provides framework for integrating custom CUDA kernels (attention, normalization, activation functions) into training pipeline with automatic gradient computation. Enables kernel fusion and operator-level optimization while maintaining compatibility with standard PyTorch autograd.","intents":["Integrate custom CUDA kernels for model-specific optimizations","Fuse multiple operations into single kernel to reduce memory bandwidth","Implement efficient attention patterns (FlashAttention-style) with automatic gradients"],"best_for":["ML engineers optimizing specific model architectures with custom kernels","Research teams implementing novel attention mechanisms or activation functions","Organizations requiring extreme performance optimization for production inference"],"limitations":["Requires CUDA programming expertise; steep learning curve for most ML engineers","Custom kernels are GPU-architecture-specific; requires separate implementations for different GPUs","Debugging custom kernels is difficult; errors can cause silent numerical issues","Maintenance burden increases with number of custom kernels; compatibility issues with PyTorch updates"],"requires":["CUDA 11.0+","CUDA C++ programming knowledge","PyTorch 1.8+ with custom extension support","GPU with compute capability 7.0+ (V100 or newer)"],"input_types":["CUDA kernel source code","Gradient computation definitions","Input tensors and hyperparameters"],"output_types":["Optimized kernel implementation","Gradient computation kernels","Performance benchmarks"],"categories":["code-generation-editing","model-optimization"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"deepspeed__headline","uri":"capability://model.training.deep.learning.optimization.framework.for.large.scale.model.training","name":"deep learning optimization framework for large-scale model training","description":"DeepSpeed is a powerful deep learning optimization library designed for training massive models with trillions of parameters efficiently, making it ideal for researchers and developers working on cutting-edge AI projects.","intents":["best deep learning optimization framework","deep learning framework for large models","how to train models with trillions of parameters","optimized serving for AI models","DeepSpeed vs other deep learning frameworks"],"best_for":["training large-scale AI models","optimized inference"],"limitations":[],"requires":["compatible hardware","Python environment"],"input_types":["model architectures","training datasets"],"output_types":["trained models","inference results"],"categories":["model-training"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":57,"verified":false,"data_access_risk":"high","permissions":["PyTorch 1.8+","CUDA 11.0+ or ROCm 4.0+","NCCL 2.8+ for multi-node communication","Distributed training harness (torch.distributed or Hugging Face Accelerate)","2+ GPUs (single-GPU training not supported)","CUDA 11.0+ with compute capability 7.0+ (V100 or newer)","Triton or custom CUDA kernels for target model architecture","Calibration dataset for quantization (100-1000 samples typical)","NVIDIA Nsight Systems or similar profiling tools (optional)","Distributed training setup"],"failure_modes":["ZeRO-3 introduces 10-20% training throughput overhead vs ZeRO-2 due to all-gather communication for parameter reconstruction","Requires NCCL 2.8+ and specific GPU interconnect topology (NVLink preferred for <100ms latency)","Communication overhead scales with cluster size; diminishing returns beyond 512 GPUs without gradient accumulation tuning","Incompatible with some custom CUDA kernels that assume contiguous parameter tensors","Kernel fusion optimizations are GPU-architecture-specific (A100, H100, V100); limited support for older GPUs","Quantization calibration requires representative dataset; accuracy degradation of 1-3% typical for INT8 on large models","Dynamic shape inference not supported; requires fixed batch sizes or padding overhead","No built-in batching scheduler; requires external orchestration (vLLM, Ray Serve) for production workloads","Profiling overhead adds 5-10% to training time; not suitable for production inference","Detailed profiling requires synchronization across distributed devices; can hide communication overlap","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7,"quality":0.9,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.52,"weights":{"adoption":0.3,"quality":0.2,"ecosystem":0.15,"match_graph":0.23,"freshness":0.12}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-06-17T09:51:04.690Z","last_scraped_at":null,"last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=deepspeed","compare_url":"https://unfragile.ai/compare?artifact=deepspeed"}},"signature":"UQeqP7m0gpWzWEHt7DaQzsC1lAeDaEbgv/Bm8EGnQQz8uRDGS4sAMD/XzYgRu6M5nBZWcZbRYl5iioOgmZJaCQ==","signedAt":"2026-06-22T02:44:51.637Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/deepspeed","artifact":"https://unfragile.ai/deepspeed","verify":"https://unfragile.ai/api/v1/verify?slug=deepspeed","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}