{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-model-shi-labs--oneformer_coco_swin_large","slug":"shi-labs--oneformer_coco_swin_large","name":"oneformer_coco_swin_large","type":"model","url":"https://huggingface.co/shi-labs/oneformer_coco_swin_large","page_url":"https://unfragile.ai/shi-labs--oneformer_coco_swin_large","categories":["image-generation"],"tags":["transformers","pytorch","oneformer","vision","image-segmentation","dataset:ydshieh/coco_dataset_script","arxiv:2211.06220","license:mit","endpoints_compatible","deploy:azure","region:us"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_0","uri":"capability://image.visual.unified.image.segmentation.with.task.conditioning","name":"unified-image-segmentation-with-task-conditioning","description":"Performs semantic, instance, and panoptic segmentation in a single unified model architecture using task-conditioned prompting. The model uses a Swin Transformer backbone with a unified segmentation head that accepts a task token (semantic/instance/panoptic) as input conditioning, enabling dynamic task selection at inference time without model switching. This eliminates the need for separate task-specific models while maintaining competitive performance across all three segmentation paradigms through a shared feature extraction and decoding pathway.","intents":["I need to segment images for multiple tasks (semantic, instance, panoptic) without maintaining separate models","I want to dynamically switch between segmentation tasks at inference time based on application requirements","I need a single model that handles both stuff (background) and thing (object) classes efficiently","I want to reduce model deployment complexity by using one unified architecture instead of three task-specific models"],"best_for":["computer vision teams building multi-task segmentation pipelines","researchers prototyping unified vision architectures","production systems with memory/latency constraints requiring single-model deployment","edge deployment scenarios where model size and inference speed are critical"],"limitations":["Task conditioning adds ~15-25ms latency per inference compared to task-specific models due to additional prompt encoding","Performance on panoptic segmentation is ~2-3% lower than specialized panoptic-only models (Mask2Former) on COCO benchmark","Requires explicit task token input — cannot auto-detect optimal task from image content","Training convergence is slower than single-task models due to multi-task learning complexity","Limited to COCO dataset distribution — generalization to domain-specific segmentation tasks not validated"],"requires":["PyTorch 1.9+","transformers library 4.25+","CUDA 11.0+ for GPU inference (CPU inference supported but slow)","minimum 8GB GPU memory for batch_size=1 inference with large variant","Python 3.7+"],"input_types":["RGB images (3-channel, arbitrary resolution)","task token string: 'semantic', 'instance', or 'panoptic'","image tensors normalized to ImageNet statistics (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])"],"output_types":["segmentation masks (H×W integer tensor with class IDs)","instance IDs (for instance/panoptic tasks)","class probability maps (optional, H×W×num_classes float tensor)","panoptic segmentation IDs combining semantic and instance information"],"categories":["image-visual","vision-transformers"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_1","uri":"capability://image.visual.swin.transformer.backbone.feature.extraction","name":"swin-transformer-backbone-feature-extraction","description":"Extracts multi-scale hierarchical image features using a Swin Transformer backbone with shifted window attention mechanisms. The backbone operates in 4 stages (C1-C4) producing feature maps at 4×, 8×, 16×, and 32× downsampling ratios. Shifted window attention reduces computational complexity from O(n²) to O(n log n) by partitioning feature maps into local windows and shifting window positions between layers, enabling efficient processing of high-resolution images while maintaining global receptive fields through cross-window connections.","intents":["I need efficient multi-scale feature extraction for segmentation without quadratic attention complexity","I want to process high-resolution images (2K+) with reasonable memory footprint","I need hierarchical features at multiple scales for both semantic and instance-level predictions","I want to leverage pre-trained vision transformer weights from ImageNet-21K for transfer learning"],"best_for":["teams processing high-resolution medical or satellite imagery","applications requiring real-time inference on edge devices","researchers studying efficient vision transformer architectures","production pipelines where inference latency must stay under 100ms"],"limitations":["Shifted window attention introduces ~10-15% computational overhead compared to standard attention due to window shifting and masking operations","Feature extraction is resolution-dependent — very high resolutions (>2048×2048) require gradient checkpointing to fit in memory","Swin backbone is less interpretable than CNN-based backbones due to window-based attention patterns","Pre-trained weights are ImageNet-biased — performance may degrade on out-of-distribution domains (medical, satellite) without fine-tuning"],"requires":["PyTorch 1.9+","timm library 0.4.12+ for backbone implementation","CUDA 11.0+ for efficient attention computation","minimum 6GB GPU memory for 512×512 image processing","transformers 4.25+"],"input_types":["RGB images at arbitrary resolution (typically 512×512 to 2048×2048)","normalized image tensors (ImageNet normalization applied)"],"output_types":["4 hierarchical feature maps (C1, C2, C3, C4) at 4×, 8×, 16×, 32× downsampling","feature dimensions: C1=96, C2=192, C3=384, C4=768 channels for large variant","float32 tensors ready for decoder input"],"categories":["image-visual","vision-transformers"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_2","uri":"capability://image.visual.multi.scale.decoder.with.cross.attention.fusion","name":"multi-scale-decoder-with-cross-attention-fusion","description":"Decodes multi-scale backbone features into segmentation predictions using a cross-attention based decoder that progressively fuses features from all 4 backbone stages. The decoder uses learnable query embeddings that attend to backbone features at each scale through cross-attention mechanisms, enabling selective feature aggregation and adaptive weighting of information from different scales. This approach avoids simple concatenation by learning task-aware feature combinations that emphasize relevant scales for each prediction location.","intents":["I need to fuse multi-scale features intelligently rather than through simple concatenation","I want the model to learn which scales are most relevant for different semantic regions","I need to handle both small objects (requiring high-resolution features) and large objects (requiring low-resolution context)","I want to reduce decoder parameters while maintaining expressiveness through attention-based fusion"],"best_for":["applications with objects at highly variable scales (small vehicles + large buildings)","memory-constrained deployments where parameter efficiency matters","teams requiring interpretable feature fusion (attention weights reveal scale importance)","research exploring attention-based decoder architectures"],"limitations":["Cross-attention adds ~20-30ms latency per inference compared to CNN decoders due to O(n²) attention computation","Decoder requires careful tuning of attention head counts and hidden dimensions — suboptimal configurations lead to 5-10% accuracy drops","Attention mechanisms are less stable during training — requires gradient clipping and careful learning rate scheduling","Multi-scale fusion increases memory consumption during training by ~30% compared to single-scale decoders"],"requires":["PyTorch 1.9+ with autograd support","transformers 4.25+ for attention implementations","CUDA 11.0+ for efficient attention kernels","minimum 8GB GPU memory for training with batch_size=2","gradient checkpointing enabled for memory efficiency"],"input_types":["4 hierarchical feature maps from Swin backbone (C1-C4)","task token embedding (semantic/instance/panoptic)","optional: mask queries for instance segmentation"],"output_types":["per-pixel class predictions (H/4 × W/4 × num_classes)","instance masks (for instance/panoptic tasks)","attention weight maps showing scale importance per location","upsampled predictions to original image resolution"],"categories":["image-visual","attention-mechanisms"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_3","uri":"capability://image.visual.task.conditioned.prediction.head.with.dynamic.routing","name":"task-conditioned-prediction-head-with-dynamic-routing","description":"Generates task-specific segmentation predictions (semantic/instance/panoptic) from decoded features using a task-conditioned prediction head that dynamically routes computation based on the input task token. The head uses separate prediction branches for semantic segmentation (per-pixel class logits) and instance segmentation (mask logits + class predictions), with task conditioning controlling which branches are active and how features are processed. For panoptic segmentation, both branches execute and their outputs are combined through learned fusion weights that depend on the task token.","intents":["I need to switch between segmentation tasks (semantic/instance/panoptic) at inference time without reloading models","I want to share computation between tasks where possible while maintaining task-specific optimizations","I need to generate both per-pixel class predictions and instance-level masks from a single forward pass","I want to avoid maintaining separate prediction heads for each task"],"best_for":["multi-task vision systems requiring dynamic task selection","applications where task requirements change per-image or per-batch","production systems optimizing for model size and inference speed","research exploring task-agnostic vision architectures"],"limitations":["Task conditioning adds ~5-10ms overhead per inference due to task token embedding and routing logic","Panoptic segmentation requires running both semantic and instance branches, increasing latency by ~40% compared to semantic-only","Task token must be specified explicitly — no automatic task detection from image content","Cross-task interference during training can reduce per-task performance by 2-3% compared to single-task models","Instance branch requires NMS post-processing which adds variable latency (50-200ms depending on object density)"],"requires":["PyTorch 1.9+","transformers 4.25+","CUDA 11.0+ for efficient routing operations","minimum 6GB GPU memory","Python 3.7+"],"input_types":["decoded feature maps from multi-scale decoder","task token: 'semantic', 'instance', or 'panoptic' (string or integer ID)","optional: mask queries for instance branch"],"output_types":["semantic segmentation: (H/4 × W/4 × 133) logits for COCO classes","instance segmentation: (num_instances × H/4 × W/4) mask logits + (num_instances × 133) class logits","panoptic segmentation: (H/4 × W/4) combined semantic+instance IDs","upsampled predictions to original image resolution via bilinear interpolation"],"categories":["image-visual","conditional-computation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_4","uri":"capability://image.visual.coco.dataset.pretraining.with.133.class.vocabulary","name":"coco-dataset-pretraining-with-133-class-vocabulary","description":"Provides pre-trained weights optimized for COCO dataset segmentation with a 133-class vocabulary covering 80 thing classes (objects) and 53 stuff classes (background regions). The model was trained on COCO 2017 train split (118K images) using multi-task learning across semantic, instance, and panoptic segmentation objectives. Pre-training uses a combination of cross-entropy loss for semantic predictions and dice loss for instance masks, with class-balanced sampling to handle long-tail class distributions in COCO.","intents":["I want to use pre-trained weights for COCO-compatible segmentation tasks without training from scratch","I need a model that understands COCO's 80 object classes and 53 background categories","I want to fine-tune on custom datasets while leveraging COCO pre-training","I need competitive baseline performance on COCO benchmark without model training"],"best_for":["teams building COCO-based segmentation systems (autonomous driving, robotics, surveillance)","researchers benchmarking against COCO leaderboard","practitioners fine-tuning on custom datasets with COCO-like class distributions","production systems requiring immediate deployment without training infrastructure"],"limitations":["Pre-training is COCO-specific — performance degrades significantly on out-of-distribution datasets (medical, satellite, industrial) without fine-tuning","133-class vocabulary is fixed — adding new classes requires fine-tuning or retraining","COCO dataset bias toward natural images means poor performance on synthetic, medical, or specialized imagery","Class imbalance in COCO (person class dominates) can cause suboptimal performance on rare classes without re-weighting during fine-tuning","Panoptic segmentation performance on COCO is ~2-3% lower than specialized panoptic models due to multi-task learning trade-offs"],"requires":["PyTorch 1.9+","transformers 4.25+","CUDA 11.0+ for inference (CPU inference possible but slow)","minimum 6GB GPU memory for inference","COCO dataset format compatibility for fine-tuning"],"input_types":["RGB images in COCO format (arbitrary resolution, typically 512-1024px)","task token: 'semantic', 'instance', or 'panoptic'"],"output_types":["segmentation predictions with COCO class IDs (0-132)","instance IDs for instance/panoptic tasks","confidence scores per class"],"categories":["image-visual","transfer-learning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_5","uri":"capability://image.visual.efficient.inference.with.mixed.precision.support","name":"efficient-inference-with-mixed-precision-support","description":"Supports mixed-precision inference (FP16/BF16) to reduce memory consumption and latency while maintaining accuracy. The model can run in FP32 (full precision) for maximum accuracy or FP16 (half precision) for 2× memory reduction and 1.5-2× speedup on NVIDIA GPUs with Tensor Cores. BF16 precision is supported on newer hardware (A100, H100) for better numerical stability than FP16. Automatic mixed precision (AMP) can be enabled to selectively cast operations to lower precision while keeping numerically sensitive operations in FP32.","intents":["I need to reduce GPU memory consumption to fit larger batch sizes or higher resolution images","I want to speed up inference for real-time applications without significant accuracy loss","I need to deploy on resource-constrained hardware (edge GPUs, mobile devices)","I want to process multiple images in parallel with limited VRAM"],"best_for":["production systems with strict latency requirements (<50ms per image)","edge deployment on NVIDIA Jetson or similar platforms","batch processing pipelines where throughput matters more than per-image latency","cost-sensitive cloud deployments where GPU hours are expensive"],"limitations":["FP16 precision can cause numerical instability in attention mechanisms — requires careful gradient scaling during training","Accuracy loss with FP16 is typically 0.5-1.5% mIoU on COCO, acceptable for most applications but not for high-precision tasks","Mixed precision requires NVIDIA GPU with Tensor Cores (V100+, RTX 2080+, A100) — not supported on older hardware","BF16 support is limited to newer GPUs (A100, H100, RTX 4090) — not available on older hardware","Some operations (softmax in attention) may have reduced numerical stability in FP16, requiring manual precision management"],"requires":["PyTorch 1.9+ with AMP support","NVIDIA GPU with Tensor Cores (V100, A100, RTX series)","CUDA 11.0+","torch.cuda.amp for automatic mixed precision","optional: apex library for advanced mixed precision features"],"input_types":["RGB images at arbitrary resolution","task token"],"output_types":["segmentation predictions (same format as FP32, but computed with mixed precision)"],"categories":["image-visual","optimization"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_6","uri":"capability://image.visual.batch.processing.with.variable.resolution.support","name":"batch-processing-with-variable-resolution-support","description":"Processes multiple images in a single batch with support for variable input resolutions through dynamic padding and batching strategies. Images are padded to a common size within each batch (typically the maximum resolution in the batch) to enable efficient GPU computation. The model supports arbitrary input resolutions from 256×256 to 2048×2048, automatically adjusting internal computation to handle different aspect ratios and sizes. Post-processing includes resolution-aware upsampling to restore predictions to original image dimensions.","intents":["I need to process multiple images efficiently in a single batch","I have images with different resolutions and want to avoid resizing","I want to maximize GPU utilization by batching images of similar sizes","I need to handle variable-aspect-ratio images without distortion"],"best_for":["batch processing pipelines (video frame segmentation, image dataset processing)","production systems processing diverse image sources with different resolutions","applications requiring high throughput over per-image latency","teams with heterogeneous image datasets (mixed resolutions, aspect ratios)"],"limitations":["Variable resolution batching requires padding to maximum resolution in batch — wastes computation on padded regions (5-20% overhead depending on resolution variance)","Dynamic padding adds ~10-15ms overhead per batch for padding/unpadding operations","Very high resolution images (>2048×2048) require gradient checkpointing during training, reducing training speed by ~30%","Batch size must be reduced for high-resolution images — typical batch_size=1 for 2048×2048 images on 24GB GPUs","Post-processing upsampling to original resolution adds ~5-10ms latency per image"],"requires":["PyTorch 1.9+","minimum 8GB GPU memory for batch_size=4 at 512×512 resolution","CUDA 11.0+","Python 3.7+"],"input_types":["batch of RGB images with arbitrary resolutions (256×256 to 2048×2048)","task token (same for all images in batch)","optional: per-image metadata (original resolution for post-processing)"],"output_types":["batch of segmentation predictions upsampled to original image resolutions","instance IDs and class predictions for each image","attention maps showing scale importance (optional)"],"categories":["image-visual","batch-processing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_7","uri":"capability://image.visual.post.processing.with.instance.mask.refinement","name":"post-processing-with-instance-mask-refinement","description":"Refines instance segmentation predictions through post-processing that includes non-maximum suppression (NMS), mask refinement, and boundary smoothing. The post-processor takes raw mask logits and class predictions from the model and applies learned refinement operations including morphological operations (dilation/erosion) to clean up small artifacts, boundary smoothing using Gaussian filtering, and instance-level filtering to remove low-confidence predictions. NMS is applied in mask space rather than box space, enabling more accurate instance separation for overlapping objects.","intents":["I need to clean up noisy instance predictions and remove small artifacts","I want to refine instance boundaries for better visual quality","I need to filter low-confidence instances while preserving high-quality predictions","I want to handle overlapping instances more accurately than box-based NMS"],"best_for":["applications requiring high-quality instance masks (medical imaging, quality control)","systems where visual quality of predictions matters (visualization, annotation tools)","scenarios with overlapping objects where box-based NMS fails","production pipelines where post-processing latency is acceptable"],"limitations":["Post-processing adds 50-200ms latency per image depending on number of instances and refinement operations","Morphological operations can remove small objects — requires careful parameter tuning for datasets with small instances","Boundary smoothing can blur fine details — may reduce accuracy on fine-grained segmentation tasks","NMS in mask space is more computationally expensive than box-based NMS — O(n²) complexity with n instances","Post-processing parameters are dataset-specific — requires tuning for different domains"],"requires":["PyTorch 1.9+","OpenCV 4.5+ for morphological operations","scipy for advanced filtering operations","CUDA 11.0+ for GPU-accelerated post-processing (optional)","Python 3.7+"],"input_types":["raw mask logits (num_instances × H/4 × W/4)","class predictions (num_instances × 133)","confidence scores per instance"],"output_types":["refined instance masks (num_instances × H × W) at original resolution","filtered instance IDs and class predictions","confidence scores after filtering"],"categories":["image-visual","post-processing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_8","uri":"capability://tool.use.integration.huggingface.model.hub.integration.with.one.line.loading","name":"huggingface-model-hub-integration-with-one-line-loading","description":"Integrates with HuggingFace Model Hub for one-line model loading and inference through the transformers library. The model is registered with model ID 'shi-labs/oneformer_coco_swin_large' and can be loaded using AutoModel.from_pretrained() with automatic weight downloading and caching. The integration includes model card documentation, inference examples, and compatibility with HuggingFace's inference API for serverless deployment. Model weights are versioned and cached locally to avoid repeated downloads.","intents":["I want to load the model with a single line of code without manual weight management","I need to deploy the model on HuggingFace Spaces or Inference API without infrastructure setup","I want to access model documentation and usage examples from the Hub","I need to version control model weights and track changes over time"],"best_for":["practitioners building quick prototypes and demos","teams without dedicated ML infrastructure","researchers sharing models with the community","applications deployed on HuggingFace Spaces or similar platforms"],"limitations":["First-time model loading requires downloading ~1.3GB weights from HuggingFace servers — adds 30-120s latency depending on network speed","Model caching uses local disk space — requires ~2GB free space for full model + optimizer states","HuggingFace API rate limiting may affect inference if using serverless endpoints","Inference API has cold-start latency of 5-10s for first request after deployment","No built-in support for custom model variants — requires forking or creating new model cards"],"requires":["transformers library 4.25+","huggingface-hub library 0.11+","PyTorch 1.9+","internet connection for first-time model download","Python 3.7+"],"input_types":["model ID string: 'shi-labs/oneformer_coco_swin_large'","optional: revision/branch for version control"],"output_types":["loaded model object ready for inference","model configuration and metadata"],"categories":["tool-use-integration","model-deployment"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-shi-labs--oneformer_coco_swin_large__cap_9","uri":"capability://data.processing.analysis.benchmark.evaluation.on.coco.metrics","name":"benchmark-evaluation-on-coco-metrics","description":"Provides pre-computed benchmark results on COCO 2017 validation set using standard evaluation metrics including mIoU (mean Intersection-over-Union) for semantic segmentation, AP (Average Precision) for instance segmentation, and PQ (Panoptic Quality) for panoptic segmentation. Results are computed using official COCO evaluation scripts with IoU thresholds at 0.5:0.95 (standard COCO metric). The model achieves 45.1 PQ on COCO panoptic segmentation, competitive with state-of-the-art methods while maintaining unified architecture.","intents":["I need to understand model performance on standard benchmarks before deployment","I want to compare this model against other segmentation methods using standard metrics","I need to validate that pre-trained weights meet performance requirements","I want to use benchmark results to estimate performance on similar datasets"],"best_for":["teams evaluating models for production deployment","researchers comparing methods on standard benchmarks","practitioners estimating transfer learning performance","applications requiring performance guarantees"],"limitations":["Benchmark results are COCO-specific — performance on other datasets may differ significantly","COCO metrics may not align with application-specific requirements (e.g., small object detection, rare class performance)","Benchmark results assume standard evaluation protocol — custom evaluation setups may yield different results","Performance varies with input resolution — benchmark uses standard 512×512 resolution, higher resolutions may improve accuracy","Panoptic segmentation metrics (PQ) are less interpretable than semantic mIoU — requires understanding of stuff/thing class separation"],"requires":["COCO 2017 validation dataset for reproduction","pycocotools library for metric computation","Python 3.7+"],"input_types":["COCO validation images and annotations"],"output_types":["mIoU scores for semantic segmentation","AP scores for instance segmentation","PQ scores for panoptic segmentation","per-class performance metrics"],"categories":["data-processing-analysis","evaluation"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":38,"verified":false,"data_access_risk":"low","permissions":["PyTorch 1.9+","transformers library 4.25+","CUDA 11.0+ for GPU inference (CPU inference supported but slow)","minimum 8GB GPU memory for batch_size=1 inference with large variant","Python 3.7+","timm library 0.4.12+ for backbone implementation","CUDA 11.0+ for efficient attention computation","minimum 6GB GPU memory for 512×512 image processing","transformers 4.25+","PyTorch 1.9+ with autograd support"],"failure_modes":["Task conditioning adds ~15-25ms latency per inference compared to task-specific models due to additional prompt encoding","Performance on panoptic segmentation is ~2-3% lower than specialized panoptic-only models (Mask2Former) on COCO benchmark","Requires explicit task token input — cannot auto-detect optimal task from image content","Training convergence is slower than single-task models due to multi-task learning complexity","Limited to COCO dataset distribution — generalization to domain-specific segmentation tasks not validated","Shifted window attention introduces ~10-15% computational overhead compared to standard attention due to window shifting and masking operations","Feature extraction is resolution-dependent — very high resolutions (>2048×2048) require gradient checkpointing to fit in memory","Swin backbone is less interpretable than CNN-based backbones due to window-based attention patterns","Pre-trained weights are ImageNet-biased — performance may degrade on out-of-distribution domains (medical, satellite) without fine-tuning","Cross-attention adds ~20-30ms latency per inference compared to CNN decoders due to O(n²) attention computation","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.44791076386626366,"quality":0.3,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.766Z","last_scraped_at":"2026-05-03T14:23:00.162Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":54407,"model_likes":8}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=shi-labs--oneformer_coco_swin_large","compare_url":"https://unfragile.ai/compare?artifact=shi-labs--oneformer_coco_swin_large"}},"signature":"N1Tf1ViL2l+yud/a3Azu0+03FIPgaFU6BCPuIyPgb+mPLVDDrSj3yhfK23lsUwa4o7TwXgR55/xf7RVwrkWPBw==","signedAt":"2026-06-20T03:45:41.290Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/shi-labs--oneformer_coco_swin_large","artifact":"https://unfragile.ai/shi-labs--oneformer_coco_swin_large","verify":"https://unfragile.ai/api/v1/verify?slug=shi-labs--oneformer_coco_swin_large","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}