{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-model-hpcai-tech--open-sora-v2","slug":"hpcai-tech--open-sora-v2","name":"Open-Sora-v2","type":"model","url":"https://huggingface.co/hpcai-tech/Open-Sora-v2","page_url":"https://unfragile.ai/hpcai-tech--open-sora-v2","categories":["video-generation"],"tags":["open-sora","safetensors","text-to-video","arxiv:2503.09642","arxiv:2412.20404","license:apache-2.0","region:us"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-model-hpcai-tech--open-sora-v2__cap_0","uri":"capability://image.visual.text.to.video.generation.with.diffusion.based.synthesis","name":"text-to-video generation with diffusion-based synthesis","description":"Generates video sequences from natural language text prompts using a latent diffusion architecture that iteratively denoises video representations in compressed latent space. The model employs a multi-stage pipeline: text encoding via CLIP or similar embeddings, spatial-temporal noise prediction through a transformer-based UNet, and progressive decoding back to pixel space. Supports variable-length video generation (typically 1-60 seconds) with configurable frame rates and resolutions through adaptive sampling strategies.","intents":["Generate short-form video content from text descriptions for social media or marketing","Create visual storyboards or animatic sequences from screenplay or narrative text","Prototype video concepts without filming or expensive production resources","Batch-generate multiple video variations from a single text prompt with different random seeds"],"best_for":["Content creators and video producers seeking rapid prototyping workflows","AI researchers experimenting with video generation architectures and training techniques","Teams building video generation APIs or SaaS products on open-source foundations","Developers integrating video synthesis into multimodal applications or creative tools"],"limitations":["Inference latency typically 30-120 seconds per video on consumer GPUs (RTX 4090), longer on CPU-only systems","Generated videos exhibit temporal inconsistencies and object tracking artifacts in complex multi-object scenes","Maximum practical resolution limited to 720p or lower; higher resolutions require significant VRAM (24GB+)","Text prompts with specific visual styles, camera movements, or precise object interactions often produce suboptimal results","No built-in support for video editing, frame interpolation, or post-processing refinement","Model weights (~7-14GB depending on variant) require substantial storage and download bandwidth"],"requires":["Python 3.8+","PyTorch 2.0+ with CUDA 11.8+ (for GPU acceleration) or CPU fallback (significantly slower)","Minimum 8GB VRAM for inference at 480p; 16GB+ recommended for 720p or batch processing","HuggingFace Transformers library (>=4.30.0) for model loading and tokenization","Diffusers library (>=0.21.0) for pipeline orchestration and sampling strategies","Optional: xFormers or Flash Attention for memory-efficient attention computation"],"input_types":["text (natural language prompts, 10-500 characters typical)","optional: seed (integer for reproducibility)","optional: negative prompts (text describing unwanted visual elements)","optional: guidance scale (float, typically 7.5-15.0 for prompt adherence strength)"],"output_types":["video file (MP4, WebM, or raw frame sequences)","frame rate: 24fps or 30fps (configurable)","resolution: 256x256, 512x512, 576x1024, or 1024x576 (model-dependent)","duration: 2-8 seconds typical (variable based on model variant and compute budget)"],"categories":["image-visual","generative-ai"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_1","uri":"capability://image.visual.prompt.conditioned.video.generation.with.clip.based.semantic.guidance","name":"prompt-conditioned video generation with clip-based semantic guidance","description":"Encodes text prompts into high-dimensional semantic embeddings using CLIP or similar vision-language models, then uses these embeddings to guide the diffusion process through cross-attention mechanisms in the video UNet. The architecture injects text conditioning at multiple temporal and spatial scales, allowing fine-grained control over which regions and frames respond to specific prompt components. Supports classifier-free guidance to dynamically adjust prompt adherence strength during sampling.","intents":["Control video content semantics and composition through detailed natural language descriptions","Adjust prompt influence strength to balance creativity vs. prompt fidelity in generated videos","Generate videos with specific visual concepts, objects, or scenes described in text","Experiment with prompt engineering techniques to improve video quality and consistency"],"best_for":["Prompt engineers and creative technologists optimizing text descriptions for video generation","Researchers studying vision-language alignment and semantic control in generative models","Developers building interactive video generation interfaces with real-time prompt refinement"],"limitations":["CLIP embeddings may not capture fine-grained visual details or rare object combinations accurately","Guidance scale tuning is empirical and dataset-dependent; optimal values vary by prompt complexity","Conflicting or ambiguous prompts often produce averaged or degraded visual results","No explicit support for spatial or temporal prompt conditioning (e.g., 'object A moves left in frame 1-30')"],"requires":["CLIP model weights (typically ~350MB) loaded alongside video model","Text tokenizer compatible with CLIP (usually OpenAI's tokenizer)","Guidance scale parameter tuning (typically 7.5-15.0 range)"],"input_types":["text prompt (natural language, 10-500 characters)","guidance scale (float, 1.0-20.0, where 1.0 = no guidance, higher = stricter adherence)","optional: negative prompt (text describing unwanted visual elements)"],"output_types":["video file with semantically-aligned visual content","attention maps (optional, for interpretability)"],"categories":["image-visual","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_2","uri":"capability://image.visual.variable.length.video.generation.with.adaptive.temporal.modeling","name":"variable-length video generation with adaptive temporal modeling","description":"Generates videos of different lengths (typically 2-8 seconds) by dynamically adjusting temporal positional embeddings and frame sampling strategies based on target duration. The model uses a temporal transformer that learns to extrapolate or compress motion patterns across variable frame counts, avoiding the need for separate models per duration. Supports both uniform frame sampling (constant temporal resolution) and adaptive sampling (higher density for key frames).","intents":["Generate videos of specific durations (e.g., 3-second clips for TikTok, 8-second for YouTube Shorts)","Create variable-length content from a single model without retraining or model switching","Optimize inference latency by generating shorter videos when full-length synthesis is unnecessary"],"best_for":["Content creators needing platform-specific video lengths (TikTok, Instagram Reels, YouTube)","Batch processing pipelines generating videos of heterogeneous durations","Developers building adaptive video generation systems that adjust length based on downstream constraints"],"limitations":["Temporal coherence degrades significantly for videos longer than 8 seconds; motion becomes jittery or inconsistent","Shorter videos (<2 seconds) may exhibit abrupt motion or incomplete action sequences","Adaptive temporal modeling adds ~10-15% inference overhead compared to fixed-length generation","No explicit control over motion speed or temporal pacing; duration scaling affects both implicitly"],"requires":["Temporal positional embedding configuration for target duration","Frame count parameter (typically 16-48 frames, mapped to duration via frame rate)","Optional: temporal interpolation weights for smooth extrapolation"],"input_types":["text prompt","target duration in seconds (float, 2.0-8.0 typical range)","frame count (integer, 16-48, or auto-computed from duration)"],"output_types":["video file with variable frame count and duration","frame rate: 24fps or 30fps (constant across durations)"],"categories":["image-visual","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_3","uri":"capability://image.visual.batch.video.generation.with.seed.based.reproducibility","name":"batch video generation with seed-based reproducibility","description":"Generates multiple video variations from a single text prompt by iterating over different random seeds, enabling deterministic reproduction of specific outputs and systematic exploration of the generation space. The implementation uses PyTorch's random number generator seeding to ensure identical results across runs with the same seed, while different seeds produce diverse visual variations. Supports batch processing of multiple prompts in parallel on multi-GPU systems.","intents":["Generate multiple video variations from one prompt to select the best output","Reproduce specific video outputs for debugging, documentation, or quality assurance","Systematically explore the generation space by varying seeds while holding prompts constant","Parallelize video generation across multiple GPUs to reduce wall-clock time for large batches"],"best_for":["Content creators selecting best outputs from multiple generations","Researchers studying generative model diversity and output distribution","Production pipelines requiring reproducible, versioned video generation","Teams with multi-GPU infrastructure seeking to maximize throughput"],"limitations":["Seed reproducibility is only guaranteed within the same hardware, PyTorch version, and CUDA version; cross-platform reproducibility is not guaranteed","Batch processing on multi-GPU requires careful memory management; naive batching can cause OOM errors","Generating N variations requires N full inference passes; no amortization or shared computation across seeds","Seed-based variation produces diverse outputs but no explicit control over variation type (e.g., 'vary only camera angle')"],"requires":["PyTorch with deterministic mode enabled (torch.manual_seed, torch.cuda.manual_seed)","CUDA 11.8+ for reproducible GPU operations","Multi-GPU setup (optional, for parallel batch processing)","Sufficient VRAM for simultaneous inference (8GB per GPU minimum)"],"input_types":["text prompt (single or batch of prompts)","seed values (integer array, e.g., [42, 123, 456])","optional: num_variations (integer, number of seeds to generate)"],"output_types":["video files (one per seed)","metadata: seed, prompt, generation timestamp, inference time"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_4","uri":"capability://image.visual.latent.space.compression.and.efficient.video.encoding","name":"latent space compression and efficient video encoding","description":"Compresses video frames into a compact latent representation using a learned autoencoder (VAE), reducing the spatial dimensionality by 4-8x and enabling faster diffusion sampling in latent space rather than pixel space. The encoder maps raw video frames to latent codes, the diffusion process operates on these codes, and a decoder reconstructs frames from denoised latents. This architecture reduces memory consumption and inference time compared to pixel-space diffusion, while maintaining visual quality through careful VAE training.","intents":["Reduce GPU memory requirements for video generation, enabling inference on consumer-grade hardware","Accelerate diffusion sampling by operating in compressed latent space instead of high-resolution pixel space","Enable longer video generation by reducing per-frame memory footprint","Improve visual quality through learned compression that preserves perceptually important features"],"best_for":["Developers deploying video generation on resource-constrained hardware (RTX 3060, RTX 4060)","Production systems requiring sub-minute inference latency for real-time or near-real-time generation","Researchers studying learned compression and its trade-offs with generative quality","Teams optimizing inference cost in cloud environments (GPU time is expensive)"],"limitations":["VAE compression introduces reconstruction artifacts, especially for fine details and high-frequency textures","Latent space dimensionality is a fixed hyperparameter; changing it requires retraining the VAE and diffusion model","Compression ratio is typically 4-8x spatial (16-64x total); further compression degrades video quality significantly","VAE training requires large, diverse video datasets; poor VAE quality directly impacts final video quality","Latent space is not interpretable; direct manipulation of latents for fine-grained control is difficult"],"requires":["Pre-trained VAE encoder/decoder (typically ~500MB)","Latent shape and scaling factors (model-specific, e.g., 4x spatial compression, 8 latent channels)","Scaling factors for latent distribution normalization (typically learned during VAE training)"],"input_types":["raw video frames (uint8, 0-255 range) or normalized tensors (float, 0-1 range)","frame dimensions (H, W, must be divisible by compression factor)"],"output_types":["latent codes (float32, shape [batch, channels, H/4, W/4, T] for 4x compression)","reconstructed video frames (uint8 or float, same shape as input)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_5","uri":"capability://image.visual.inference.optimization.through.attention.mechanism.acceleration","name":"inference optimization through attention mechanism acceleration","description":"Accelerates the diffusion sampling process by replacing standard multi-head attention with memory-efficient variants (Flash Attention, xFormers) that reduce computational complexity from O(N²) to O(N) or use fused kernels for faster computation. The model supports optional attention optimization flags that can be toggled at inference time without retraining. Typical speedups are 2-4x for attention-heavy layers, with minimal quality degradation.","intents":["Reduce inference latency from 60-120 seconds to 30-60 seconds on consumer GPUs","Enable real-time or near-real-time video generation on resource-constrained devices","Reduce peak memory consumption during inference, allowing larger batch sizes or longer videos","Optimize inference cost in cloud environments by reducing GPU time per video"],"best_for":["Developers deploying video generation in latency-sensitive applications (interactive tools, APIs)","Teams running inference at scale in cloud environments (AWS, GCP, Azure) where GPU time is billed","Researchers benchmarking inference efficiency and hardware utilization","Content creators using local hardware who want faster iteration cycles"],"limitations":["Flash Attention requires NVIDIA GPUs with compute capability 7.5+ (RTX 20-series or newer); older hardware falls back to standard attention","xFormers library adds a dependency and may require compilation for some hardware configurations","Attention optimization is most effective for long sequences (T > 32 frames); shorter videos see minimal speedup","Some attention variants (e.g., linear attention) may introduce subtle quality degradation in fine details","Optimization flags are not always compatible with all model variants or checkpoint versions"],"requires":["PyTorch 2.0+ with CUDA 11.8+ (for Flash Attention support)","Optional: xFormers library (>=0.0.20) for additional attention variants","Optional: Triton compiler (for custom fused kernels)","NVIDIA GPU with compute capability 7.5+ (RTX 20-series or newer) for Flash Attention"],"input_types":["enable_attention_slicing (boolean, trades memory for speed)","enable_flash_attention (boolean, requires compatible GPU)","enable_xformers_memory_efficient_attention (boolean, requires xFormers library)"],"output_types":["video file (same as standard inference)","inference timing metrics (optional, for benchmarking)"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_6","uri":"capability://image.visual.multi.resolution.video.generation.with.adaptive.upsampling","name":"multi-resolution video generation with adaptive upsampling","description":"Generates videos at multiple resolutions (256x256, 512x512, 576x1024, 1024x576) by training separate model variants or using a single model with resolution-conditioned generation. The architecture supports adaptive upsampling where lower-resolution videos are progressively refined to higher resolutions, reducing inference cost compared to direct high-resolution generation. Supports both fixed-resolution and variable-resolution outputs.","intents":["Generate videos at platform-specific resolutions (e.g., 1024x576 for YouTube, 512x512 for Instagram)","Reduce inference latency by generating at lower resolution and upsampling, rather than direct high-resolution synthesis","Explore resolution trade-offs between visual quality and inference speed","Support diverse output formats without maintaining separate models per resolution"],"best_for":["Content creators targeting multiple platforms with different resolution requirements","Production pipelines optimizing for inference latency and cost","Researchers studying resolution-conditioned generation and progressive refinement","Teams with limited GPU memory seeking to generate high-resolution videos through multi-stage synthesis"],"limitations":["Upsampling-based generation may introduce artifacts at resolution boundaries; direct high-resolution generation often produces better quality","Multiple model variants (one per resolution) increase storage and download requirements (7-14GB per variant)","Progressive refinement adds latency compared to single-stage generation; speedup depends on upsampling efficiency","Aspect ratio constraints (e.g., 16:9 only) limit flexibility for non-standard formats","Very high resolutions (1024x576+) require 24GB+ VRAM; practical limit is 720p on consumer hardware"],"requires":["Model variant for target resolution (e.g., 'Open-Sora-v2-512' for 512x512)","Optional: upsampling module (e.g., Real-ESRGAN) for progressive refinement","Resolution parameter (tuple, e.g., (512, 512) or (1024, 576))"],"input_types":["text prompt","target resolution (tuple, e.g., (512, 512), (576, 1024), (1024, 576))","optional: aspect ratio (float, e.g., 16/9 for widescreen)"],"output_types":["video file at specified resolution","frame dimensions: 256x256, 512x512, 576x1024, 1024x576 (model-dependent)"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_7","uri":"capability://tool.use.integration.model.weight.distribution.and.efficient.loading.via.huggingface.hub","name":"model weight distribution and efficient loading via huggingface hub","description":"Distributes model weights (7-14GB per variant) through HuggingFace Model Hub with safetensors format for secure, efficient loading. The implementation supports lazy loading (downloading only required layers), streaming (loading weights during inference), and caching (storing downloaded weights locally). Integration with HuggingFace's transformers and diffusers libraries enables one-line model loading with automatic dependency resolution.","intents":["Download and load model weights efficiently without manual configuration or dependency management","Cache model weights locally to avoid repeated downloads and reduce bandwidth usage","Stream model weights during inference to reduce initial load time and memory footprint","Integrate Open-Sora-v2 into existing HuggingFace-based pipelines and workflows"],"best_for":["Developers using HuggingFace ecosystem (transformers, diffusers, datasets)","Teams deploying models in cloud environments with limited local storage","Researchers experimenting with multiple model variants without managing weights manually","Users with limited bandwidth seeking efficient model distribution and caching"],"limitations":["Initial download is large (7-14GB); requires stable internet connection and sufficient local storage","Safetensors format is newer and may not be compatible with older PyTorch versions or custom loading scripts","Lazy loading adds latency on first access to each layer; not suitable for latency-critical applications","HuggingFace Hub rate-limiting may throttle downloads for high-volume users","No built-in support for model quantization or compression; full-precision weights are downloaded by default"],"requires":["HuggingFace Transformers library (>=4.30.0)","HuggingFace Diffusers library (>=0.21.0)","HuggingFace Hub library (>=0.16.0) for model downloading and caching","Internet connection for initial model download","Local storage: 7-14GB per model variant (or ~2-3GB if quantized)"],"input_types":["model identifier string (e.g., 'hpcai-tech/Open-Sora-v2')","optional: cache directory path (defaults to ~/.cache/huggingface/hub)","optional: revision (e.g., 'main', specific commit hash)"],"output_types":["loaded model object (diffusers.StableDiffusionPipeline or similar)","model weights in memory (float32 or quantized format)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_8","uri":"capability://code.generation.editing.open.source.model.architecture.and.training.code.accessibility","name":"open-source model architecture and training code accessibility","description":"Provides full model architecture definitions, training scripts, and dataset preprocessing code on GitHub, enabling researchers and developers to understand, modify, and fine-tune the model. The codebase includes configuration files (YAML/JSON) for model hyperparameters, training loops with distributed training support (DDP, DeepSpeed), and evaluation metrics. Supports fine-tuning on custom video datasets with configurable training objectives (diffusion loss, adversarial loss, etc.).","intents":["Understand the model architecture and training methodology through open-source code inspection","Fine-tune the model on custom video datasets for domain-specific applications (e.g., product videos, medical imaging)","Modify model components (e.g., attention mechanisms, conditioning modules) for research or optimization","Reproduce published results and validate claims through independent training runs","Build derivative models or architectures based on Open-Sora-v2 design patterns"],"best_for":["AI researchers studying video generation architectures and training techniques","Teams building proprietary video generation systems with custom datasets","Developers optimizing model performance for specific hardware or use cases","Organizations requiring full control over model training and deployment (data privacy, compliance)"],"limitations":["Training from scratch requires significant compute resources (8-16 A100 GPUs, weeks of training time)","Fine-tuning requires curating and preprocessing custom video datasets (labor-intensive)","Training code may have undocumented dependencies or environment-specific configurations","Reproducing published results requires careful hyperparameter tuning and may not be exact due to randomness","No commercial support or guarantees; community-driven maintenance and bug fixes"],"requires":["Python 3.8+","PyTorch 2.0+ with CUDA 11.8+","Distributed training framework (PyTorch DDP or DeepSpeed) for multi-GPU training","Large video dataset (10K-100K+ videos) for fine-tuning","Significant compute resources (8+ A100 GPUs for full training, 1-2 GPUs for fine-tuning)","Git and GitHub access for cloning training code"],"input_types":["model configuration (YAML/JSON with hyperparameters)","training dataset (video files, text captions, metadata)","optional: pre-trained checkpoint for fine-tuning"],"output_types":["trained model weights (safetensors format)","training logs and metrics (TensorBoard, Weights & Biases)","evaluation results (FVD, CLIP score, user studies)"],"categories":["code-generation-editing","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-hpcai-tech--open-sora-v2__cap_9","uri":"capability://safety.moderation.safetensors.format.support.for.secure.model.loading","name":"safetensors format support for secure model loading","description":"Uses safetensors format for model weight serialization, which is a safer alternative to pickle that prevents arbitrary code execution during deserialization. The format is language-agnostic (supported in Python, Rust, JavaScript, etc.) and includes built-in metadata (model architecture, training hyperparameters, license). Loading is faster than pickle due to memory-mapped access and zero-copy deserialization.","intents":["Load model weights securely without risk of arbitrary code execution from untrusted sources","Integrate Open-Sora-v2 weights into non-Python environments (JavaScript, Rust, Go) via safetensors libraries","Verify model integrity and metadata (architecture, training details) before loading","Improve model loading performance through memory-mapped and zero-copy deserialization"],"best_for":["Security-conscious teams deploying models from untrusted sources or in restricted environments","Developers building multi-language inference systems (Python backend, JavaScript frontend)","Organizations with strict security policies requiring safe deserialization formats","Performance-sensitive applications where model loading latency is critical"],"limitations":["Safetensors is newer and less widely adopted than pickle; some legacy tools may not support it","Memory-mapped loading requires specific file system support; may not work on all storage backends (network drives, cloud storage)","Metadata is optional; not all safetensors files include complete architecture or training information","Converting existing pickle checkpoints to safetensors requires a conversion step (one-time cost)"],"requires":["safetensors library (Python: pip install safetensors, or language-specific equivalent)","Model weights in safetensors format (provided by HuggingFace Hub)","Optional: safetensors libraries for non-Python languages (safetensors-rs for Rust, etc.)"],"input_types":["safetensors file path (local or HuggingFace Hub URL)","optional: metadata verification flags"],"output_types":["loaded model weights (PyTorch tensor, NumPy array, or language-specific format)","metadata dictionary (architecture, training config, license)"],"categories":["safety-moderation","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":37,"verified":false,"data_access_risk":"high","permissions":["Python 3.8+","PyTorch 2.0+ with CUDA 11.8+ (for GPU acceleration) or CPU fallback (significantly slower)","Minimum 8GB VRAM for inference at 480p; 16GB+ recommended for 720p or batch processing","HuggingFace Transformers library (>=4.30.0) for model loading and tokenization","Diffusers library (>=0.21.0) for pipeline orchestration and sampling strategies","Optional: xFormers or Flash Attention for memory-efficient attention computation","CLIP model weights (typically ~350MB) loaded alongside video model","Text tokenizer compatible with CLIP (usually OpenAI's tokenizer)","Guidance scale parameter tuning (typically 7.5-15.0 range)","Temporal positional embedding configuration for target duration"],"failure_modes":["Inference latency typically 30-120 seconds per video on consumer GPUs (RTX 4090), longer on CPU-only systems","Generated videos exhibit temporal inconsistencies and object tracking artifacts in complex multi-object scenes","Maximum practical resolution limited to 720p or lower; higher resolutions require significant VRAM (24GB+)","Text prompts with specific visual styles, camera movements, or precise object interactions often produce suboptimal results","No built-in support for video editing, frame interpolation, or post-processing refinement","Model weights (~7-14GB depending on variant) require substantial storage and download bandwidth","CLIP embeddings may not capture fine-grained visual details or rare object combinations accurately","Guidance scale tuning is empirical and dataset-dependent; optimal values vary by prompt complexity","Conflicting or ambiguous prompts often produce averaged or degraded visual results","No explicit support for spatial or temporal prompt conditioning (e.g., 'object A moves left in frame 1-30')","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.43203701868130223,"quality":0.3,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.765Z","last_scraped_at":"2026-04-22T08:08:18.365Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":16568,"model_likes":173}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=hpcai-tech--open-sora-v2","compare_url":"https://unfragile.ai/compare?artifact=hpcai-tech--open-sora-v2"}},"signature":"wqLazE+t6GuLlWj08t/hCYFhfry3+Kjsf98EJgyGO5eCP8+UTpTX5JECdUuBzuDYfywwe3gNLyLDnxQ0hObvAg==","signedAt":"2026-06-21T21:27:58.761Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/hpcai-tech--open-sora-v2","artifact":"https://unfragile.ai/hpcai-tech--open-sora-v2","verify":"https://unfragile.ai/api/v1/verify?slug=hpcai-tech--open-sora-v2","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}