{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"llama-3-2-90b-vision","slug":"llama-3-2-90b-vision","name":"Llama 3.2 90B Vision","type":"model","url":"https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/","page_url":"https://unfragile.ai/llama-3-2-90b-vision","categories":["model-training","documentation"],"tags":[],"pricing":{"model":"free","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"llama-3-2-90b-vision__cap_0","uri":"capability://image.visual.multimodal.vision.language.reasoning.with.128k.context.window","name":"multimodal vision-language reasoning with 128k context window","description":"Processes both text and image inputs simultaneously within a 128K token context window, enabling extended visual reasoning tasks that require maintaining state across multiple images and lengthy textual analysis. Built on a Llama 3.1 70B text backbone augmented with a vision encoder component that converts image data into token embeddings compatible with the transformer architecture, allowing unified attention mechanisms across modalities.","intents":["I need to analyze multiple images and documents together in a single conversation without losing context","I want to perform visual reasoning that requires referencing previous images and text in the same session","I need to process long documents with embedded charts and images while maintaining coherent understanding"],"best_for":["enterprises performing document analysis at scale with mixed text-image content","researchers building multimodal RAG systems requiring extended context","developers creating vision-enabled agents that need to reason across multiple visual inputs"],"limitations":["Requires multi-GPU setup for inference, making single-machine deployment impractical","Vision encoder architecture not publicly documented, limiting custom fine-tuning understanding","128K context is fixed and non-expandable; no rope scaling or dynamic context extension","Specific image format constraints and maximum resolution not documented"],"requires":["Multi-GPU system (specific VRAM requirements unknown but estimated 200GB+ for full precision)","PyTorch 2.0+ for inference","Access to model weights from llama.com or Hugging Face","torchtune framework for custom fine-tuning applications"],"input_types":["text (prompts, instructions, context)","images (format specifications unknown)","mixed text-image sequences"],"output_types":["text (natural language responses)","structured reasoning traces"],"categories":["image-visual","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_1","uri":"capability://image.visual.state.of.the.art.visual.reasoning.on.open.weight.benchmarks","name":"state-of-the-art visual reasoning on open-weight benchmarks","description":"Achieves top performance on visual reasoning tasks including spatial relationships, object interactions, and scene understanding as measured against open-weight model benchmarks. The model leverages the 70B text backbone's reasoning capabilities combined with vision encoder embeddings to perform multi-step visual inference without external tools, enabling direct comparison against other open models on standardized evaluation sets.","intents":["I need to benchmark my vision model against open-source alternatives to justify architecture choices","I want to use a model with proven visual reasoning performance for production deployments","I need to understand how open-weight models compare to proprietary vision systems on reasoning tasks"],"best_for":["ML engineers evaluating open-source vision models for production use","researchers comparing multimodal architectures on standardized benchmarks","teams migrating from proprietary vision APIs to open-weight alternatives"],"limitations":["Benchmark scores not provided in source material — claims are qualitative only","Comparison limited to open-weight models; proprietary baseline comparisons lack numerical support","Specific benchmark datasets and evaluation protocols not documented","Performance claims cannot be independently verified from available sources"],"requires":["Access to benchmark evaluation code (not provided)","Sufficient compute to run inference on test sets","Understanding of benchmark methodology to interpret results"],"input_types":["images (benchmark test sets)","text (reasoning prompts)"],"output_types":["text (reasoning outputs)","benchmark scores (not publicly available)"],"categories":["image-visual","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_10","uri":"capability://tool.use.integration.rag.and.tool.enabled.application.support.with.safety.features","name":"rag and tool-enabled application support with safety features","description":"Supports integration with retrieval-augmented generation (RAG) systems and tool-calling frameworks with built-in safety features for preventing misuse in agent applications. The model can be integrated with function-calling interfaces and knowledge bases while maintaining safety guardrails that prevent harmful outputs or tool misuse.","intents":["I need to build a vision-language agent that can call tools and access knowledge bases safely","I want to integrate the model with RAG systems without compromising safety","I need to ensure tool-calling outputs are validated and safe before execution"],"best_for":["teams building multimodal agents with external tool access","enterprises deploying vision-language RAG systems","organizations requiring safety guarantees in agent applications"],"limitations":["Safety feature implementation details not documented","Tool-calling interface specifications not provided","RAG integration patterns not documented","Safety constraint trade-offs with capability unknown"],"requires":["RAG framework (e.g., LlamaIndex, LangChain) with vision support","Tool-calling interface compatible with Llama models","Safety evaluation and testing infrastructure","Multi-GPU system for inference"],"input_types":["text (prompts, tool specifications)","images (task inputs)","structured data (tool schemas, knowledge base queries)"],"output_types":["text (responses)","tool calls (function invocations)","structured data (JSON tool outputs)"],"categories":["tool-use-integration","memory-knowledge","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_11","uri":"capability://image.visual.competitive.performance.against.gpt.4v.on.vision.tasks","name":"competitive performance against gpt-4v on vision tasks","description":"Achieves performance competitive with OpenAI's GPT-4V on many vision-language tasks, positioning it as a capable open-weight alternative to proprietary vision models. The model's 90B parameter size and vision encoder design enable comparable reasoning and understanding on visual content without relying on proprietary APIs.","intents":["I need to replace GPT-4V with an open-weight alternative for cost or privacy reasons","I want to evaluate whether open models can match proprietary vision system performance","I need to benchmark my vision-language system against GPT-4V equivalents"],"best_for":["teams migrating from proprietary vision APIs to open-weight models","organizations with cost constraints requiring open alternatives","enterprises with privacy or data sovereignty requirements"],"limitations":["Performance claims are qualitative — no numerical benchmarks provided","Comparison limited to 'many' tasks without specifying which tasks","No documented failure modes or task categories where GPT-4V outperforms","Actual performance parity cannot be independently verified"],"requires":["Multi-GPU system for inference","Benchmark datasets for comparative evaluation","Understanding of GPT-4V capabilities for fair comparison"],"input_types":["images (vision tasks)","text (prompts)"],"output_types":["text (responses)","structured data"],"categories":["image-visual","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_12","uri":"capability://image.visual.performance.exceeding.claude.3.haiku.on.image.understanding","name":"performance exceeding claude 3 haiku on image understanding","description":"Outperforms Anthropic's Claude 3 Haiku model on image understanding tasks, demonstrating stronger visual reasoning capability than smaller proprietary alternatives. The larger parameter count and specialized vision encoder enable more sophisticated image analysis than lightweight models optimized for efficiency.","intents":["I need stronger image understanding than Claude 3 Haiku provides","I want to replace Claude 3 Haiku with a more capable open-weight alternative","I need to evaluate whether larger open models outperform smaller proprietary models"],"best_for":["teams currently using Claude 3 Haiku seeking better performance","organizations evaluating open-weight alternatives to proprietary models","developers building image understanding features with performance requirements"],"limitations":["Performance comparison is qualitative without numerical metrics","Specific image understanding tasks not documented","No documented failure modes or task categories where Haiku outperforms","Actual performance advantage cannot be independently verified"],"requires":["Multi-GPU system for inference","Benchmark datasets for comparative evaluation","Understanding of Claude 3 Haiku capabilities"],"input_types":["images (understanding tasks)","text (prompts)"],"output_types":["text (responses)","structured data"],"categories":["image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_13","uri":"capability://text.generation.language.drop.in.replacement.for.llama.3.1.text.models.with.vision.capability","name":"drop-in replacement for llama 3.1 text models with vision capability","description":"Maintains API compatibility with Llama 3.1 70B text model while adding vision input support, enabling existing Llama 3.1 deployments to upgrade to multimodal capability without changing application code. The model preserves text-only inference paths for backward compatibility while extending the interface to accept image inputs.","intents":["I want to add vision capability to my existing Llama 3.1 deployment without refactoring","I need to upgrade my text-only application to support images with minimal code changes","I want to maintain compatibility with existing Llama 3.1 integrations while adding vision"],"best_for":["teams with existing Llama 3.1 deployments seeking vision capability","organizations wanting to extend text-only applications incrementally","developers maintaining backward compatibility with existing integrations"],"limitations":["API compatibility details not documented — unclear which interfaces are preserved","Performance impact of vision encoder on text-only tasks unknown","Migration path and compatibility guarantees not specified","Inference latency changes compared to text-only Llama 3.1 not documented"],"requires":["Existing Llama 3.1 deployment or integration","Multi-GPU system (larger than text-only requirements)","Understanding of Llama 3.1 API for compatibility verification"],"input_types":["text (prompts, compatible with Llama 3.1)","images (new capability)"],"output_types":["text (responses, compatible with Llama 3.1)"],"categories":["text-generation-language","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_14","uri":"capability://automation.workflow.optimization.for.arm.processors.and.mobile.hardware","name":"optimization for arm processors and mobile hardware","description":"Includes optimizations for Arm-based processors and mobile hardware, enabling deployment on Qualcomm and MediaTek chipsets through ExecuTorch. The model supports device-specific operator fusion and quantization strategies that reduce memory footprint and latency on mobile platforms while maintaining inference quality.","intents":["I need to deploy vision-language inference on Arm-based mobile devices","I want to optimize the model for Qualcomm or MediaTek processors","I need to reduce model size and latency for on-device inference"],"best_for":["mobile app developers building on-device vision features","hardware manufacturers integrating AI into devices","teams deploying to Qualcomm or MediaTek-based systems"],"limitations":["Arm optimization details not documented","Specific Qualcomm and MediaTek chipset support not listed","Quantization strategy for mobile deployment unknown","Performance characteristics on mobile hardware not provided"],"requires":["Arm-based processor (Qualcomm or MediaTek mentioned)","PyTorch ExecuTorch framework","Model quantization (format and method not specified)","Sufficient device memory (requirements unknown)"],"input_types":["images (device camera or storage)","text (prompts)"],"output_types":["text (inference results)"],"categories":["automation-workflow","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_2","uri":"capability://image.visual.chart.and.graph.understanding.with.visual.extraction","name":"chart and graph understanding with visual extraction","description":"Interprets charts, graphs, and data visualizations by analyzing visual structure, axis labels, legends, and data point relationships to extract quantitative insights and answer questions about trends, comparisons, and anomalies. The vision encoder processes the visual layout while the text backbone performs semantic reasoning about the data relationships, enabling both visual parsing and numerical inference in a single forward pass.","intents":["I need to extract data from charts in PDFs and images without manual transcription","I want to ask questions about trends and relationships in visualizations programmatically","I need to analyze financial reports, dashboards, and scientific papers containing complex charts"],"best_for":["data analysts automating chart extraction from reports and documents","financial services teams processing earnings reports and market analysis","research teams extracting data from scientific papers and technical documentation"],"limitations":["Chart type support not documented — unclear if handles all visualization types equally","Accuracy on complex multi-panel charts or non-standard visualizations unknown","No documented handling of 3D charts, animated visualizations, or interactive elements","Extraction accuracy benchmarks not provided"],"requires":["Multi-GPU system for inference","Images with sufficient resolution to preserve chart details","torchtune for fine-tuning on domain-specific chart types"],"input_types":["images (charts, graphs, visualizations)","text (questions about chart content)"],"output_types":["text (extracted data, answers, insights)","structured data (if prompted for JSON extraction)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_3","uri":"capability://image.visual.document.analysis.with.embedded.images.and.text","name":"document analysis with embedded images and text","description":"Analyzes documents containing mixed text and images (PDFs, scanned documents, reports) by maintaining coherent understanding across pages and sections within the 128K context window. The model processes both OCR-able text and visual elements (diagrams, photos, charts) simultaneously, enabling document-level comprehension without requiring separate preprocessing pipelines for text extraction and image analysis.","intents":["I need to extract information from multi-page PDFs with mixed text and images","I want to answer questions about document content that spans both text and visual elements","I need to classify or summarize documents that contain diagrams, photos, and tables"],"best_for":["legal tech teams processing contracts and regulatory documents","insurance companies analyzing claims documents with photos and forms","enterprise document management systems requiring intelligent indexing"],"limitations":["Document format support not specified — unclear if handles PDF, TIFF, or other formats natively","Page-by-page processing strategy not documented; unclear how multi-page documents are tokenized","OCR quality for scanned documents not benchmarked","Maximum document length within 128K context not specified"],"requires":["Multi-GPU system for inference","Document preprocessing to convert to image format if needed","torchtune for fine-tuning on domain-specific document types"],"input_types":["images (document pages, scans)","text (questions, extraction prompts)"],"output_types":["text (extracted information, answers, summaries)","structured data (if prompted for JSON extraction)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_4","uri":"capability://text.generation.language.instruction.tuned.multimodal.generation.with.alignment","name":"instruction-tuned multimodal generation with alignment","description":"Provides instruction-tuned variants that follow user directives for vision-language tasks through supervised fine-tuning on instruction-following datasets. The model learns to interpret task specifications (e.g., 'extract all prices', 'describe in bullet points', 'answer in JSON') and adapt output format accordingly, enabling more reliable task-specific behavior than base model inference.","intents":["I need the model to follow specific output format instructions (JSON, markdown, bullet points)","I want consistent behavior across different vision-language tasks without prompt engineering","I need to fine-tune the model on my own instruction-following data for domain-specific tasks"],"best_for":["teams building production systems requiring consistent output formatting","enterprises fine-tuning on proprietary instruction datasets","developers creating task-specific vision-language agents"],"limitations":["Instruction-tuning methodology not documented — unclear which datasets or techniques were used","Alignment properties (refusal behavior, safety constraints) not specified","Fine-tuning stability and convergence characteristics unknown","No documented comparison of instruction-tuned vs base model performance"],"requires":["torchtune framework for custom fine-tuning","Instruction-following training data (format specifications unknown)","Multi-GPU system for fine-tuning and inference","Understanding of LoRA or other parameter-efficient fine-tuning techniques"],"input_types":["text (instructions, task specifications)","images (task inputs)","training data (for fine-tuning)"],"output_types":["text (formatted according to instructions)","structured data (JSON, markdown, etc.)"],"categories":["text-generation-language","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_5","uri":"capability://code.generation.editing.local.deployment.via.torchtune.fine.tuning.framework","name":"local deployment via torchtune fine-tuning framework","description":"Enables custom fine-tuning of the 90B vision model using Meta's torchtune framework, which provides distributed training abstractions, memory optimization, and checkpoint management for adapting the model to domain-specific tasks. The framework handles multi-GPU synchronization, gradient accumulation, and mixed-precision training to make fine-tuning accessible on typical enterprise hardware.","intents":["I need to fine-tune the model on proprietary data without sending it to external APIs","I want to adapt the model to domain-specific vision-language tasks with custom datasets","I need to maintain model weights on-premises for compliance or competitive reasons"],"best_for":["enterprises with proprietary training data requiring on-premises fine-tuning","teams building domain-specific vision-language models","organizations with regulatory requirements preventing cloud model training"],"limitations":["torchtune framework maturity and stability not documented","Fine-tuning memory requirements not specified — unclear if 90B model is practical to fine-tune on typical multi-GPU setups","No documented guidance on LoRA vs full fine-tuning trade-offs","Convergence characteristics and training stability unknown"],"requires":["torchtune framework (version not specified)","Multi-GPU system (specific VRAM requirements unknown)","PyTorch 2.0+","Training data in documented format (format specifications unknown)","Understanding of distributed training concepts"],"input_types":["training data (text-image pairs with labels)","configuration files (torchtune format)"],"output_types":["fine-tuned model weights","checkpoint files"],"categories":["code-generation-editing","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_6","uri":"capability://automation.workflow.on.device.deployment.via.pytorch.executorch","name":"on-device deployment via pytorch executorch","description":"Supports deployment on edge devices through PyTorch ExecuTorch, which converts the model to optimized bytecode and enables inference on mobile and embedded systems with reduced memory footprint. The framework handles quantization, operator fusion, and device-specific optimizations to make the model practical for on-device inference where cloud connectivity is unavailable or undesirable.","intents":["I need to run vision-language inference on mobile devices without cloud connectivity","I want to deploy the model on embedded systems with limited memory","I need to process sensitive images locally without sending them to external servers"],"best_for":["mobile app developers building on-device vision features","IoT teams deploying vision models on edge hardware","enterprises with privacy requirements preventing cloud inference"],"limitations":["ExecuTorch support for 90B model not confirmed — smaller variants (1B, 3B) explicitly mentioned for edge","On-device quantization strategy not documented","Latency and memory requirements for on-device inference unknown","Device compatibility matrix not provided"],"requires":["PyTorch ExecuTorch framework","Target device with sufficient memory (requirements unknown)","Model quantization (format and method not specified)","Device-specific optimization (Arm processors mentioned as supported)"],"input_types":["images (device camera or local storage)","text (prompts)"],"output_types":["text (inference results)","structured data"],"categories":["automation-workflow","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_7","uri":"capability://tool.use.integration.single.node.inference.via.ollama.integration","name":"single-node inference via ollama integration","description":"Enables single-machine inference through Ollama, which provides a simplified interface for running the model locally with automatic model downloading, quantization, and memory management. Ollama abstracts away multi-GPU orchestration complexity and provides a REST API for integration with applications, making local deployment more accessible than raw PyTorch inference.","intents":["I want to run the model locally without managing multi-GPU setup complexity","I need a simple REST API for integrating vision-language inference into applications","I want to experiment with the model without cloud infrastructure"],"best_for":["developers prototyping vision-language applications locally","teams evaluating the model before production deployment","individuals experimenting with multimodal AI without cloud costs"],"limitations":["Ollama support for 90B model not explicitly confirmed","Single-node performance characteristics not documented","Quantization options and memory requirements unknown","REST API latency and throughput not specified"],"requires":["Ollama framework (version not specified)","Multi-GPU system or high-end single GPU (requirements unknown)","Sufficient disk space for model weights","Network connectivity for initial model download"],"input_types":["text (prompts via REST API)","images (via REST API)"],"output_types":["text (streaming or batch responses)","JSON (structured responses)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_8","uri":"capability://automation.workflow.llama.stack.distribution.across.deployment.environments","name":"llama stack distribution across deployment environments","description":"Available through Llama Stack distributions that provide pre-configured deployments for single-node, on-premises, cloud, and on-device environments. Each distribution includes the model, inference runtime, and integration templates for common platforms (AWS, Azure, Google Cloud), reducing deployment configuration burden and enabling consistent model behavior across infrastructure types.","intents":["I need to deploy the model consistently across development, staging, and production environments","I want to migrate from one cloud provider to another without reconfiguring the model","I need pre-built integrations with my existing cloud infrastructure"],"best_for":["enterprises deploying across multiple cloud providers","teams managing hybrid on-premises and cloud infrastructure","organizations standardizing on Llama Stack for model deployment"],"limitations":["Llama Stack distribution details not documented — unclear what's included in each variant","Cloud provider integration specifics not provided","Configuration management and versioning strategy unknown","Support and maintenance model for distributions not specified"],"requires":["Llama Stack framework (version not specified)","Target deployment environment (single-node, on-prem, cloud, or on-device)","Cloud provider credentials (if using cloud distributions)","Understanding of Llama Stack configuration"],"input_types":["configuration files (Llama Stack format)","deployment specifications"],"output_types":["deployed model service","inference endpoints"],"categories":["automation-workflow","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__cap_9","uri":"capability://text.generation.language.immediate.testing.via.meta.ai.smart.assistant","name":"immediate testing via meta ai smart assistant","description":"Provides immediate access to the model through Meta's AI smart assistant interface, enabling users to test vision-language capabilities without local deployment or API key setup. The assistant handles model inference on Meta's infrastructure and provides a conversational interface for exploring the model's capabilities on images and text.","intents":["I want to quickly test the model's capabilities without setting up local infrastructure","I need to evaluate the model on my own images before committing to deployment","I want to explore vision-language features through a conversational interface"],"best_for":["developers evaluating the model before production decisions","non-technical stakeholders exploring AI capabilities","teams prototyping vision-language features quickly"],"limitations":["Meta AI assistant availability and terms of service not documented","Rate limiting and usage quotas unknown","Image privacy and data retention policies not specified","No API access — testing limited to conversational interface"],"requires":["Meta account (or access to Meta AI assistant)","Web browser or Meta app","No local infrastructure required"],"input_types":["text (conversational prompts)","images (uploaded to assistant)"],"output_types":["text (conversational responses)"],"categories":["text-generation-language","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-90b-vision__headline","uri":"capability://image.visual.state.of.the.art.multimodal.ai.model","name":"state-of-the-art multimodal ai model","description":"Llama 3.2 90B Vision is the largest open multimodal model, excelling in visual reasoning, chart understanding, and document analysis with a 128K context window for both text and image inputs.","intents":["best multimodal AI model","multimodal model for visual reasoning","top model for document analysis","AI model for chart understanding","open-source multimodal model comparison"],"best_for":["research in visual reasoning","document analysis tasks","applications requiring high context windows"],"limitations":[],"requires":["multi-GPU setup"],"input_types":["text","image"],"output_types":["text","image"],"categories":["image-visual"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":58,"verified":false,"data_access_risk":"low","permissions":["Multi-GPU system (specific VRAM requirements unknown but estimated 200GB+ for full precision)","PyTorch 2.0+ for inference","Access to model weights from llama.com or Hugging Face","torchtune framework for custom fine-tuning applications","Access to benchmark evaluation code (not provided)","Sufficient compute to run inference on test sets","Understanding of benchmark methodology to interpret results","RAG framework (e.g., LlamaIndex, LangChain) with vision support","Tool-calling interface compatible with Llama models","Safety evaluation and testing infrastructure"],"failure_modes":["Requires multi-GPU setup for inference, making single-machine deployment impractical","Vision encoder architecture not publicly documented, limiting custom fine-tuning understanding","128K context is fixed and non-expandable; no rope scaling or dynamic context extension","Specific image format constraints and maximum resolution not documented","Benchmark scores not provided in source material — claims are qualitative only","Comparison limited to open-weight models; proprietary baseline comparisons lack numerical support","Specific benchmark datasets and evaluation protocols not documented","Performance claims cannot be independently verified from available sources","Safety feature implementation details not documented","Tool-calling interface specifications not provided","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7,"quality":0.9,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:23.327Z","last_scraped_at":null,"last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=llama-3-2-90b-vision","compare_url":"https://unfragile.ai/compare?artifact=llama-3-2-90b-vision"}},"signature":"/lqgxllJxkYJFJYvpnOrUQbocobURET0qbQvAv33z5QQkWp02FrUDii7zUvlphv5exR2s7U9ZqOl8suWYCAqBg==","signedAt":"2026-06-20T22:56:34.030Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/llama-3-2-90b-vision","artifact":"https://unfragile.ai/llama-3-2-90b-vision","verify":"https://unfragile.ai/api/v1/verify?slug=llama-3-2-90b-vision","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}