{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"llama-3-2-11b-vision","slug":"llama-3-2-11b-vision","name":"Llama 3.2 11B Vision","type":"model","url":"https://ai.meta.com/blog/llama-3-2-connect-2024-vision-edge-mobile-devices/","page_url":"https://unfragile.ai/llama-3-2-11b-vision","categories":["model-training","documentation"],"tags":[],"pricing":{"model":"free","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"llama-3-2-11b-vision__cap_0","uri":"capability://image.visual.multimodal.image.text.understanding.with.cross.attention.fusion","name":"multimodal image-text understanding with cross-attention fusion","description":"Processes images and text simultaneously using a cross-attention vision adapter layered on top of the Llama 3.1 8B text backbone. The architecture fuses visual features from an image encoder with token embeddings, enabling the model to reason about image content in natural language. Supports 128K token context window, allowing analysis of multiple images or lengthy documents alongside conversational text.","intents":["I need to ask questions about images and get detailed answers","I want to analyze documents with images and extract information","I need to describe what's happening in photos programmatically","I want to build a local multimodal chatbot without cloud dependencies"],"best_for":["developers building self-hosted multimodal applications","teams requiring on-device vision+language processing","organizations with privacy constraints preventing cloud image uploads","edge/mobile developers needing compact multimodal inference"],"limitations":["Vision encoder architecture not publicly documented — limits ability to fine-tune vision component independently","Maximum image resolution and count per input not specified — unknown practical limits for high-resolution documents","No quantitative benchmarks provided — 'competitive with Claude 3 Haiku' claim unsubstantiated with actual metrics","128K context window is fixed hard limit — cannot process arbitrarily long document sequences","Hallucination rates and factuality benchmarks for visual reasoning not documented"],"requires":["Single GPU with sufficient VRAM (specific requirement unknown, likely 16GB+ for full precision)","PyTorch 2.0+ for model loading and inference","Image input in standard formats (JPEG, PNG, WebP — specific support unknown)","torchtune or PyTorch ExecuTorch for deployment"],"input_types":["image (JPEG, PNG, WebP — formats inferred from standard multimodal model support)","text (natural language queries, instructions, document text)"],"output_types":["text (natural language responses, descriptions, extracted information)"],"categories":["image-visual","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_1","uri":"capability://image.visual.visual.question.answering.with.instruction.following","name":"visual question answering with instruction-following","description":"Instruction-tuned variant of the base model that specializes in answering natural language questions about image content. Uses supervised fine-tuning on VQA datasets to align the multimodal fusion with question-answering patterns. The 128K context window enables multi-turn conversations where previous questions and answers inform subsequent visual reasoning.","intents":["I want to ask follow-up questions about an image in a conversation","I need to extract specific details from photos (e.g., 'how many people are in this image?')","I want to build a visual search or image annotation system","I need to verify image content against text descriptions"],"best_for":["developers building image annotation or tagging systems","teams creating visual search or reverse image lookup tools","applications requiring conversational image analysis","accessibility tools that describe images to users"],"limitations":["No training data composition disclosed — unknown what VQA datasets were used or their biases","Instruction-following quality not benchmarked — no metrics on answer accuracy, hallucination rates, or failure modes","Multi-turn conversation context management not documented — unclear how model handles contradictory information across turns","No safety alignment details provided — unknown how model handles adversarial or sensitive image queries"],"requires":["Instruction-tuned model variant (separate from base model)","Image and text input capability","Inference framework supporting 128K context (torchtune, PyTorch ExecuTorch, or Ollama)"],"input_types":["image (with associated natural language question)","text (question or instruction about image content)"],"output_types":["text (natural language answer to visual question)"],"categories":["image-visual","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_10","uri":"capability://image.visual.multimodal.reasoning.with.persistent.image.context.across.turns","name":"multimodal reasoning with persistent image context across turns","description":"Enables multi-turn conversations where image context persists across multiple user queries and model responses. The 128K context window allows the model to maintain references to previously discussed images, enabling follow-up questions, comparative analysis, and reasoning that builds on prior visual understanding. Context management is handled at the token level, with both image and text tokens contributing to the context budget.","intents":["I want to ask multiple questions about the same image in a conversation","I need to compare or contrast multiple images across conversation turns","I want the model to remember visual context from earlier in the conversation","I need to build complex reasoning that references multiple images discussed earlier"],"best_for":["interactive visual analysis tools and dashboards","conversational image exploration and discovery","multi-step visual reasoning applications","accessibility tools providing detailed image descriptions through dialogue"],"limitations":["Context management strategy not documented — unclear how model prioritizes recent vs. early context","No guidance on optimal context composition for multi-turn conversations — unknown best practices","Conversation length limits not specified — unclear maximum turns before context exhaustion","Image reference tracking not documented — unknown how model disambiguates between multiple images","Inference latency for long conversations not benchmarked — unclear performance degradation with context growth"],"requires":["Inference framework supporting 128K context and multi-turn conversation management","Application logic to maintain conversation history and image references","Sufficient GPU VRAM to hold full conversation context"],"input_types":["image (multiple images across conversation turns)","text (questions, follow-ups, instructions)"],"output_types":["text (responses maintaining image context awareness)"],"categories":["image-visual","memory-knowledge","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_11","uri":"capability://code.generation.editing.open.weight.model.with.community.fine.tuning.ecosystem","name":"open-weight model with community fine-tuning ecosystem","description":"Released as open-weight model on Hugging Face and llama.com, enabling community contributions, fine-tuning, and derivative works. The open-weight approach (vs. closed APIs) allows researchers and developers to inspect model weights, create custom variants, and build tools around the model. Community fine-tuning efforts create specialized variants for specific domains or tasks, expanding the model's capabilities beyond the base release.","intents":["I want to inspect and understand the model's internal representations","I need to create a custom variant for my specific domain or task","I want to contribute improvements or variants back to the community","I need to ensure model transparency and auditability for compliance"],"best_for":["researchers studying multimodal model architectures and behavior","open-source projects building on the model","organizations with transparency and auditability requirements","communities creating specialized variants for niche domains","developers avoiding vendor lock-in with proprietary models"],"limitations":["License terms not documented — unclear commercial use restrictions or attribution requirements","No official community governance or variant curation — unknown quality standards for community fine-tuning","Model card and documentation completeness unknown — may lack detailed capability descriptions","Community support quality variable — no guarantee of maintenance or bug fixes for community variants","Fragmentation risk — multiple incompatible variants may emerge, complicating ecosystem"],"requires":["Model weights from Hugging Face or llama.com (free download)","License compliance review (license terms not provided)","Infrastructure for hosting or serving custom variants (if creating derivatives)"],"input_types":["model weights (for inspection, fine-tuning, or derivative creation)"],"output_types":["custom model variants, research insights, community contributions"],"categories":["code-generation-editing","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_2","uri":"capability://image.visual.document.analysis.and.ocr.adjacent.text.extraction","name":"document analysis and ocr-adjacent text extraction","description":"Processes scanned documents, PDFs, and images containing text by combining visual understanding with language generation to extract and summarize content. Unlike traditional OCR, the model understands document layout, context, and semantic meaning, enabling extraction of structured information (tables, forms, key-value pairs) from unstructured document images. Works within the 128K token context, allowing analysis of multi-page documents represented as sequential images.","intents":["I need to extract text and structure from scanned invoices or receipts","I want to parse tables from PDF images and convert to structured data","I need to identify and extract key information from forms or contracts","I want to summarize document content from images without manual transcription"],"best_for":["document processing teams automating invoice/receipt handling","legal tech companies analyzing contracts and forms","financial services extracting data from scanned documents","accessibility tools converting document images to structured text"],"limitations":["No OCR accuracy benchmarks provided — unknown error rates vs. traditional OCR engines","Maximum document length not specified — unclear if 128K context is sufficient for multi-page document sequences","Table extraction accuracy not documented — no metrics on structured data extraction quality","Language support for non-English documents unknown — likely limited to English-dominant training data","Handwriting recognition capability not mentioned — likely fails on cursive or handwritten text"],"requires":["Document images in standard formats (JPEG, PNG, PDF-to-image conversion required)","Inference framework supporting image+text input","Post-processing logic to parse extracted text into structured formats (model outputs raw text)"],"input_types":["image (scanned document, PDF page, form, invoice, receipt)","text (optional: instructions for extraction, e.g., 'extract all line items')"],"output_types":["text (extracted document content, structured summaries)","structured data (via post-processing: JSON, CSV for tables/forms)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_3","uri":"capability://automation.workflow.single.gpu.local.inference.with.edge.mobile.optimization","name":"single-gpu local inference with edge/mobile optimization","description":"Engineered to run on a single GPU with optimizations for Arm processors and mobile hardware (Qualcomm Snapdragon, MediaTek). Uses PyTorch ExecuTorch for on-device distribution and torchtune for local fine-tuning. The 11B parameter size (vs. 70B+ alternatives) fits within memory constraints of consumer GPUs and edge accelerators, enabling real-time inference without cloud dependencies.","intents":["I want to run a multimodal model on my local machine without cloud APIs","I need to deploy vision+language on edge devices or mobile phones","I want to avoid latency and privacy issues of cloud inference","I need to fine-tune a multimodal model on proprietary data locally"],"best_for":["solo developers building local AI applications","teams with privacy requirements preventing cloud data transfer","edge computing deployments (robotics, autonomous systems, IoT)","mobile app developers targeting Android/iOS with on-device AI","organizations in regulated industries (healthcare, finance) requiring data residency"],"limitations":["Specific VRAM requirements not documented — unknown minimum GPU memory for full precision vs. quantized inference","Inference latency benchmarks not provided — unknown tokens-per-second on consumer GPUs","Quantization options not specified — unclear if INT8, FP8, or other formats are supported","Mobile deployment requires ExecuTorch conversion — adds engineering complexity vs. cloud APIs","No multi-GPU scaling documented — single GPU constraint may limit batch processing or concurrent requests"],"requires":["GPU with sufficient VRAM (estimated 16GB+ for full precision, likely 8GB+ for quantized)","PyTorch 2.0+ or PyTorch ExecuTorch for on-device deployment","torchtune for local fine-tuning (optional)","Ollama or torchchat for simplified single-node deployment","Arm processor support for mobile/edge (Qualcomm, MediaTek, Apple Silicon)"],"input_types":["image","text"],"output_types":["text"],"categories":["automation-workflow","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_4","uri":"capability://code.generation.editing.fine.tuning.with.torchtune.framework","name":"fine-tuning with torchtune framework","description":"Supports supervised fine-tuning on custom datasets using the torchtune framework, enabling adaptation to domain-specific tasks without retraining from scratch. The framework abstracts distributed training, gradient checkpointing, and memory optimization, allowing developers to fine-tune the full model or specific adapter layers on local hardware. Instruction-tuned variants are available as starting points for task-specific alignment.","intents":["I want to adapt the model to my domain-specific image/text tasks","I need to fine-tune on proprietary data without cloud training services","I want to create custom instruction-following behavior for my use case","I need to improve model performance on niche visual or textual domains"],"best_for":["teams with proprietary datasets requiring custom model adaptation","organizations with privacy constraints preventing cloud training","researchers experimenting with multimodal fine-tuning approaches","developers building domain-specific vision+language applications"],"limitations":["torchtune documentation and examples not provided — learning curve for framework setup","No guidance on dataset size, quality, or format requirements — unknown minimum data for effective fine-tuning","Training time and resource requirements not documented — unclear GPU hours needed for convergence","No comparison of full-model vs. adapter fine-tuning trade-offs — unclear which approach suits which use cases","Evaluation methodology not specified — no guidance on measuring fine-tuning success"],"requires":["torchtune framework (PyTorch-based)","GPU with sufficient VRAM for training (estimated 24GB+ for full model fine-tuning)","Custom training dataset in supported format (format not specified)","PyTorch 2.0+","Familiarity with supervised fine-tuning workflows"],"input_types":["image","text","structured training data (format unknown)"],"output_types":["fine-tuned model weights","adapter weights (if using LoRA or similar)"],"categories":["code-generation-editing","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_5","uri":"capability://memory.knowledge.128k.token.context.window.for.multi.document.reasoning","name":"128k token context window for multi-document reasoning","description":"Supports a 128K token context window, enabling processing of long documents, multiple images, or extended conversational histories without context truncation. This allows the model to maintain coherence across multi-turn conversations, analyze document sequences, or reason over large amounts of reference material. Context is managed at the token level, with both image and text tokens counting toward the limit.","intents":["I want to have extended conversations with the model while maintaining image context","I need to analyze multiple documents or images in a single request","I want to provide detailed system prompts and reference material alongside queries","I need to maintain conversation history for multi-turn visual reasoning"],"best_for":["developers building conversational multimodal applications","document analysis systems processing long or multi-page documents","research tools requiring extended reasoning over reference material","applications with complex system prompts and few-shot examples"],"limitations":["128K is a hard limit — no dynamic context extension or retrieval-augmented generation built-in","Token counting methodology not documented — unclear how image tokens are calculated vs. text tokens","Context window utilization not benchmarked — unknown if model maintains coherence at full 128K capacity","No guidance on optimal context composition — unclear how to structure long contexts for best performance","Inference latency scales with context length — longer contexts incur higher computational cost (not quantified)"],"requires":["Inference framework supporting 128K context (torchtune, PyTorch ExecuTorch, Ollama)","Sufficient GPU VRAM to hold full context in memory (estimated 24GB+ for full precision)","Token counting logic to manage context budget across images and text"],"input_types":["image (multiple images counted as tokens)","text (conversation history, reference material, instructions)"],"output_types":["text (response maintaining context coherence)"],"categories":["memory-knowledge","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_6","uri":"capability://automation.workflow.deployment.via.ollama.torchchat.and.pytorch.executorch","name":"deployment via ollama, torchchat, and pytorch executorch","description":"Provides three deployment pathways: Ollama for simplified single-node inference with automatic model management, torchchat for interactive local chatting, and PyTorch ExecuTorch for on-device mobile/edge distribution. Each pathway abstracts different layers of complexity — Ollama handles model downloading and serving, torchchat provides a chat interface, and ExecuTorch compiles models for mobile hardware. Models are available on Hugging Face and llama.com for direct download.","intents":["I want to run the model locally with minimal setup","I need to deploy the model on mobile or edge devices","I want to create a local chat interface for the model","I need to integrate the model into a Python application"],"best_for":["developers wanting quick local deployment without infrastructure setup","mobile app developers targeting on-device inference","teams building edge AI applications (robotics, IoT)","researchers prototyping multimodal applications"],"limitations":["Ollama abstracts model details — limited control over inference parameters and optimization","torchchat is a chat interface, not an API — requires custom integration for programmatic use","PyTorch ExecuTorch requires model compilation — adds deployment complexity vs. direct inference","No load balancing or multi-instance orchestration — single-node deployment only","Model format conversions (GGUF, safetensors, ExecuTorch) not documented — unclear compatibility and performance trade-offs"],"requires":["Ollama: Ollama CLI installed, GPU with sufficient VRAM","torchchat: Python 3.9+, PyTorch 2.0+, model weights downloaded","PyTorch ExecuTorch: ExecuTorch SDK, target hardware SDK (Android NDK, iOS SDK), model compilation toolchain","Direct inference: PyTorch 2.0+, model weights from Hugging Face or llama.com"],"input_types":["image","text"],"output_types":["text (chat interface or API response)"],"categories":["automation-workflow","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_7","uri":"capability://tool.use.integration.partner.ecosystem.integration.aws.azure.google.cloud.databricks.etc","name":"partner ecosystem integration (aws, azure, google cloud, databricks, etc.)","description":"Available through a broad partner ecosystem including cloud providers (AWS, Microsoft Azure, Google Cloud, Oracle Cloud), inference platforms (Fireworks, Together AI, Groq), and enterprise software (Databricks, Snowflake, Dell, IBM, Infosys). Partners provide managed inference endpoints, fine-tuning services, and integration with existing data pipelines. Meta AI also provides direct interactive access for development and testing.","intents":["I want to use the model through my existing cloud provider","I need managed inference without self-hosting infrastructure","I want to integrate the model into my data warehouse or analytics platform","I need enterprise support and SLAs for production deployment"],"best_for":["enterprises with existing cloud commitments (AWS, Azure, GCP)","teams lacking infrastructure expertise for self-hosted deployment","organizations requiring managed services and SLAs","data teams using Databricks or Snowflake for analytics","companies needing inference at scale without capacity planning"],"limitations":["Partner pricing and terms not documented — unknown cost vs. self-hosting","API compatibility and feature parity not specified — unclear if all model capabilities available through all partners","Latency and throughput SLAs not provided — unknown performance guarantees vs. self-hosted inference","Data residency and privacy policies vary by partner — requires individual review for compliance","Vendor lock-in risk — switching partners requires API migration"],"requires":["Account with partner platform (AWS, Azure, GCP, Databricks, Snowflake, etc.)","API credentials and authentication setup","Billing/payment method for managed service","Integration code for partner-specific SDK or API"],"input_types":["image","text"],"output_types":["text (via partner API)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_8","uri":"capability://text.generation.language.text.generation.and.summarization.inherited.from.llama.3.1.backbone","name":"text generation and summarization (inherited from llama 3.1 backbone)","description":"Inherits text generation and summarization capabilities from the Llama 3.1 8B backbone, enabling general-purpose language tasks alongside multimodal reasoning. The model can generate coherent text, summarize documents, rewrite content, and follow complex instructions. These capabilities work independently of image input, allowing the model to function as a general-purpose language model when vision is not required.","intents":["I want to generate text or creative content using the model","I need to summarize long documents or articles","I want to rewrite or rephrase text for different audiences","I need the model to follow complex multi-step instructions"],"best_for":["developers building general-purpose language applications","content creation and editing tools","document summarization systems","instruction-following agents and chatbots"],"limitations":["Text generation quality not benchmarked — no comparison to Llama 3.1 8B or other language models","Summarization accuracy not documented — unknown factuality or information retention rates","Instruction-following robustness not specified — unclear how model handles conflicting or ambiguous instructions","No safety alignment details — unknown how model handles harmful, biased, or sensitive text generation requests","Hallucination rates not provided — unknown factuality for knowledge-based text generation"],"requires":["Text input (prompt, document, instruction)","Inference framework (torchtune, PyTorch, Ollama, or partner API)","Optional: image input for multimodal context"],"input_types":["text (prompt, document, instruction)","image (optional, for multimodal context)"],"output_types":["text (generated content, summary, rewrite)"],"categories":["text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__cap_9","uri":"capability://text.generation.language.instruction.tuned.variant.for.aligned.task.performance","name":"instruction-tuned variant for aligned task performance","description":"Instruction-tuned variant available alongside the base model, fine-tuned on instruction-following datasets to improve task alignment and reduce need for prompt engineering. The variant is optimized for following explicit instructions, answering questions, and completing structured tasks. Separate from the base model, allowing users to choose between raw language modeling (base) and task-optimized (instruction-tuned) variants.","intents":["I want the model to follow my instructions precisely without extensive prompt engineering","I need reliable task completion for structured queries","I want to reduce hallucinations and off-topic responses","I need the model to work well with few-shot examples and explicit formatting instructions"],"best_for":["developers building task-specific applications (Q&A, extraction, classification)","teams without prompt engineering expertise","applications requiring consistent, predictable model behavior","systems with structured input/output requirements"],"limitations":["Instruction-tuning methodology not documented — unknown datasets, techniques, or alignment approach","Performance comparison to base model not provided — unclear improvement metrics","Instruction-following robustness not benchmarked — unknown failure modes or edge cases","No guidance on when to use instruction-tuned vs. base variant — unclear trade-offs","Instruction format not specified — unknown optimal prompt structure for best performance"],"requires":["Instruction-tuned model variant (separate download from base model)","Inference framework supporting the model","Well-structured instructions or prompts for optimal performance"],"input_types":["text (instruction, question, task description)","image (optional, for multimodal tasks)"],"output_types":["text (task completion, answer, structured response)"],"categories":["text-generation-language","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"llama-3-2-11b-vision__headline","uri":"capability://image.visual.multimodal.ai.model.for.image.and.text.processing","name":"multimodal ai model for image and text processing","description":"Meta's Llama 3.2 11B Vision is an open-weight multimodal AI model that combines advanced image understanding with text processing, making it ideal for applications in visual question answering and document analysis.","intents":["best multimodal AI model","multimodal model for image and text tasks","top AI model for visual question answering","best model for document analysis","multimodal model for self-hosting"],"best_for":["self-hosted applications","image and text integration"],"limitations":[],"requires":["single GPU"],"input_types":["images","text"],"output_types":["text","analysis results"],"categories":["image-visual"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":58,"verified":false,"data_access_risk":"low","permissions":["Single GPU with sufficient VRAM (specific requirement unknown, likely 16GB+ for full precision)","PyTorch 2.0+ for model loading and inference","Image input in standard formats (JPEG, PNG, WebP — specific support unknown)","torchtune or PyTorch ExecuTorch for deployment","Instruction-tuned model variant (separate from base model)","Image and text input capability","Inference framework supporting 128K context (torchtune, PyTorch ExecuTorch, or Ollama)","Inference framework supporting 128K context and multi-turn conversation management","Application logic to maintain conversation history and image references","Sufficient GPU VRAM to hold full conversation context"],"failure_modes":["Vision encoder architecture not publicly documented — limits ability to fine-tune vision component independently","Maximum image resolution and count per input not specified — unknown practical limits for high-resolution documents","No quantitative benchmarks provided — 'competitive with Claude 3 Haiku' claim unsubstantiated with actual metrics","128K context window is fixed hard limit — cannot process arbitrarily long document sequences","Hallucination rates and factuality benchmarks for visual reasoning not documented","No training data composition disclosed — unknown what VQA datasets were used or their biases","Instruction-following quality not benchmarked — no metrics on answer accuracy, hallucination rates, or failure modes","Multi-turn conversation context management not documented — unclear how model handles contradictory information across turns","No safety alignment details provided — unknown how model handles adversarial or sensitive image queries","Context management strategy not documented — unclear how model prioritizes recent vs. early context","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7,"quality":0.9,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:23.327Z","last_scraped_at":null,"last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=llama-3-2-11b-vision","compare_url":"https://unfragile.ai/compare?artifact=llama-3-2-11b-vision"}},"signature":"NmB2aYcWeq4+xWCM8lVregzdvb6fLgB57obnY1zA71Nd4xubNpXhwA8RVD3JC1cynLWcKvFzSolLf2SW+WcaCw==","signedAt":"2026-06-20T17:40:44.153Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/llama-3-2-11b-vision","artifact":"https://unfragile.ai/llama-3-2-11b-vision","verify":"https://unfragile.ai/api/v1/verify?slug=llama-3-2-11b-vision","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}