{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-model-xenova--all-minilm-l6-v2","slug":"xenova--all-minilm-l6-v2","name":"all-MiniLM-L6-v2","type":"model","url":"https://huggingface.co/Xenova/all-MiniLM-L6-v2","page_url":"https://unfragile.ai/xenova--all-minilm-l6-v2","categories":["model-training"],"tags":["transformers.js","onnx","bert","feature-extraction","base_model:sentence-transformers/all-MiniLM-L6-v2","base_model:quantized:sentence-transformers/all-MiniLM-L6-v2","license:apache-2.0","region:us"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-model-xenova--all-minilm-l6-v2__cap_0","uri":"capability://data.processing.analysis.semantic.text.embedding.generation","name":"semantic-text-embedding-generation","description":"Converts variable-length text inputs into fixed-dimensional dense vector embeddings (384 dimensions) using a distilled BERT architecture optimized for semantic similarity tasks. Implements mean pooling over the final transformer layer outputs to produce normalized embeddings suitable for cosine similarity comparisons. The model uses ONNX quantization to reduce model size from ~90MB to ~22MB while maintaining embedding quality, enabling browser-based and edge deployment via transformers.js.","intents":["I need to convert user queries and documents into comparable vector representations for semantic search","I want to build a similarity-based recommendation system without running a full-scale embedding service","I need embeddings that work in the browser or on-device without cloud API calls","I'm building a RAG pipeline and need lightweight embeddings that preserve semantic meaning"],"best_for":["developers building semantic search systems with budget constraints","teams implementing RAG pipelines requiring sub-100ms embedding latency","browser-based applications needing client-side semantic similarity without backend calls","resource-constrained environments (mobile, edge devices, serverless functions)"],"limitations":["Fixed 384-dimensional output — cannot be customized for domain-specific embedding spaces","Maximum sequence length of 128 tokens — longer documents require chunking or truncation","Mean pooling approach loses positional information — not suitable for tasks requiring token-level granularity","Distilled model trades some semantic precision for speed — ~5-10% accuracy loss vs full-size sentence-transformers/all-MiniLM-L12-v2","ONNX quantization introduces minor numerical precision loss in edge cases with very similar embeddings"],"requires":["transformers.js library (v2.0+) for browser/Node.js runtime","Node.js 14+ or modern browser with WebGL/WebAssembly support","~22MB disk space for ONNX model weights","Hugging Face model card access or local model cache"],"input_types":["plain text (UTF-8)","text with special tokens (preserved as-is)","variable-length strings (auto-tokenized and padded/truncated to 128 tokens)"],"output_types":["float32 dense vectors (384 dimensions)","normalized embeddings (L2 norm applied)","compatible with cosine similarity, Euclidean distance, or dot product operations"],"categories":["data-processing-analysis","embedding-generation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_1","uri":"capability://data.processing.analysis.cross.lingual.semantic.matching","name":"cross-lingual-semantic-matching","description":"Performs semantic similarity matching across 50+ languages by leveraging multilingual BERT's shared embedding space, where embeddings from different languages cluster semantically rather than lexically. The model was trained on parallel sentence pairs across multiple languages, enabling zero-shot cross-lingual retrieval — a query in English can find semantically similar documents in Spanish, Mandarin, or Arabic without language-specific fine-tuning. Similarity is computed via cosine distance in the shared 384-dimensional space.","intents":["I need to find documents in multiple languages that match a user query regardless of query language","I'm building a multilingual search system and want to avoid maintaining separate embedding models per language","I need to cluster or deduplicate content across language boundaries based on semantic meaning","I want to match user-generated content in any language against a multilingual knowledge base"],"best_for":["global applications serving users in 10+ languages","teams building multilingual RAG systems without language detection preprocessing","content platforms deduplicating or clustering user submissions across language boundaries","research teams studying cross-lingual semantic similarity without labeled training data"],"limitations":["Cross-lingual performance degrades for language pairs underrepresented in training data (e.g., low-resource languages like Amharic, Tagalog)","Semantic alignment is approximate — homonyms and cultural idioms may not map correctly across languages","No explicit language identification — requires external language detection if language-specific processing is needed downstream","Performance varies by language pair — English-Spanish matching is more reliable than English-Swahili","Requires identical preprocessing (tokenization, lowercasing) across all languages for consistency"],"requires":["transformers.js library with multilingual tokenizer support","Input text in any of 50+ supported languages (ISO 639-1 codes)","Optional: language detection library (e.g., langdetect) for logging/monitoring"],"input_types":["text in any of 50+ languages","mixed-language text (each language embedded independently)","variable-length strings with diacritics and special characters preserved"],"output_types":["float32 embeddings in shared 384-dimensional space","cosine similarity scores (0-1 range) between embeddings","ranked lists of cross-lingual matches"],"categories":["data-processing-analysis","search-retrieval"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_10","uri":"capability://planning.reasoning.semantic.text.classification.via.embedding.similarity","name":"semantic-text-classification-via-embedding-similarity","description":"Classifies text by embedding it and computing similarity to class prototypes (embeddings of representative examples or class names). For example, classifying a review as 'positive' or 'negative' by comparing its embedding to embeddings of 'this product is great' and 'this product is terrible'. This zero-shot approach requires no training data — just representative text for each class. Can be extended to multi-class classification by computing similarity to multiple class prototypes and selecting the highest-scoring class.","intents":["I need to classify text without labeled training data (zero-shot classification)","I want to add new classes without retraining a classifier","I'm building a content moderation system that categorizes user submissions","I need to classify customer feedback by sentiment or topic"],"best_for":["zero-shot text classification without training data","rapid prototyping of classification systems","dynamic classification with user-defined categories","low-data scenarios where training a classifier is impractical"],"limitations":["Classification accuracy depends heavily on quality of class prototypes — poorly chosen examples lead to misclassification","No learned decision boundaries — similarity-based classification is linear in embedding space, missing complex patterns","Requires manual selection of representative text for each class — no automatic prototype generation","Performance degrades with many classes (>10) because similarity scores become less discriminative","Cannot handle ambiguous or multi-label classification without post-processing","No confidence calibration — similarity scores don't directly correspond to classification confidence"],"requires":["Text to classify (embedded via semantic-text-embedding-generation)","Representative text or embeddings for each class (class prototypes)","Similarity threshold or top-K selection for classification"],"input_types":["text to classify (variable length)","class prototypes (text or pre-computed embeddings)"],"output_types":["predicted class label","similarity scores for each class","optional: confidence score (max similarity)"],"categories":["planning-reasoning","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_2","uri":"capability://data.processing.analysis.browser.native.embedding.inference","name":"browser-native-embedding-inference","description":"Executes the entire embedding pipeline (tokenization, transformer inference, pooling) directly in the browser using transformers.js and ONNX Runtime Web, eliminating round-trips to a backend embedding service. The ONNX quantized model (~22MB) is downloaded once and cached in IndexedDB or local storage, then inference runs on the client's CPU/GPU via WebAssembly or WebGL. Latency is typically 50-200ms per embedding on modern hardware, with no network overhead after initial model load.","intents":["I want to build a semantic search UI that responds instantly without backend latency","I need to process sensitive text (PII, medical records) without sending it to external servers","I'm building an offline-first application that must work without internet connectivity","I want to reduce backend infrastructure costs by offloading embedding computation to clients"],"best_for":["single-page applications (React, Vue, Svelte) with real-time search requirements","privacy-sensitive applications (healthcare, legal, financial) avoiding cloud processing","offline-first or progressive web apps requiring local semantic search","cost-conscious teams with high embedding volume (millions of queries/month)"],"limitations":["First load requires downloading 22MB model — adds 5-30 seconds depending on network speed and browser caching","Inference speed varies dramatically by device — older phones/tablets may take 500ms+ per embedding vs 50ms on modern desktops","Browser memory constraints — processing very large batches (1000+ embeddings) may cause OOM on low-memory devices","No GPU acceleration in most browsers — inference is CPU-bound, making batch processing slower than server-side","IndexedDB storage limits (typically 50MB-1GB per origin) may be exceeded in storage-constrained environments","Requires modern browser with WebAssembly support (IE11 not supported)"],"requires":["Modern browser (Chrome 57+, Firefox 52+, Safari 11+, Edge 79+)","transformers.js library (v2.0+) installed as npm package or via CDN","~22MB available disk space in browser cache/IndexedDB","JavaScript enabled and WebAssembly support"],"input_types":["text strings from user input, DOM elements, or file uploads","batch arrays of text for processing multiple queries/documents"],"output_types":["float32 embeddings (384 dimensions) in JavaScript typed arrays","embeddings immediately available for in-memory similarity computation"],"categories":["data-processing-analysis","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_3","uri":"capability://search.retrieval.semantic.similarity.ranking","name":"semantic-similarity-ranking","description":"Computes pairwise cosine similarity between query embeddings and a corpus of document embeddings, returning ranked results sorted by similarity score. The implementation leverages vectorized operations (dot products, L2 normalization) to efficiently compare a single query against thousands of documents in milliseconds. Similarity scores range from -1 to 1 (or 0 to 1 for normalized embeddings), with scores >0.7 typically indicating semantic relevance. Can be implemented in-memory for small corpora or with vector databases (Pinecone, Weaviate) for large-scale retrieval.","intents":["I need to rank documents by semantic relevance to a user query","I want to implement a 'find similar items' feature without keyword matching","I'm building a recommendation system that matches users to content based on semantic similarity","I need to retrieve the top-K most relevant documents from a corpus for a RAG pipeline"],"best_for":["search applications prioritizing semantic relevance over keyword matching","recommendation engines matching users/queries to items/documents","RAG pipelines requiring efficient document retrieval from large corpora","clustering and deduplication tasks based on semantic similarity"],"limitations":["Cosine similarity is symmetric — 'dog' and 'animal' have the same similarity as 'animal' and 'dog', losing directionality","Similarity scores are relative, not absolute — a score of 0.6 may be high in one corpus but low in another depending on data distribution","Requires pre-computed embeddings for all documents — cannot rank on-the-fly without embedding infrastructure","In-memory similarity computation scales to ~100K documents on typical hardware; larger corpora require vector database indexing","No built-in handling of semantic drift — embeddings may not capture domain-specific relevance (e.g., medical vs general English)"],"requires":["Query text and document corpus both embedded using the same model (all-MiniLM-L6-v2)","Embeddings stored as float32 arrays or in a vector database","Linear algebra library (numpy, JavaScript typed arrays) for similarity computation"],"input_types":["query embedding (384-dimensional float32 vector)","document embeddings (array of 384-dimensional vectors)","optional: similarity threshold for filtering results"],"output_types":["ranked list of (document_id, similarity_score) tuples","top-K results (typically K=5-50)","similarity scores as floats (0-1 range for normalized embeddings)"],"categories":["search-retrieval","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_4","uri":"capability://data.processing.analysis.batch.embedding.computation","name":"batch-embedding-computation","description":"Processes multiple text inputs in a single forward pass through the transformer, amortizing tokenization and model loading overhead across the batch. Transformers.js implements dynamic batching where inputs are padded to the longest sequence in the batch, then processed together via ONNX Runtime. Batch sizes of 8-64 are typical; larger batches improve throughput (embeddings/second) but increase latency per batch. Outputs are a 2D array of embeddings (batch_size × 384 dimensions).","intents":["I need to embed a large corpus of documents efficiently without processing one-by-one","I want to minimize per-embedding latency by batching queries from multiple users","I'm indexing a knowledge base and need to compute embeddings for thousands of documents quickly","I need to embed user queries and candidate documents together for efficient similarity computation"],"best_for":["batch processing pipelines (ETL, data indexing, offline embedding generation)","server-side applications with multiple concurrent requests","RAG systems pre-computing embeddings for large document collections","applications with predictable embedding workloads (not real-time single-query)"],"limitations":["Batch processing introduces latency variance — first query in a batch waits for batch assembly, subsequent queries benefit from amortization","Memory usage scales linearly with batch size — batch of 64 uses ~4x memory of batch of 16","Optimal batch size depends on hardware (GPU memory, CPU cores) — no automatic tuning","Dynamic padding wastes computation on shorter sequences — batch of [100 tokens, 10 tokens] pads both to 100","No built-in batching across multiple requests — requires application-level queue management"],"requires":["transformers.js library with batch processing support","Sufficient memory for batch_size × 384 × 4 bytes (float32) + model weights (~22MB)","Array of text inputs (typically 8-64 items per batch)"],"input_types":["array of text strings (variable length, auto-tokenized)","batch size parameter (typically 8-64)"],"output_types":["2D float32 array (batch_size × 384 dimensions)","embeddings in same order as input array"],"categories":["data-processing-analysis","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_5","uri":"capability://data.processing.analysis.quantized.model.inference","name":"quantized-model-inference","description":"Executes transformer inference using 8-bit integer quantization instead of 32-bit floating-point, reducing model size from ~90MB to ~22MB and improving inference speed by 2-4x on CPU-bound hardware. Quantization maps float32 weights to int8 values using learned scale factors, with minimal accuracy loss (<2% on semantic similarity benchmarks). ONNX Runtime automatically handles dequantization during inference, making quantization transparent to the user while providing speed and memory benefits.","intents":["I need to deploy embeddings on resource-constrained devices (mobile, edge, serverless)","I want to reduce model download time and storage footprint for browser-based applications","I need faster embedding inference without sacrificing semantic quality","I'm optimizing for cost in serverless environments where memory and compute time are billed"],"best_for":["mobile and edge device deployments with limited storage/memory","browser-based applications where model download time is critical","serverless/FaaS environments (AWS Lambda, Cloudflare Workers) with strict resource limits","high-throughput inference scenarios where speed matters more than maximum accuracy"],"limitations":["Quantization introduces ~1-2% accuracy loss on semantic similarity tasks — noticeable only in edge cases with very similar embeddings","Integer arithmetic is faster on CPU but not all hardware benefits equally — GPU inference may see minimal speedup","Quantized models are not human-readable — debugging embedding quality requires comparison with float32 baseline","ONNX quantization is one-way — cannot convert quantized model back to float32 without retraining","Some edge cases (very long sequences, unusual token combinations) may show larger accuracy degradation than average"],"requires":["ONNX Runtime (Web, Node.js, or native) with quantization support","transformers.js v2.0+ with ONNX model support","No code changes required — quantization is transparent to application code"],"input_types":["text inputs (identical to float32 model)","batch arrays of text"],"output_types":["float32 embeddings (dequantized at output layer)","identical output format to float32 model"],"categories":["data-processing-analysis","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_6","uri":"capability://data.processing.analysis.semantic.clustering.and.deduplication","name":"semantic-clustering-and-deduplication","description":"Groups semantically similar texts by computing embeddings for all items, then applying clustering algorithms (k-means, hierarchical clustering, DBSCAN) on the 384-dimensional embedding space. Items with embeddings close in vector space are grouped together, enabling deduplication of near-duplicate content and discovery of semantic clusters without manual labeling. Clustering quality depends on the similarity threshold and algorithm choice; typical use cases set thresholds at 0.85-0.95 cosine similarity for deduplication.","intents":["I need to deduplicate user-generated content (reviews, comments, forum posts) that are semantically identical but not exact matches","I want to discover topics or themes in a large text corpus without manual annotation","I need to group customer support tickets by issue type based on semantic similarity","I'm deduplicating training data for ML models to remove near-duplicate examples"],"best_for":["content moderation and deduplication pipelines","exploratory data analysis on text corpora","customer support ticket triage and routing","training data cleaning for ML models"],"limitations":["Clustering quality is sensitive to hyperparameters (number of clusters, similarity threshold) — requires tuning per dataset","Semantic similarity doesn't always align with human judgment — two texts may be semantically similar but contextually different","Computational cost scales quadratically with corpus size — pairwise similarity computation is O(n²) for n documents","No built-in handling of hierarchical relationships — all clusters are flat unless using hierarchical clustering (slower)","Requires embedding all items upfront — cannot incrementally add new items to existing clusters without recomputation"],"requires":["Embeddings for all items in the corpus (computed via semantic-text-embedding-generation)","Clustering library (scikit-learn, scipy, custom implementation)","Similarity threshold or number of clusters parameter"],"input_types":["array of embeddings (n × 384 dimensions)","clustering algorithm choice (k-means, DBSCAN, hierarchical)","hyperparameters (number of clusters, similarity threshold, distance metric)"],"output_types":["cluster assignments (array of cluster IDs, one per item)","cluster centroids (384-dimensional vectors representing each cluster)","optional: dendrogram or distance matrix for hierarchical clustering"],"categories":["data-processing-analysis","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_7","uri":"capability://search.retrieval.semantic.duplicate.detection","name":"semantic-duplicate-detection","description":"Identifies near-duplicate or paraphrased text by comparing embeddings of candidate pairs and flagging those with cosine similarity above a threshold (typically 0.85-0.95). Unlike exact matching or fuzzy string matching, this approach detects semantic duplicates — texts that convey the same meaning despite different wording. Can be implemented as a pairwise comparison (O(n²)) for small corpora or with approximate nearest neighbor (ANN) indexing (Faiss, Annoy) for large-scale detection.","intents":["I need to detect plagiarism or paraphrased content in user submissions","I want to identify duplicate bug reports or feature requests in issue trackers","I need to find similar questions in a FAQ or knowledge base to avoid redundant content","I'm detecting fraudulent or spam content that uses paraphrasing to evade keyword filters"],"best_for":["content moderation and plagiarism detection","issue tracking and ticket deduplication","knowledge base curation and FAQ management","fraud detection and spam filtering"],"limitations":["Similarity threshold is arbitrary — 0.85 may be too strict for some domains, too lenient for others","Semantic similarity doesn't distinguish between intentional paraphrasing and coincidental similarity","False positives are common for short texts (< 20 tokens) where random similarity is higher","Requires embedding both texts — cannot detect duplicates without computing embeddings for new content","Language-specific performance varies — works well for English, less reliable for low-resource languages"],"requires":["Embeddings for candidate texts (computed via semantic-text-embedding-generation)","Similarity threshold parameter (typically 0.85-0.95)","Optional: ANN index (Faiss, Annoy) for large-scale pairwise comparison"],"input_types":["two text embeddings (384-dimensional vectors each)","similarity threshold (float, 0-1 range)"],"output_types":["boolean flag (duplicate or not)","similarity score (float, 0-1 range)","optional: ranked list of duplicates with scores"],"categories":["search-retrieval","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_8","uri":"capability://search.retrieval.semantic.text.search.with.ranking","name":"semantic-text-search-with-ranking","description":"Implements a complete semantic search pipeline: (1) embed user query, (2) retrieve candidate documents from a corpus via similarity search, (3) rank results by cosine similarity score. Unlike keyword search (BM25), this approach matches semantic meaning rather than term overlap, enabling queries like 'how do I fix a broken window' to find results about 'repairing glass panes' without keyword overlap. Can be implemented in-memory for small corpora (<100K docs) or with vector databases (Pinecone, Weaviate, Milvus) for large-scale retrieval.","intents":["I want to build a search engine that understands user intent rather than just matching keywords","I need to search a knowledge base or documentation with natural language queries","I'm building a customer support chatbot that finds relevant help articles based on user questions","I want to implement 'search as you type' with semantic results, not just autocomplete"],"best_for":["knowledge base and documentation search","customer support and FAQ systems","e-commerce product search with semantic understanding","research paper or academic article discovery"],"limitations":["Requires pre-computed embeddings for all documents — cannot search unembedded content","Semantic search may return irrelevant results if query and documents use different terminology (e.g., 'automobile' vs 'car' are semantically similar but may not cluster together)","Performance degrades for very short queries (< 5 tokens) where semantic meaning is ambiguous","No built-in handling of query expansion or synonym detection — 'bug' and 'defect' are treated as different concepts","Ranking is purely similarity-based — cannot incorporate other signals (popularity, recency, authority) without post-processing"],"requires":["Embeddings for all documents in the corpus","Vector database or in-memory similarity search implementation","Query text to embed and search"],"input_types":["query text (natural language, variable length)","document corpus (array of texts or pre-computed embeddings)"],"output_types":["ranked list of documents with similarity scores","top-K results (typically K=5-20)","optional: highlighted snippets or excerpts"],"categories":["search-retrieval","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-xenova--all-minilm-l6-v2__cap_9","uri":"capability://search.retrieval.document.similarity.comparison","name":"document-similarity-comparison","description":"Compares two or more documents by embedding each and computing pairwise cosine similarity, producing a similarity matrix that quantifies semantic overlap. Useful for finding similar documents in a corpus, measuring document coherence, or detecting plagiarism. Similarity scores range from -1 to 1 (or 0 to 1 for normalized embeddings); scores >0.7 typically indicate substantial semantic overlap. Can be extended to hierarchical comparison (comparing document sections or paragraphs) for fine-grained analysis.","intents":["I need to find documents in my corpus that are similar to a reference document","I want to measure how similar two documents are (e.g., for plagiarism detection)","I'm building a 'related articles' or 'similar products' feature","I need to detect if a new document is a duplicate or paraphrase of existing content"],"best_for":["document similarity and plagiarism detection","related content recommendation","document deduplication and clustering","quality assurance for content management systems"],"limitations":["Similarity is computed at document level — cannot identify which sections are similar without chunking","Long documents (>512 tokens) are truncated to 128 tokens, losing information and potentially reducing accuracy","Similarity scores are relative — a score of 0.6 may indicate high similarity in one domain but low in another","No built-in handling of document structure (headings, sections, metadata) — treats all text equally","Requires embedding all documents upfront — cannot incrementally compare new documents without recomputation"],"requires":["Embeddings for all documents to compare","Linear algebra library for similarity computation (numpy, JavaScript typed arrays)"],"input_types":["document texts (variable length, auto-embedded)","optional: similarity threshold for filtering results"],"output_types":["similarity matrix (n × n for n documents)","pairwise similarity scores (float, 0-1 range)","ranked list of similar documents"],"categories":["search-retrieval","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":50,"verified":false,"data_access_risk":"high","permissions":["transformers.js library (v2.0+) for browser/Node.js runtime","Node.js 14+ or modern browser with WebGL/WebAssembly support","~22MB disk space for ONNX model weights","Hugging Face model card access or local model cache","transformers.js library with multilingual tokenizer support","Input text in any of 50+ supported languages (ISO 639-1 codes)","Optional: language detection library (e.g., langdetect) for logging/monitoring","Text to classify (embedded via semantic-text-embedding-generation)","Representative text or embeddings for each class (class prototypes)","Similarity threshold or top-K selection for classification"],"failure_modes":["Fixed 384-dimensional output — cannot be customized for domain-specific embedding spaces","Maximum sequence length of 128 tokens — longer documents require chunking or truncation","Mean pooling approach loses positional information — not suitable for tasks requiring token-level granularity","Distilled model trades some semantic precision for speed — ~5-10% accuracy loss vs full-size sentence-transformers/all-MiniLM-L12-v2","ONNX quantization introduces minor numerical precision loss in edge cases with very similar embeddings","Cross-lingual performance degrades for language pairs underrepresented in training data (e.g., low-resource languages like Amharic, Tagalog)","Semantic alignment is approximate — homonyms and cultural idioms may not map correctly across languages","No explicit language identification — requires external language detection if language-specific processing is needed downstream","Performance varies by language pair — English-Spanish matching is more reliable than English-Swahili","Requires identical preprocessing (tokenization, lowercasing) across all languages for consistency","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7899495253694906,"quality":0.32,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.766Z","last_scraped_at":"2026-05-03T14:23:02.600Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":3239437,"model_likes":116}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=xenova--all-minilm-l6-v2","compare_url":"https://unfragile.ai/compare?artifact=xenova--all-minilm-l6-v2"}},"signature":"AkozH+S0BBYVWVOLuVYlRBns1Ldktz3ofE5NetVMqCkU3HRBvRFG0XVqYb9WxD4fK2Frlk9CTTa61eFj0pZCDw==","signedAt":"2026-06-20T02:46:10.858Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/xenova--all-minilm-l6-v2","artifact":"https://unfragile.ai/xenova--all-minilm-l6-v2","verify":"https://unfragile.ai/api/v1/verify?slug=xenova--all-minilm-l6-v2","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}