{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-model-tahadouaji--detr-doc-table-detection","slug":"tahadouaji--detr-doc-table-detection","name":"detr-doc-table-detection","type":"model","url":"https://huggingface.co/TahaDouaji/detr-doc-table-detection","page_url":"https://unfragile.ai/tahadouaji--detr-doc-table-detection","categories":["image-generation"],"tags":["transformers","pytorch","onnx","safetensors","detr","object-detection","- vision","dataset:MohamedExperio/ICDAR2019","arxiv:2005.12872","arxiv:1910.09700","base_model:facebook/detr-resnet-50","base_model:quantized:facebook/detr-resnet-50","license:apache-2.0","endpoints_compatible","region:us"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-model-tahadouaji--detr-doc-table-detection__cap_0","uri":"capability://image.visual.document.table.detection.via.transformer.based.object.localization","name":"document table detection via transformer-based object localization","description":"Detects and localizes tables within document images using DETR (Detection Transformer), a transformer-based object detection architecture that replaces traditional CNN-based detectors with a set-based prediction approach. The model processes document images through a ResNet-50 backbone for feature extraction, then applies transformer encoder-decoder layers to directly predict table bounding boxes and class labels without hand-crafted NMS or anchor generation, enabling end-to-end differentiable detection optimized for document layout understanding.","intents":["I need to automatically locate and extract table regions from scanned documents or PDFs for downstream OCR or data extraction","I want to build a document processing pipeline that identifies where tables are positioned in multi-page documents","I need to segment document images into table and non-table regions for selective processing","I'm building a document intelligence system that must handle mixed layouts with text, images, and tabular data"],"best_for":["document processing teams automating table extraction from invoices, reports, and research papers","developers building document intelligence platforms requiring table localization as a preprocessing step","organizations processing ICDAR2019-style document datasets with mixed table layouts","teams needing lightweight, deployable table detection without cloud API dependencies"],"limitations":["Trained exclusively on ICDAR2019 dataset — may have reduced accuracy on document types, table styles, or layouts not represented in training data","Requires GPU or significant CPU resources for real-time inference on high-resolution document images; CPU inference adds 500ms+ latency per image","No built-in handling for rotated, skewed, or severely degraded document images — preprocessing normalization required","Outputs only bounding box coordinates and class labels; does not perform table structure parsing, cell extraction, or content OCR","Fixed input resolution (typically 800x1066 for DETR) may require image resizing, potentially losing fine-grained table details in high-resolution documents"],"requires":["PyTorch 1.9+ or ONNX Runtime 1.10+ for model inference","transformers library 4.5.0+ for model loading and preprocessing","Python 3.7+","Pillow or OpenCV for image loading and preprocessing","GPU with CUDA 11.0+ recommended for production inference; CPU inference supported but slow"],"input_types":["image/jpeg","image/png","image/tiff","numpy arrays (H×W×3 format)","PIL Image objects"],"output_types":["structured data: bounding boxes (x_min, y_min, x_max, y_max format)","class labels (table vs non-table or multi-class table types)","confidence scores per detection","JSON serializable detection dictionaries compatible with COCO format"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-tahadouaji--detr-doc-table-detection__cap_1","uri":"capability://tool.use.integration.multi.format.model.export.and.deployment.packaging","name":"multi-format model export and deployment packaging","description":"Provides pre-converted model artifacts in PyTorch, ONNX, and SafeTensors formats, enabling deployment across heterogeneous inference environments without requiring manual conversion pipelines. The model is packaged with HuggingFace Hub integration, allowing single-line loading via transformers library and direct compatibility with ONNX Runtime, TensorRT, and edge deployment frameworks, eliminating format conversion bottlenecks in production workflows.","intents":["I need to deploy this table detection model to both cloud GPU servers and edge devices with different runtime requirements","I want to use ONNX Runtime for faster inference on CPU or specialized hardware without PyTorch overhead","I need to integrate this model into existing pipelines that expect SafeTensors format for security and performance","I'm building a multi-platform application and need the model in formats compatible with web, mobile, and server runtimes"],"best_for":["MLOps teams managing multi-environment deployments (cloud, edge, on-premise)","developers building production systems requiring model format flexibility and zero-conversion overhead","organizations with security policies favoring SafeTensors format for model integrity verification","teams optimizing inference latency across heterogeneous hardware (GPUs, CPUs, TPUs, mobile accelerators)"],"limitations":["ONNX export may have minor numerical precision differences from PyTorch due to operator implementation variations; requires validation on target hardware","SafeTensors format support depends on downstream framework versions; older transformers versions may not support direct SafeTensors loading","No pre-built binaries for specialized runtimes (TensorRT, CoreML, NCNN); ONNX conversion is intermediate step requiring additional tooling","Model quantization (int8, fp16) not pre-provided; requires post-export quantization tooling for mobile/edge deployment"],"requires":["transformers 4.5.0+ for PyTorch format loading","ONNX Runtime 1.10+ for ONNX inference","safetensors 0.3.0+ for SafeTensors format support","PyTorch 1.9+ for native PyTorch inference","Optional: ONNX opset 14+ for full operator compatibility"],"input_types":["model weights in PyTorch (.pt, .pth)","model weights in ONNX (.onnx)","model weights in SafeTensors (.safetensors)"],"output_types":["loaded model objects compatible with transformers pipeline API","ONNX inference sessions ready for onnxruntime.InferenceSession","SafeTensors-backed model state dicts for memory-efficient loading"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-tahadouaji--detr-doc-table-detection__cap_2","uri":"capability://tool.use.integration.huggingface.hub.integrated.model.discovery.and.versioning","name":"huggingface hub-integrated model discovery and versioning","description":"Integrates with HuggingFace Model Hub infrastructure, providing automatic model versioning, revision tracking, and one-line loading via transformers library without manual weight downloads or path management. The model is registered with Hub endpoints compatibility, enabling direct inference via HuggingFace Inference API and automatic caching of model weights locally, with built-in support for model cards, dataset attribution (ICDAR2019), and Apache 2.0 license metadata for compliance tracking.","intents":["I want to load a pre-trained table detection model with a single line of code without managing file paths or downloads","I need to track which version of the model I'm using in production and easily roll back to previous versions if needed","I want to use this model via HuggingFace Inference API without running inference infrastructure myself","I need to verify the model's training dataset, license, and attribution before using it in my application"],"best_for":["rapid prototyping teams wanting instant model access without infrastructure setup","developers building applications with strict compliance requirements (license tracking, dataset attribution)","teams using HuggingFace ecosystem tools (Transformers, Datasets, Accelerate) as their primary ML stack","organizations preferring managed inference via HuggingFace Inference API over self-hosted deployment"],"limitations":["Requires internet connectivity for initial model download and Hub metadata fetching; offline inference requires pre-cached weights","HuggingFace Inference API has rate limits and latency SLAs that may not meet real-time requirements for high-throughput document processing","Model versioning is tied to HuggingFace Hub; if the repository is deleted or made private, dependent applications break","Automatic weight caching uses local disk space (~350MB for this model); no built-in cache eviction or size limits"],"requires":["transformers 4.5.0+","huggingface-hub 0.4.0+ for model downloading and caching","Internet connection for initial model download","Python 3.7+","Optional: HuggingFace API token for private model access or higher Inference API quotas"],"input_types":["model identifier string (e.g., 'TahaDouaji/detr-doc-table-detection')","revision/branch specification (e.g., 'main', 'v1.0', specific commit hash)"],"output_types":["transformers.AutoModelForObjectDetection instance","model metadata (training dataset, license, model card HTML)","model revision history and version information"],"categories":["tool-use-integration","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-tahadouaji--detr-doc-table-detection__cap_3","uri":"capability://image.visual.resnet.50.backbone.feature.extraction.with.transformer.refinement","name":"resnet-50 backbone feature extraction with transformer refinement","description":"Extracts visual features from document images using a pre-trained ResNet-50 CNN backbone (trained on ImageNet), which captures low-level document structure (edges, text regions, table grids) through hierarchical convolutional layers. These features are then refined through DETR's transformer encoder-decoder stack, which applies multi-head self-attention to reason about spatial relationships between document elements and predict table locations, enabling both local feature precision and global document layout understanding.","intents":["I need robust feature extraction that captures both fine-grained document details (table borders, cell boundaries) and high-level layout structure","I want to leverage pre-trained ImageNet knowledge to improve detection accuracy on document images without retraining from scratch","I need to understand which document regions the model attends to when making table detection decisions for interpretability","I'm building a system that must handle diverse document types (scanned, digital-born, mixed quality) with consistent feature quality"],"best_for":["document processing teams handling diverse document sources requiring robust feature extraction","developers building interpretable document AI systems needing attention visualization","teams with limited labeled data who can benefit from ImageNet pre-training transfer learning","organizations requiring feature extraction quality validation before downstream processing"],"limitations":["ResNet-50 backbone adds ~200MB model size; lighter backbones (ResNet-18, MobileNet) not provided in this artifact","Transformer refinement adds computational overhead (~2-3x slower than ResNet-50 alone); not suitable for real-time mobile inference without quantization","Feature extraction is optimized for 800x1066 resolution; significant upsampling/downsampling of input images may degrade feature quality","Attention visualization requires additional post-processing; raw model outputs don't directly provide interpretable attention maps"],"requires":["PyTorch 1.9+ with torchvision for ResNet-50 backbone loading","transformers 4.5.0+ for DETR architecture","Input images must be normalized to ImageNet statistics (mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])","GPU recommended for transformer encoder-decoder inference; CPU inference adds 500ms+ latency"],"input_types":["image/jpeg, image/png, image/tiff (document images)","numpy arrays (H×W×3, uint8 or float32)","PIL Image objects"],"output_types":["feature maps from ResNet-50 backbone (C×H'×W' tensors)","transformer encoder output (sequence of refined feature vectors)","attention weights from transformer multi-head attention (for visualization)","final detection predictions (bounding boxes + class logits)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-tahadouaji--detr-doc-table-detection__cap_4","uri":"capability://image.visual.icdar2019.dataset.specialized.table.detection.with.domain.adaptation","name":"icdar2019 dataset-specialized table detection with domain adaptation","description":"Fine-tuned specifically on the ICDAR2019 document analysis competition dataset, which contains diverse document layouts, table styles, and quality variations representative of real-world document processing scenarios. The model has learned document-specific patterns (table borders, cell structures, header rows, multi-column layouts) that generic object detectors lack, enabling higher precision on document tables while potentially requiring domain adaptation for out-of-distribution document types not represented in ICDAR2019.","intents":["I need a table detector optimized for document images similar to ICDAR2019 competition datasets (invoices, research papers, forms)","I want to understand how well this model will perform on my specific document type before investing in custom training","I need to detect tables in documents with diverse layouts, quality levels, and table styles without building a custom dataset","I'm building a document processing system and want to leverage domain-specialized models rather than generic object detectors"],"best_for":["teams processing document types similar to ICDAR2019 (invoices, research papers, technical documents, forms)","organizations with limited labeled data who can use this pre-trained model as a starting point for fine-tuning","developers building document intelligence systems requiring high table detection precision on standard document layouts","teams evaluating table detection approaches before investing in custom model training"],"limitations":["Accuracy may degrade significantly on document types not represented in ICDAR2019 (e.g., handwritten documents, non-English layouts, specialized medical/legal formats)","No information provided on ICDAR2019 train/val/test split; unclear if model was evaluated on held-out test set or if performance metrics are available","Domain shift from ICDAR2019 to production documents (different scanners, quality, table styles) requires validation and potentially fine-tuning","No multi-language support information; ICDAR2019 is primarily English-focused, limiting applicability to multilingual documents"],"requires":["Document images similar in style/quality to ICDAR2019 dataset for optimal performance","Validation dataset from target domain to measure performance degradation and determine if fine-tuning is needed","Optional: labeled data from target domain if domain adaptation fine-tuning is required"],"input_types":["document images (scanned PDFs, digital documents) similar to ICDAR2019 dataset","images with table-containing documents in English or similar languages"],"output_types":["table bounding box predictions optimized for ICDAR2019-style document layouts","confidence scores reflecting model's certainty on ICDAR2019-trained patterns"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":44,"verified":false,"data_access_risk":"high","permissions":["PyTorch 1.9+ or ONNX Runtime 1.10+ for model inference","transformers library 4.5.0+ for model loading and preprocessing","Python 3.7+","Pillow or OpenCV for image loading and preprocessing","GPU with CUDA 11.0+ recommended for production inference; CPU inference supported but slow","transformers 4.5.0+ for PyTorch format loading","ONNX Runtime 1.10+ for ONNX inference","safetensors 0.3.0+ for SafeTensors format support","PyTorch 1.9+ for native PyTorch inference","Optional: ONNX opset 14+ for full operator compatibility"],"failure_modes":["Trained exclusively on ICDAR2019 dataset — may have reduced accuracy on document types, table styles, or layouts not represented in training data","Requires GPU or significant CPU resources for real-time inference on high-resolution document images; CPU inference adds 500ms+ latency per image","No built-in handling for rotated, skewed, or severely degraded document images — preprocessing normalization required","Outputs only bounding box coordinates and class labels; does not perform table structure parsing, cell extraction, or content OCR","Fixed input resolution (typically 800x1066 for DETR) may require image resizing, potentially losing fine-grained table details in high-resolution documents","ONNX export may have minor numerical precision differences from PyTorch due to operator implementation variations; requires validation on target hardware","SafeTensors format support depends on downstream framework versions; older transformers versions may not support direct SafeTensors loading","No pre-built binaries for specialized runtimes (TensorRT, CoreML, NCNN); ONNX conversion is intermediate step requiring additional tooling","Model quantization (int8, fp16) not pre-provided; requires post-export quantization tooling for mobile/edge deployment","Requires internet connectivity for initial model download and Hub metadata fetching; offline inference requires pre-cached weights","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.5848523525799134,"quality":0.35,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.766Z","last_scraped_at":"2026-05-03T14:22:58.551Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":204862,"model_likes":63}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=tahadouaji--detr-doc-table-detection","compare_url":"https://unfragile.ai/compare?artifact=tahadouaji--detr-doc-table-detection"}},"signature":"s9aYRb6OjF6cn+Tznz1PdL33yYQK414fQQ2/sMBDvS1cmdIQ4j+607n5A+5oGbFRuS7KWPnGJVOQxa5CYVtdBQ==","signedAt":"2026-06-21T04:33:20.111Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/tahadouaji--detr-doc-table-detection","artifact":"https://unfragile.ai/tahadouaji--detr-doc-table-detection","verify":"https://unfragile.ai/api/v1/verify?slug=tahadouaji--detr-doc-table-detection","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}