{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-model-paddlepaddle--uvdoc","slug":"paddlepaddle--uvdoc","name":"UVDoc","type":"model","url":"https://huggingface.co/PaddlePaddle/UVDoc","page_url":"https://unfragile.ai/paddlepaddle--uvdoc","categories":["image-generation"],"tags":["PaddleOCR","OCR","PaddlePaddle","doc_img_unwarping","image-to-text","en","zh","license:apache-2.0","region:us"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-model-paddlepaddle--uvdoc__cap_0","uri":"capability://image.visual.document.image.unwarping.with.perspective.correction","name":"document image unwarping with perspective correction","description":"Detects and corrects perspective distortion in document photographs using deep learning-based geometric transformation. The model analyzes document boundaries and applies learned deformation mappings to normalize skewed, curved, or angled document images into frontal-facing rectangular layouts suitable for OCR. Works by predicting control point offsets or dense pixel displacement fields that unwarp the document surface.","intents":["I need to preprocess mobile camera photos of documents before running OCR to improve text recognition accuracy","I want to automatically correct perspective distortion in bulk document scanning workflows","I need to normalize document images with curved pages or extreme viewing angles for downstream processing"],"best_for":["document digitization pipelines requiring high OCR accuracy","mobile document scanning applications","teams building document processing workflows with PaddleOCR"],"limitations":["Optimized for document-like objects; performance degrades on non-planar or heavily occluded documents","Requires reasonably clear document boundaries; fails on heavily shadowed or low-contrast images","Output quality depends on input image resolution; very low-res inputs (<300px width) may produce artifacts","No built-in handling for multi-page document stacks or overlapping pages"],"requires":["PaddlePaddle inference framework (Python 3.6+)","Input image in common formats (JPEG, PNG, BMP)","Sufficient GPU memory for batch processing (2GB+ recommended for batch_size>4)"],"input_types":["image (JPEG, PNG, BMP, TIFF)","image batches (multiple documents)"],"output_types":["image (unwarped document image, same format as input)","geometric transformation metadata (optional)"],"categories":["image-visual","document-processing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-paddlepaddle--uvdoc__cap_1","uri":"capability://image.visual.multi.language.document.image.to.text.extraction","name":"multi-language document image-to-text extraction","description":"Performs end-to-end optical character recognition on document images with support for English and Chinese text recognition. The model combines document unwarping with character-level text detection and recognition, using PaddleOCR's architecture to identify text regions and decode characters. Outputs structured text with bounding box coordinates and confidence scores for each recognized character or word.","intents":["I need to extract text from document images in English or Chinese with positional information","I want to build a document digitization system that handles both Latin and CJK character sets","I need to process mixed-language documents (English + Chinese) in a single inference pass"],"best_for":["document digitization services supporting English and Chinese markets","enterprise document management systems requiring multilingual OCR","developers building PaddleOCR-based applications in Asia-Pacific regions"],"limitations":["Limited to English and Chinese; no support for other languages or scripts (Arabic, Devanagari, etc.)","Character-level recognition may struggle with handwritten or stylized fonts","Accuracy degrades significantly on low-resolution images (<150 DPI) or heavily degraded documents","No built-in table structure recognition; treats tabular data as linear text sequences"],"requires":["PaddlePaddle 2.0+","Python 3.6+","Input image resolution minimum 100x100 pixels, recommended 300+ DPI for optimal accuracy"],"input_types":["image (JPEG, PNG, BMP, TIFF)","image batches"],"output_types":["structured text with bounding boxes (JSON or custom format)","confidence scores per character/word","text-only extraction (optional)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-paddlepaddle--uvdoc__cap_2","uri":"capability://automation.workflow.batch.document.processing.with.gpu.acceleration","name":"batch document processing with gpu acceleration","description":"Enables efficient processing of multiple document images in parallel using PaddlePaddle's batching infrastructure and GPU acceleration. The model accepts image batches and processes them through the unwarping and OCR pipeline simultaneously, with automatic batch size optimization based on available GPU memory. Implements asynchronous processing patterns for high-throughput document digitization workflows.","intents":["I need to process thousands of document images efficiently without writing custom batching logic","I want to maximize GPU utilization when processing large document archives","I need to build a scalable document processing service that handles variable input volumes"],"best_for":["high-volume document digitization services","batch processing pipelines for enterprise document archives","teams deploying UVDoc on GPU-equipped servers or cloud instances"],"limitations":["Batch processing requires homogeneous image dimensions; heterogeneous sizes require padding/resizing overhead","GPU memory constraints limit batch size; typical batch_size=4-16 on consumer GPUs (8GB VRAM)","No built-in distributed processing; single-GPU inference only (no multi-GPU data parallelism)","Batching adds ~50-100ms latency per batch due to synchronization overhead"],"requires":["NVIDIA GPU with CUDA 10.2+ (or compatible PaddlePaddle backend)","PaddlePaddle with GPU support compiled","Sufficient GPU memory (minimum 2GB, 8GB+ recommended for production)"],"input_types":["image batch (list of JPEG/PNG/BMP images)","image directory (automatic batch loading)"],"output_types":["batch results (list of OCR outputs with bounding boxes)","processing metrics (throughput, latency per image)"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-paddlepaddle--uvdoc__cap_3","uri":"capability://image.visual.document.image.quality.assessment.and.filtering","name":"document image quality assessment and filtering","description":"Evaluates document image quality metrics (blur, contrast, brightness, skew angle) to identify images unsuitable for OCR processing. The model analyzes image statistics and learned quality features to assign quality scores and flag problematic images before expensive OCR inference. Enables filtering of low-quality inputs to improve overall pipeline accuracy and reduce processing of unusable documents.","intents":["I want to automatically reject blurry or low-contrast document scans before OCR to avoid garbage output","I need to identify which documents in a batch require re-scanning or manual intervention","I want to implement quality gates in my document processing pipeline to maintain accuracy thresholds"],"best_for":["document scanning applications with user feedback loops","quality assurance pipelines for document digitization","mobile document capture apps requiring real-time quality feedback"],"limitations":["Quality assessment is heuristic-based; may flag valid documents with unusual lighting as low-quality","No semantic quality assessment (e.g., cannot detect if document content is relevant/complete)","Threshold tuning required per use case; default thresholds may not suit all document types","Cannot assess OCR-specific quality (e.g., font size, character spacing) without running full OCR"],"requires":["PaddlePaddle inference framework","Input image in standard formats (JPEG, PNG, BMP)"],"input_types":["image (document photograph)","image batch"],"output_types":["quality score (0-100 or 0-1 range)","quality metrics breakdown (blur, contrast, brightness, skew)","pass/fail classification based on configurable thresholds"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-model-paddlepaddle--uvdoc__cap_4","uri":"capability://data.processing.analysis.bounding.box.aware.text.extraction.with.spatial.layout.preservation","name":"bounding box-aware text extraction with spatial layout preservation","description":"Extracts recognized text while preserving spatial layout information through character-level and word-level bounding boxes. The model outputs structured data mapping each recognized character or word to its pixel coordinates, enabling reconstruction of document layout, detection of text regions, and integration with downstream layout analysis. Supports both dense character-level boxes and word-level aggregated boxes.","intents":["I need to extract text with precise location information to reconstruct document layout","I want to identify specific text regions in a document image for selective processing or highlighting","I need to build a document search system that can highlight matching text in the original image"],"best_for":["document layout analysis and reconstruction systems","document search and retrieval applications with visual highlighting","form processing pipelines requiring field-level text extraction"],"limitations":["Bounding box accuracy depends on character segmentation quality; may be imprecise for touching characters or ligatures","Character-level boxes add significant output size (10-100x larger than text-only output)","No semantic layout understanding (cannot distinguish headers from body text or identify table structure)","Boxes are axis-aligned rectangles; cannot represent rotated or curved text accurately"],"requires":["PaddlePaddle inference framework","Output parsing logic to consume bounding box coordinates"],"input_types":["image (document photograph)"],"output_types":["structured data (JSON/dict with text + bounding boxes)","bounding box format: [x_min, y_min, x_max, y_max] in pixel coordinates","confidence scores per character/word"],"categories":["data-processing-analysis","image-visual"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":41,"verified":false,"data_access_risk":"high","permissions":["PaddlePaddle inference framework (Python 3.6+)","Input image in common formats (JPEG, PNG, BMP)","Sufficient GPU memory for batch processing (2GB+ recommended for batch_size>4)","PaddlePaddle 2.0+","Python 3.6+","Input image resolution minimum 100x100 pixels, recommended 300+ DPI for optimal accuracy","NVIDIA GPU with CUDA 10.2+ (or compatible PaddlePaddle backend)","PaddlePaddle with GPU support compiled","Sufficient GPU memory (minimum 2GB, 8GB+ recommended for production)","PaddlePaddle inference framework"],"failure_modes":["Optimized for document-like objects; performance degrades on non-planar or heavily occluded documents","Requires reasonably clear document boundaries; fails on heavily shadowed or low-contrast images","Output quality depends on input image resolution; very low-res inputs (<300px width) may produce artifacts","No built-in handling for multi-page document stacks or overlapping pages","Limited to English and Chinese; no support for other languages or scripts (Arabic, Devanagari, etc.)","Character-level recognition may struggle with handwritten or stylized fonts","Accuracy degrades significantly on low-resolution images (<150 DPI) or heavily degraded documents","No built-in table structure recognition; treats tabular data as linear text sequences","Batch processing requires homogeneous image dimensions; heterogeneous sizes require padding/resizing overhead","GPU memory constraints limit batch size; typical batch_size=4-16 on consumer GPUs (8GB VRAM)","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.5882539583730524,"quality":0.2,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.765Z","last_scraped_at":"2026-05-03T14:22:50.443Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":410015,"model_likes":8}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=paddlepaddle--uvdoc","compare_url":"https://unfragile.ai/compare?artifact=paddlepaddle--uvdoc"}},"signature":"SrCK+O2S3QIak27M9gyeO+u7M9vxGq4PeSrTSM+yDtEjAyBr/k9jxw8AdaLmmzDw9LgxExOdnSFXswvo+NHJAQ==","signedAt":"2026-06-22T00:11:27.809Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/paddlepaddle--uvdoc","artifact":"https://unfragile.ai/paddlepaddle--uvdoc","verify":"https://unfragile.ai/api/v1/verify?slug=paddlepaddle--uvdoc","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}