{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"hf-dataset-ntu-nlp-sg--xcodeeval","slug":"ntu-nlp-sg--xcodeeval","name":"xCodeEval","type":"dataset","url":"https://huggingface.co/datasets/NTU-NLP-sg/xCodeEval","page_url":"https://unfragile.ai/ntu-nlp-sg--xcodeeval","categories":["model-training"],"tags":["task_categories:translation","task_categories:token-classification","task_categories:text-retrieval","task_categories:text-generation","task_categories:text-classification","task_categories:feature-extraction","task_categories:question-answering","annotations_creators:expert-generated","language_creators:found","language_creators:expert-generated","multilinguality:multilingual","source_datasets:original","language:code","language:en","license:cc-by-4.0","size_categories:1M<n<10M","arxiv:2303.03004","region:us","programming-language","code"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_0","uri":"capability://data.processing.analysis.multilingual.code.to.code.translation.dataset.construction","name":"multilingual code-to-code translation dataset construction","description":"Provides 696,087 expert-annotated code translation pairs across multiple programming languages, enabling training of models to translate code semantically between languages while preserving functionality. The dataset uses expert-generated annotations to ensure translation quality and includes both source code and target translations with language-pair coverage, allowing models to learn cross-language code semantics through supervised learning on diverse programming paradigms.","intents":["Train code translation models that convert legacy code from one language to another while maintaining functionality","Build multilingual code understanding systems that work across Java, Python, C++, JavaScript, and other languages","Evaluate code-to-code translation quality and semantic preservation across language boundaries","Create benchmarks for assessing how well LLMs understand code equivalence across programming languages"],"best_for":["ML researchers training code translation models","Teams building cross-language code migration tools","Developers evaluating multilingual code LLM performance","Organizations standardizing legacy codebases across multiple languages"],"limitations":["Expert annotations may reflect specific translation preferences and idioms, not all valid translations","Dataset size (696K examples) may be insufficient for training very large models on all language pairs equally","Language pair coverage is uneven — some language combinations may have significantly fewer examples than others","Annotations are static snapshots and don't capture evolving language features or modern idioms introduced after dataset creation"],"requires":["HuggingFace Datasets library (transformers>=4.0)","Python 3.7+","Sufficient disk space for 1M-10M size category dataset (~2-5GB estimated)","Understanding of target programming languages for meaningful evaluation"],"input_types":["source code (Java, Python, C++, JavaScript, Go, Rust, etc.)","code snippets with function/method scope","code with inline comments and documentation"],"output_types":["translated code in target language","structured metadata (language pair, translation quality scores)","token-level alignment annotations for fine-grained analysis"],"categories":["data-processing-analysis","code-generation-editing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_1","uri":"capability://data.processing.analysis.code.clone.detection.dataset.with.multilingual.support","name":"code clone detection dataset with multilingual support","description":"Provides annotated pairs of semantically equivalent code snippets across multiple programming languages, enabling training of models to detect code clones and semantic similarity. The dataset uses expert classification to identify true semantic equivalence versus syntactic similarity, allowing models to learn language-agnostic code representations through contrastive or classification-based approaches on code pairs with varying levels of structural and semantic overlap.","intents":["Train code clone detection models that identify semantically equivalent code across language boundaries","Build code similarity search systems that find functionally equivalent implementations regardless of language","Evaluate how well code embeddings capture semantic equivalence across programming paradigms","Create benchmarks for assessing code deduplication and plagiarism detection systems"],"best_for":["Security researchers building code plagiarism detection systems","ML engineers training code embedding models","Teams managing large polyglot codebases needing deduplication","Researchers studying code semantics and language-agnostic representations"],"limitations":["Expert annotations reflect human judgment of equivalence, which may not align with all valid interpretations of 'semantic equivalence'","Clone detection focuses on function/method-level granularity; may not capture equivalence at statement or expression level","Dataset may have annotation bias toward certain programming styles or idioms common in the expert annotators' experience","No temporal dimension — doesn't capture how code equivalence evolves as languages and libraries change"],"requires":["HuggingFace Datasets library","Python 3.7+","Code parsing/tokenization tools for target languages","Familiarity with code similarity metrics and embedding approaches"],"input_types":["code snippet pairs (same or different languages)","function/method implementations","code with variable names and comments"],"output_types":["binary classification labels (equivalent/not equivalent)","similarity scores or confidence levels","token-level alignment for fine-grained analysis"],"categories":["data-processing-analysis","text-classification"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_2","uri":"capability://search.retrieval.code.search.and.retrieval.dataset.with.natural.language.queries","name":"code search and retrieval dataset with natural language queries","description":"Provides paired code snippets and natural language descriptions/queries, enabling training of code search models that retrieve relevant code given natural language intent. The dataset uses expert-generated descriptions and found code to create query-code pairs, allowing models to learn the mapping between natural language semantics and code implementation through text-retrieval and feature-extraction tasks on multilingual code.","intents":["Train code search engines that find relevant implementations given natural language problem descriptions","Build semantic code search systems that understand intent beyond keyword matching","Evaluate how well code-language models understand the relationship between documentation and implementation","Create benchmarks for assessing code retrieval quality in IDE plugins and code recommendation systems"],"best_for":["Teams building code search and recommendation features in IDEs","ML researchers training code-language alignment models","Organizations implementing internal code discovery systems","Developers evaluating code retrieval models for documentation generation"],"limitations":["Natural language descriptions may be incomplete or ambiguous, not capturing all nuances of code behavior","Query-code pairing may reflect specific documentation styles and may not generalize to all code description patterns","Dataset size may be insufficient for training very large retrieval models with many language pairs","Descriptions are static and may become outdated as code evolves or APIs change"],"requires":["HuggingFace Datasets library","Python 3.7+","Text embedding and code embedding models (e.g., CodeBERT, GraphCodeBERT)","Retrieval evaluation metrics (MRR, NDCG, recall@k)"],"input_types":["natural language queries (English and potentially other languages)","code snippets in multiple programming languages","function signatures and documentation strings"],"output_types":["ranked lists of relevant code snippets","relevance scores or similarity metrics","embedding vectors for code and queries"],"categories":["search-retrieval","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_3","uri":"capability://text.generation.language.code.question.answering.dataset.with.multilingual.code.context","name":"code question-answering dataset with multilingual code context","description":"Provides code snippets paired with natural language questions and expert-generated answers about code behavior, enabling training of models to answer questions about code functionality and semantics. The dataset uses question-answering and text-generation task formulations to train models to understand code and generate natural language explanations, supporting both extractive and abstractive answer generation across multiple programming languages.","intents":["Train code understanding models that answer questions about what code does and why","Build AI assistants that explain code behavior and help developers understand unfamiliar implementations","Evaluate how well code LLMs understand code semantics and can articulate behavior in natural language","Create benchmarks for code explanation and documentation generation systems"],"best_for":["Teams building code explanation features in IDEs and documentation tools","ML researchers training code understanding and generation models","Organizations creating AI-powered code review and documentation systems","Developers evaluating code LLM comprehension on multilingual codebases"],"limitations":["Expert answers may reflect specific interpretation of code behavior and may not capture all valid explanations","Questions may be biased toward certain types of code understanding (e.g., control flow vs. data flow)","Answer quality depends on expert annotator expertise in each programming language","Dataset may not cover edge cases, error handling, or non-obvious code behavior equally across languages"],"requires":["HuggingFace Datasets library","Python 3.7+","Code parsing and AST analysis tools for target languages","Text generation evaluation metrics (BLEU, ROUGE, BERTScore)"],"input_types":["code snippets in multiple programming languages","natural language questions about code behavior","function signatures and surrounding context"],"output_types":["natural language answers explaining code behavior","extractive spans from code or documentation","structured explanations of code semantics"],"categories":["text-generation-language","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_4","uri":"capability://data.processing.analysis.code.feature.extraction.and.token.classification.dataset","name":"code feature extraction and token classification dataset","description":"Provides code snippets with expert-generated token-level annotations for semantic features (e.g., variable scope, function calls, data flow), enabling training of models to identify and classify code elements. The dataset uses token-classification and feature-extraction task formulations to train models to understand fine-grained code structure and semantics, supporting both sequence labeling and structured prediction approaches on multilingual code.","intents":["Train code understanding models that identify semantic elements like variable definitions, function calls, and data dependencies","Build code analysis tools that extract structured information from code for refactoring and optimization","Evaluate how well code models understand code structure and semantics at the token level","Create benchmarks for code parsing and semantic analysis across programming languages"],"best_for":["Teams building code analysis and refactoring tools","ML researchers training code understanding models with fine-grained annotations","Organizations implementing code quality and security analysis systems","Developers evaluating code models on structured prediction tasks"],"limitations":["Token-level annotations require consistent annotation guidelines across languages, which may introduce bias","Annotation granularity may not capture all relevant semantic features or may over-segment code","Different programming languages have different token structures, making cross-language learning challenging","Annotations are static and may not reflect dynamic behavior or runtime semantics"],"requires":["HuggingFace Datasets library","Python 3.7+","Tokenization tools for target programming languages","Sequence labeling models (e.g., BiLSTM-CRF, Transformer-based taggers)"],"input_types":["code snippets with token-level annotations","code in multiple programming languages","function/method implementations with context"],"output_types":["token-level semantic labels (variable, function, keyword, etc.)","structured code features (scope, type, data flow)","BIO/BIOES tagged sequences for sequence labeling"],"categories":["data-processing-analysis","text-classification"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_5","uri":"capability://data.processing.analysis.multilingual.code.representation.learning.through.contrastive.pairs","name":"multilingual code representation learning through contrastive pairs","description":"Provides code pairs with varying degrees of semantic and syntactic similarity across multiple programming languages, enabling training of code embedding models through contrastive learning approaches. The dataset uses both positive pairs (semantically equivalent code) and negative pairs (dissimilar code) to train models to learn language-agnostic code representations that capture semantic similarity while being invariant to syntactic variation and language choice.","intents":["Train code embedding models that produce similar representations for semantically equivalent code across languages","Build code similarity and clustering systems that work across programming language boundaries","Evaluate how well code embeddings capture semantic equivalence independent of language and syntax","Create benchmarks for assessing code representation quality on multilingual code understanding tasks"],"best_for":["ML researchers training code embedding and representation learning models","Teams building code clustering and deduplication systems","Organizations implementing semantic code search across polyglot codebases","Developers evaluating code model representations on downstream tasks"],"limitations":["Contrastive pair selection may introduce bias toward certain types of equivalence and miss valid alternatives","Negative pair selection strategy significantly impacts learning quality but may not be optimal for all downstream tasks","Embedding quality depends on model architecture and training procedure, not just dataset quality","Dataset may not provide sufficient hard negatives to train robust embeddings for challenging similarity judgments"],"requires":["HuggingFace Datasets library","Python 3.7+","Contrastive learning frameworks (e.g., SimCLR, Triplet Loss implementations)","Code embedding models (e.g., CodeBERT, GraphCodeBERT, UniXcoder)"],"input_types":["code snippet pairs (positive and negative examples)","code in multiple programming languages","function/method implementations with varying complexity"],"output_types":["embedding vectors for code snippets","similarity scores between code pairs","clustering assignments based on semantic equivalence"],"categories":["data-processing-analysis","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"hf-dataset-ntu-nlp-sg--xcodeeval__cap_6","uri":"capability://text.generation.language.code.to.text.generation.dataset.for.documentation.and.explanation","name":"code-to-text generation dataset for documentation and explanation","description":"Provides code snippets paired with expert-generated natural language descriptions and documentation, enabling training of models to generate documentation and explanations from code. The dataset uses text-generation task formulations to train models to understand code semantics and produce coherent, accurate natural language descriptions, supporting both abstractive summarization and detailed explanation generation across multiple programming languages.","intents":["Train code-to-documentation models that automatically generate docstrings and API documentation from code","Build code explanation systems that produce human-readable descriptions of code functionality","Evaluate how well code-language models understand code and can articulate behavior in natural language","Create benchmarks for automatic documentation generation and code summarization systems"],"best_for":["Teams building automatic documentation generation tools","ML researchers training code-to-text generation models","Organizations implementing code quality and documentation standards","Developers evaluating code LLM generation quality on documentation tasks"],"limitations":["Expert-generated descriptions may reflect specific documentation styles and may not generalize to all code description patterns","Text generation quality depends heavily on model architecture and training procedure, not just dataset quality","Descriptions may be incomplete or may not capture all nuances of code behavior","Dataset may have bias toward certain types of code (e.g., well-structured functions vs. complex algorithms)"],"requires":["HuggingFace Datasets library","Python 3.7+","Text generation models (e.g., CodeT5, BART, T5)","Text generation evaluation metrics (BLEU, ROUGE, BERTScore, METEOR)"],"input_types":["code snippets in multiple programming languages","function/method implementations with signatures","code with variable names and inline comments"],"output_types":["natural language descriptions and docstrings","API documentation and usage examples","code summaries and explanations"],"categories":["text-generation-language","code-generation-editing"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":24,"verified":false,"data_access_risk":"low","permissions":["HuggingFace Datasets library (transformers>=4.0)","Python 3.7+","Sufficient disk space for 1M-10M size category dataset (~2-5GB estimated)","Understanding of target programming languages for meaningful evaluation","HuggingFace Datasets library","Code parsing/tokenization tools for target languages","Familiarity with code similarity metrics and embedding approaches","Text embedding and code embedding models (e.g., CodeBERT, GraphCodeBERT)","Retrieval evaluation metrics (MRR, NDCG, recall@k)","Code parsing and AST analysis tools for target languages"],"failure_modes":["Expert annotations may reflect specific translation preferences and idioms, not all valid translations","Dataset size (696K examples) may be insufficient for training very large models on all language pairs equally","Language pair coverage is uneven — some language combinations may have significantly fewer examples than others","Annotations are static snapshots and don't capture evolving language features or modern idioms introduced after dataset creation","Expert annotations reflect human judgment of equivalence, which may not align with all valid interpretations of 'semantic equivalence'","Clone detection focuses on function/method-level granularity; may not capture equivalence at statement or expression level","Dataset may have annotation bias toward certain programming styles or idioms common in the expert annotators' experience","No temporal dimension — doesn't capture how code equivalence evolves as languages and libraries change","Natural language descriptions may be incomplete or ambiguous, not capturing all nuances of code behavior","Query-code pairing may reflect specific documentation styles and may not generalize to all code description patterns","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.05,"quality":0.24,"ecosystem":0.5000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.3,"quality":0.25,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:22.764Z","last_scraped_at":"2026-05-03T14:22:48.064Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=ntu-nlp-sg--xcodeeval","compare_url":"https://unfragile.ai/compare?artifact=ntu-nlp-sg--xcodeeval"}},"signature":"YPNr+FrCwkJmCJ3PTclWpH95AgCU8IptwsoyBSiCT8EG3FYMz2k+VVaPfVgLyOzUYw8/BIP4f4gG3FQUFViRCA==","signedAt":"2026-06-19T19:09:03.533Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/ntu-nlp-sg--xcodeeval","artifact":"https://unfragile.ai/ntu-nlp-sg--xcodeeval","verify":"https://unfragile.ai/api/v1/verify?slug=ntu-nlp-sg--xcodeeval","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}