{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"papers-with-code-arc","slug":"arc","name":"ARC","type":"benchmark","url":"https://paperswithcode.com/dataset/arc","page_url":"https://unfragile.ai/arc","categories":["testing-quality"],"tags":["benchmark","evaluation","visual-reasoning","abstraction"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"papers-with-code-arc__cap_0","uri":"capability://planning.reasoning.abstract.reasoning.problem.generation","name":"abstract reasoning problem generation","description":"ARC generates visual reasoning problems that require abstract thinking and rule inference. It employs a grid-pattern puzzle design, ensuring that each problem is solvable by humans but challenging for AI systems. This unique structure tests the ability to deduce underlying rules from visual examples, making it distinct from traditional benchmarks that rely on memorization or straightforward logic.","intents":["How can I create a benchmark to test AI's reasoning capabilities?","What kind of visual puzzles can I use to evaluate abstract reasoning in AI?","I need a dataset that challenges AI systems beyond simple pattern recognition."],"best_for":["researchers developing AI models for reasoning tasks","developers creating AI systems that require advanced reasoning capabilities"],"limitations":["Limited to 800 total problems, which may not cover all reasoning scenarios","Problems are specifically designed for visual reasoning, not applicable to other reasoning types"],"requires":["No specific prerequisites, but familiarity with AI evaluation methods is beneficial"],"input_types":["visual patterns","grid-based puzzles"],"output_types":["problem sets","evaluation metrics"],"categories":["planning-reasoning","benchmarking"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"papers-with-code-arc__cap_1","uri":"capability://data.processing.analysis.evaluation.metric.formulation","name":"evaluation metric formulation","description":"ARC provides a framework for evaluating the performance of AI systems on its visual reasoning problems. It uses a set of criteria based on human performance to assess how well AI models can infer rules from the provided examples. This systematic approach to evaluation ensures that results are comparable across different AI systems and methodologies.","intents":["How can I measure the reasoning capabilities of my AI model?","What metrics should I use to evaluate performance on visual reasoning tasks?","I need a standardized way to compare different AI systems on reasoning benchmarks."],"best_for":["AI researchers looking to benchmark their models","developers needing a standardized evaluation method for reasoning tasks"],"limitations":["Evaluation metrics may not capture all nuances of reasoning","Dependent on the quality and diversity of the problem set"],"requires":["Basic understanding of AI evaluation metrics and benchmarking"],"input_types":["AI model outputs","problem sets"],"output_types":["evaluation scores","comparative analysis reports"],"categories":["data-processing-analysis","evaluation"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":49,"verified":false,"data_access_risk":"low","permissions":["No specific prerequisites, but familiarity with AI evaluation methods is beneficial","Basic understanding of AI evaluation metrics and benchmarking"],"failure_modes":["Limited to 800 total problems, which may not cover all reasoning scenarios","Problems are specifically designed for visual reasoning, not applicable to other reasoning types","Evaluation metrics may not capture all nuances of reasoning","Dependent on the quality and diversity of the problem set","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.8,"quality":0.39,"ecosystem":0.42,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.25,"quality":0.35,"ecosystem":0.15,"match_graph":0.2,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:25.060Z","last_scraped_at":"2026-05-03T15:20:49.428Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=arc","compare_url":"https://unfragile.ai/compare?artifact=arc"}},"signature":"Lg5iaBhmVURrPgqqEc5Cm6jEruIECf7dbdRpsZWKRY/nmFMaJp01iqwKLXda4uUag6GLaGch1cD8OV7J3vH3DA==","signedAt":"2026-06-22T11:47:25.577Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/arc","artifact":"https://unfragile.ai/arc","verify":"https://unfragile.ai/api/v1/verify?slug=arc","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}