{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"reddit-1so2vmc","slug":"opus-4-7-high-scores-a-41-0-on-the-nyt-connections","name":"opus 4.7 (high) scores a 41.0% on the nyt connections extended benchmark. opus 4.6 scored 94.7%.","type":"benchmark","url":"https://github.com/lechmazur/nyt-connections/","page_url":"https://unfragile.ai/opus-4-7-high-scores-a-41-0-on-the-nyt-connections","categories":["testing-quality"],"tags":["singularity"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"reddit-1so2vmc__cap_0","uri":"capability://data.processing.analysis.benchmark.scoring.analysis","name":"benchmark scoring analysis","description":"Opus 4.7 evaluates its performance against the NYT Connections extended benchmark by analyzing the results of its scoring algorithm, which utilizes a comparison of word associations and connections. The implementation leverages statistical models to determine the accuracy of connections made, allowing for a clear metric of performance. This capability is distinct in its ability to provide detailed breakdowns of scoring discrepancies between versions, such as the significant drop from 94.7% to 41.0%.","intents":["How does Opus 4.7 perform on the NYT Connections benchmark compared to previous versions?","What specific areas led to the drop in performance from Opus 4.6 to Opus 4.7?","Can I get a detailed analysis of the scoring methodology used in the benchmark?"],"best_for":["data scientists and AI researchers analyzing model performance"],"limitations":["The benchmark only evaluates specific types of word connections, limiting its applicability to broader language tasks."],"requires":["Python 3.8+","Access to the NYT Connections benchmark dataset"],"input_types":["text"],"output_types":["structured data","performance metrics"],"categories":["data-processing-analysis","benchmarking"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"reddit-1so2vmc__cap_1","uri":"capability://data.processing.analysis.version.performance.comparison","name":"version performance comparison","description":"Opus 4.7 includes a capability to compare its performance metrics against previous versions, specifically focusing on the NYT Connections benchmark scores. This is achieved through a structured logging system that captures and analyzes historical performance data, allowing users to visualize trends and identify regression points. The distinct aspect of this capability is its emphasis on version-to-version analysis rather than just absolute performance metrics.","intents":["What are the performance trends between Opus 4.6 and Opus 4.7?","How can I visualize the changes in benchmark scores over time?","What specific regressions were identified in the latest version?"],"best_for":["developers maintaining AI models and seeking to understand performance changes"],"limitations":["Requires historical data to be effective; without it, comparisons are limited."],"requires":["Python 3.8+","Access to historical performance logs"],"input_types":["structured data"],"output_types":["visualizations","comparison reports"],"categories":["data-processing-analysis","model-evaluation"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":42,"verified":false,"data_access_risk":"low","permissions":["Python 3.8+","Access to the NYT Connections benchmark dataset","Access to historical performance logs"],"failure_modes":["The benchmark only evaluates specific types of word connections, limiting its applicability to broader language tasks.","Requires historical data to be effective; without it, comparisons are limited.","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.9,"quality":0.14,"ecosystem":0.33,"match_graph":0.25,"freshness":0.9,"weights":{"adoption":0.25,"quality":0.35,"ecosystem":0.15,"match_graph":0.2,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:25.061Z","last_scraped_at":"2026-05-04T07:51:01.590Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=opus-4-7-high-scores-a-41-0-on-the-nyt-connections","compare_url":"https://unfragile.ai/compare?artifact=opus-4-7-high-scores-a-41-0-on-the-nyt-connections"}},"signature":"bH1JHLKNckw/eZgoKbJ7inz7Tw7TzPkwqzzPuSwKP7RL/xpf+ZolEN2Nd6UOnwd7ldZwifFAoDu0AM9WjhEPAQ==","signedAt":"2026-06-15T11:01:22.751Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/opus-4-7-high-scores-a-41-0-on-the-nyt-connections","artifact":"https://unfragile.ai/opus-4-7-high-scores-a-41-0-on-the-nyt-connections","verify":"https://unfragile.ai/api/v1/verify?slug=opus-4-7-high-scores-a-41-0-on-the-nyt-connections","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}