{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"npm-llm-code-highlighter","slug":"llm-code-highlighter","name":"llm-code-highlighter","type":"repo","url":"https://github.com/restlessronin/llm-code-highlighter#readme","page_url":"https://unfragile.ai/llm-code-highlighter","categories":["automation"],"tags":[],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"npm-llm-code-highlighter__cap_0","uri":"capability://data.processing.analysis.syntax.aware.code.condensation.with.structural.preservation","name":"syntax-aware code condensation with structural preservation","description":"Extracts and highlights essential code elements (function signatures, class definitions, imports, key logic) while removing boilerplate and comments, using a simplified repomap technique adapted from Aider Chat. The tool parses source code into an AST-like representation to identify structural boundaries and preserve semantic relationships, then outputs a condensed version that maintains enough context for LLM analysis without token bloat.","intents":["I need to feed large codebases to Claude/GPT but stay within token limits","I want to show an LLM the essential structure of a file without verbose comments and boilerplate","I need to create a compact code summary that preserves function signatures and class hierarchies for context"],"best_for":["developers using LLM-based code analysis tools (Aider, custom agents)","teams building AI-assisted refactoring or code review systems","engineers working with large monorepos who need efficient context passing to LLMs"],"limitations":["Relies on language-specific parsing — unsupported languages fall back to naive line-filtering","May lose important inline documentation or docstrings if they're not recognized as structural elements","No semantic understanding of code intent — removes lines based on syntactic patterns, not logical importance","Condensation ratio varies significantly by language and code style; dense functional code may not compress well"],"requires":["Node.js 14+ or Python 3.7+","Source code files in supported languages (JavaScript, Python, TypeScript, Java, C++, Go, Rust, etc.)","Read access to filesystem or ability to pass code as string input"],"input_types":["source code (text)","file paths (string)","directory paths for batch processing"],"output_types":["condensed source code (text)","structured highlights with line numbers (JSON)","token count estimates (numeric)"],"categories":["data-processing-analysis","code-generation-editing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_1","uri":"capability://data.processing.analysis.multi.language.code.parsing.with.fallback.strategies","name":"multi-language code parsing with fallback strategies","description":"Detects source code language from file extension or content, then applies language-specific parsing rules to identify structural elements (function/class definitions, imports, decorators). Falls back to heuristic-based line filtering for unsupported languages, ensuring graceful degradation across diverse codebases without requiring external parser dependencies.","intents":["I have a mixed-language codebase and need consistent condensation across all file types","I want the tool to automatically detect language and apply appropriate highlighting rules","I need to handle edge cases where language detection might be ambiguous"],"best_for":["polyglot development teams with JavaScript, Python, Java, Go, Rust, C++ codebases","monorepo maintainers processing heterogeneous source trees","LLM agents that need to analyze arbitrary code without manual language specification"],"limitations":["Language detection relies on file extensions — ambiguous or non-standard extensions may be misclassified","Unsupported languages degrade to generic line-filtering heuristics, losing structural awareness","No support for domain-specific languages (DSLs) or configuration file formats (YAML, HCL, Terraform)","Parsing rules are pattern-based, not full-featured compilers — may miss edge cases in complex syntax"],"requires":["File extension or explicit language parameter","Source code conforming to standard syntax for detected language"],"input_types":["source code (text)","file path with extension (string)","language hint (string, optional)"],"output_types":["condensed code (text)","detected language identifier (string)","parsing confidence score (numeric, optional)"],"categories":["data-processing-analysis","code-generation-editing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_2","uri":"capability://data.processing.analysis.token.aware.condensation.with.size.estimation","name":"token-aware condensation with size estimation","description":"Estimates token consumption of condensed code using language-model-specific tokenizers (OpenAI, Anthropic, etc.) and provides feedback on compression ratio achieved. Allows developers to tune condensation aggressiveness (preserve more detail vs. maximize compression) based on target token budget, enabling predictable context window usage.","intents":["I need to know how many tokens my condensed code will consume before sending to an LLM API","I want to adjust the condensation level to fit within a specific token budget","I need to compare token efficiency across different code files or condensation strategies"],"best_for":["developers optimizing LLM API costs by managing token consumption","teams building agentic systems with fixed context window budgets","engineers evaluating trade-offs between code detail and token efficiency"],"limitations":["Token estimation is approximate — actual token counts from LLM APIs may vary by 5-10% due to tokenizer differences","Requires explicit tokenizer selection or API key for accurate estimation","No dynamic adjustment of condensation level — users must manually tune parameters and re-run","Token counting adds computational overhead (~50-200ms per file depending on size)"],"requires":["Tokenizer library (tiktoken for OpenAI, anthropic-tokenizer, or equivalent)","Optional: API key for LLM provider if using live token counting"],"input_types":["condensed code (text)","tokenizer name or model identifier (string)","target token budget (numeric, optional)"],"output_types":["token count estimate (numeric)","compression ratio (percentage)","token budget remaining (numeric)","condensation level recommendation (string)"],"categories":["data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_3","uri":"capability://automation.workflow.batch.directory.processing.with.recursive.traversal","name":"batch directory processing with recursive traversal","description":"Processes entire directory trees recursively, applying condensation rules to all source files matching specified patterns (glob filters, language filters). Outputs a structured map of condensed files with metadata (original size, condensed size, token count, language), enabling efficient analysis of large monorepos or multi-module projects.","intents":["I need to condense an entire codebase directory at once for LLM analysis","I want to selectively process only certain file types or directories (e.g., skip node_modules, tests)","I need a summary report showing compression statistics across the whole project"],"best_for":["monorepo maintainers analyzing large codebases with LLM agents","teams building code indexing or documentation systems","developers preparing entire projects for AI-assisted refactoring or migration"],"limitations":["Recursive traversal can be slow on very large directories (100k+ files) — no built-in parallelization","Glob pattern matching is basic — complex exclusion rules may require manual filtering","Memory usage scales with total codebase size — very large projects may require streaming or chunking","No incremental processing — re-running on unchanged files wastes computation"],"requires":["Read access to filesystem","Sufficient disk space for output files","Optional: glob pattern syntax knowledge for filtering"],"input_types":["directory path (string)","glob patterns for inclusion/exclusion (array of strings)","language filters (array of strings, optional)"],"output_types":["condensed files (text files in output directory)","metadata manifest (JSON with file stats)","summary report (text with compression statistics)"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_4","uri":"capability://automation.workflow.configurable.condensation.profiles.with.preset.strategies","name":"configurable condensation profiles with preset strategies","description":"Offers multiple condensation profiles (aggressive, balanced, conservative) that control which code elements are preserved (imports, comments, docstrings, blank lines, etc.). Users can define custom profiles via configuration files, enabling consistent condensation behavior across teams and projects without per-file parameter tuning.","intents":["I want to apply consistent condensation rules across my team's codebase","I need different condensation levels for different file types (e.g., more detail for business logic, less for utilities)","I want to preserve docstrings and type hints but remove inline comments"],"best_for":["teams standardizing on LLM-assisted code analysis with shared condensation policies","projects with varying code styles that need adaptive condensation rules","organizations building internal tools that wrap llm-code-highlighter with custom profiles"],"limitations":["Profile configuration is static — no runtime adaptation based on code content or LLM feedback","Limited to predefined profile options unless users write custom parsing logic","No validation of profile configurations — invalid settings may silently degrade to defaults","Profiles are global — no per-file or per-directory overrides without multiple configuration files"],"requires":["Configuration file (JSON, YAML, or JavaScript) in project root or specified path","Understanding of available profile options and customization syntax"],"input_types":["profile name (string)","configuration object (JSON/YAML)","custom profile rules (object with boolean flags)"],"output_types":["condensed code with profile-specific rules applied (text)","profile metadata (JSON with applied settings)"],"categories":["automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_5","uri":"capability://data.processing.analysis.import.and.dependency.extraction.with.relationship.mapping","name":"import and dependency extraction with relationship mapping","description":"Identifies and extracts import statements, require() calls, and dependency declarations from source code, then maps relationships between modules (which files import which). Outputs a dependency graph or adjacency list that helps LLMs understand module structure and interdependencies without analyzing full file contents.","intents":["I want to show an LLM the dependency structure of my codebase without including all implementation details","I need to identify which modules depend on each other for refactoring or migration planning","I want to create a lightweight module map for LLM-based code navigation"],"best_for":["developers analyzing large codebases where module structure is more important than implementation","teams planning refactoring or modularization with LLM assistance","engineers building code navigation or documentation tools"],"limitations":["Dynamic imports (require() with variables, dynamic paths) are not resolved — only static imports are extracted","Circular dependencies are detected but not resolved — LLMs must handle cycles explicitly","External dependencies (npm packages, pip modules) are listed but not distinguished from internal modules","No support for implicit dependencies (e.g., shared configuration files, environment variables)"],"requires":["Source code with standard import/require syntax","Language-specific parsing rules for import statement detection"],"input_types":["source code (text)","directory path for batch dependency extraction (string)"],"output_types":["dependency list (array of strings)","dependency graph (JSON with nodes and edges)","adjacency list (JSON mapping modules to dependencies)"],"categories":["data-processing-analysis","code-generation-editing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_6","uri":"capability://data.processing.analysis.function.and.class.signature.extraction.with.metadata","name":"function and class signature extraction with metadata","description":"Parses source code to extract function/method signatures, class definitions, and type annotations, preserving parameter names, return types, and decorators. Outputs a structured list of callable interfaces with optional docstring summaries, enabling LLMs to understand the public API of a module without reading implementation details.","intents":["I want to show an LLM the public API of a module without implementation details","I need to extract function signatures for code generation or refactoring tasks","I want to create a lightweight API reference for LLM-based code navigation"],"best_for":["developers using LLMs to generate code that calls existing APIs","teams building code generation tools that need to understand available functions","engineers creating API documentation or interface contracts for LLM analysis"],"limitations":["Signature extraction is syntactic — does not resolve type aliases or complex generic types","Private/internal functions are not distinguished from public APIs without explicit markers (e.g., underscore prefix)","Docstring extraction is basic — complex documentation formats may not parse correctly","No support for overloaded functions or method polymorphism — all signatures are listed separately"],"requires":["Source code with standard function/class definition syntax","Optional: type annotations for richer metadata"],"input_types":["source code (text)","language identifier (string)"],"output_types":["function signatures (array of objects with name, parameters, return type)","class definitions (array of objects with name, methods, properties)","API reference (JSON with callable interfaces)"],"categories":["data-processing-analysis","code-generation-editing"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_7","uri":"capability://data.processing.analysis.comment.and.docstring.filtering.with.preservation.options","name":"comment and docstring filtering with preservation options","description":"Identifies and selectively removes or preserves comments, docstrings, and documentation blocks based on configurable rules (remove all, keep docstrings only, keep type hints, etc.). Supports multiple comment styles (single-line, block, inline) across languages, enabling fine-grained control over documentation preservation in condensed code.","intents":["I want to remove verbose comments to reduce token count but keep docstrings for context","I need to preserve type hints and parameter documentation while removing implementation comments","I want to strip all comments for a minimal code footprint"],"best_for":["developers optimizing token usage while preserving semantic documentation","teams with verbose commenting styles that need aggressive condensation","engineers building code analysis tools that need clean, comment-free code"],"limitations":["Comment detection is pattern-based — may misidentify strings containing comment markers as actual comments","Docstring preservation requires language-specific rules — not all languages have standard docstring formats","Inline comments are harder to distinguish from code — removal may break line-based logic","No semantic understanding of comment importance — all comments are treated equally"],"requires":["Source code with standard comment syntax for target language","Comment filtering configuration (rules for what to preserve)"],"input_types":["source code (text)","comment preservation strategy (string: 'none', 'docstrings-only', 'type-hints', 'all')"],"output_types":["condensed code with comments filtered (text)","comment removal statistics (JSON with counts by type)"],"categories":["data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_8","uri":"capability://data.processing.analysis.whitespace.and.formatting.normalization","name":"whitespace and formatting normalization","description":"Removes unnecessary whitespace (blank lines, excessive indentation, trailing spaces) while preserving code structure and readability. Normalizes indentation to a consistent level (spaces or tabs) and collapses multiple blank lines into single lines, reducing token count without affecting code semantics.","intents":["I want to reduce token count by removing unnecessary whitespace","I need to normalize indentation across files with inconsistent formatting","I want to collapse blank lines to minimize code size for LLM processing"],"best_for":["developers optimizing token efficiency in code condensation","teams with inconsistent code formatting that need normalization","engineers building code preprocessing pipelines"],"limitations":["Aggressive whitespace removal may reduce code readability for human review","Indentation normalization assumes standard spacing — may conflict with language-specific conventions (e.g., Python's significant whitespace)","No preservation of intentional blank lines for logical separation — all whitespace is treated equally","Minimal token savings in well-formatted code — most benefit in verbose or inconsistently formatted files"],"requires":["Source code with standard whitespace characters"],"input_types":["source code (text)","whitespace normalization level (string: 'minimal', 'moderate', 'aggressive')"],"output_types":["normalized code (text)","whitespace removal statistics (JSON with bytes saved)"],"categories":["data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"npm-llm-code-highlighter__cap_9","uri":"capability://data.processing.analysis.line.by.line.filtering.with.heuristic.scoring","name":"line-by-line filtering with heuristic scoring","description":"Applies heuristic scoring to individual lines of code to determine importance (function definitions score high, blank lines score low, etc.), then filters lines below a configurable threshold. Uses pattern matching to identify structural elements (imports, definitions, key statements) and removes low-value lines (blank lines, comments, verbose formatting) while preserving semantic content.","intents":["I want to automatically identify and remove unimportant lines without manual configuration","I need a fallback strategy for unsupported languages that still provides reasonable condensation","I want to tune condensation aggressiveness by adjusting a single threshold parameter"],"best_for":["developers processing code in unsupported languages with generic fallback","teams wanting simple, threshold-based condensation without complex configuration","engineers building quick prototypes that need reasonable condensation without language-specific rules"],"limitations":["Heuristic scoring is simplistic — may remove important lines or preserve unimportant ones","No semantic understanding of code intent — scoring is based on syntactic patterns only","Threshold tuning is manual and language-dependent — optimal values vary significantly","False positives/negatives are common in complex code with unusual formatting"],"requires":["Source code in any language","Threshold parameter (numeric, typically 0.0-1.0)"],"input_types":["source code (text)","importance threshold (numeric)"],"output_types":["filtered code (text)","line importance scores (array of numeric values)"],"categories":["data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":31,"verified":false,"data_access_risk":"high","permissions":["Node.js 14+ or Python 3.7+","Source code files in supported languages (JavaScript, Python, TypeScript, Java, C++, Go, Rust, etc.)","Read access to filesystem or ability to pass code as string input","File extension or explicit language parameter","Source code conforming to standard syntax for detected language","Tokenizer library (tiktoken for OpenAI, anthropic-tokenizer, or equivalent)","Optional: API key for LLM provider if using live token counting","Read access to filesystem","Sufficient disk space for output files","Optional: glob pattern syntax knowledge for filtering"],"failure_modes":["Relies on language-specific parsing — unsupported languages fall back to naive line-filtering","May lose important inline documentation or docstrings if they're not recognized as structural elements","No semantic understanding of code intent — removes lines based on syntactic patterns, not logical importance","Condensation ratio varies significantly by language and code style; dense functional code may not compress well","Language detection relies on file extensions — ambiguous or non-standard extensions may be misclassified","Unsupported languages degrade to generic line-filtering heuristics, losing structural awareness","No support for domain-specific languages (DSLs) or configuration file formats (YAML, HCL, Terraform)","Parsing rules are pattern-based, not full-featured compilers — may miss edge cases in complex syntax","Token estimation is approximate — actual token counts from LLM APIs may vary by 5-10% due to tokenizer differences","Requires explicit tokenizer selection or API key for accurate estimation","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.19589459324939362,"quality":0.45,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.52,"weights":{"adoption":0.3,"quality":0.2,"ecosystem":0.15,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:23.902Z","last_scraped_at":"2026-04-22T08:08:13.652Z","last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":4549,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=llm-code-highlighter","compare_url":"https://unfragile.ai/compare?artifact=llm-code-highlighter"}},"signature":"dodVyiUnjH6l3zZEHQRZoxvhuFGwGSPCriSMVyjgbg+CidAfaZFr92XmgN2L76y4mbuwHWoSOBC9WCmFJSWxDQ==","signedAt":"2026-06-21T10:25:26.119Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/llm-code-highlighter","artifact":"https://unfragile.ai/llm-code-highlighter","verify":"https://unfragile.ai/api/v1/verify?slug=llm-code-highlighter","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}