{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"chatglm-4","slug":"chatglm-4","name":"ChatGLM-4","type":"model","url":"https://github.com/THUDM/ChatGLM-6B","page_url":"https://unfragile.ai/chatglm-4","categories":["model-training"],"tags":[],"pricing":{"model":"free","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"chatglm-4__cap_0","uri":"capability://text.generation.language.bilingual.multi.turn.dialogue.generation.with.conversation.history.management","name":"bilingual multi-turn dialogue generation with conversation history management","description":"Generates contextually coherent responses in Chinese and English using a GLM-based transformer architecture that maintains full conversation history through the model.chat(tokenizer, prompt, history) interface. The model processes prior exchanges as context, enabling multi-turn conversations where each response is conditioned on the complete dialogue history rather than isolated prompts. Uses relative position encoding to theoretically support unlimited context length, though training was optimized for 2048-token sequences.","intents":["Build a chatbot that understands context across multiple conversation turns","Create a bilingual assistant that maintains conversation state without external session management","Deploy a dialogue system that handles Chinese and English interchangeably within the same conversation"],"best_for":["Teams building Chinese-first or bilingual conversational AI applications","Developers needing efficient inference on consumer-grade hardware","Organizations requiring open-source models with no API dependencies"],"limitations":["Performance degrades for inputs exceeding 2048 tokens despite theoretical unlimited context support","Memory usage increases after 2-3 dialogue turns due to history accumulation in context window","No built-in conversation persistence — history must be managed externally by the application layer","Bilingual capability is optimized for Chinese-English pairs; other language combinations not guaranteed"],"requires":["Python 3.7+","PyTorch 1.10+","6GB GPU memory minimum (INT4 quantization) or 13GB (FP16)","ChatGLMTokenizer for proper text encoding/decoding"],"input_types":["text (Chinese or English)","conversation history as list of (prompt, response) tuples"],"output_types":["text (Chinese or English)","response with updated history tuple"],"categories":["text-generation-language","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_1","uri":"capability://data.processing.analysis.int4.and.int8.quantization.with.memory.footprint.reduction","name":"int4 and int8 quantization with memory footprint reduction","description":"Reduces model memory requirements through post-training quantization via model.quantize(bits) method supporting INT4 (4-bit) and INT8 (8-bit) precision. Quantization is applied to the ChatGLMForConditionalGeneration weights, compressing the 6.2B parameter model from 13GB (FP16) to 6GB (INT4) or 8GB (INT8) while maintaining inference quality through careful bit-width selection. This enables deployment on consumer GPUs and edge devices without retraining.","intents":["Deploy ChatGLM on resource-constrained hardware like consumer GPUs or edge devices","Reduce inference latency and memory bandwidth requirements for production deployments","Run the model locally without cloud infrastructure or API costs"],"best_for":["Solo developers and small teams with limited GPU budgets","Edge deployment scenarios (laptops, mobile inference servers)","Cost-sensitive production environments avoiding cloud API fees"],"limitations":["INT4 quantization introduces 2-5% accuracy degradation compared to FP16 baseline","Quantization is post-training only — no fine-tuning after quantization without retraining","Quantized models are not compatible with certain optimization techniques like gradient checkpointing","INT4 inference speed gains are hardware-dependent; older GPUs may not have efficient INT4 kernels"],"requires":["Python 3.7+","PyTorch 1.10+ with quantization support","CUDA 11.0+ for GPU quantization (CPU quantization supported but slower)","6GB GPU memory for INT4 or 8GB for INT8"],"input_types":["loaded ChatGLMForConditionalGeneration model","quantization bit-width parameter (4 or 8)"],"output_types":["quantized model weights in INT4 or INT8 format","model checkpoint with reduced memory footprint"],"categories":["data-processing-analysis","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_10","uri":"capability://automation.workflow.cpu.based.inference.with.reduced.precision","name":"cpu-based inference with reduced precision","description":"Enables model inference on CPU-only systems through INT8 quantization and memory-mapped file loading, allowing deployment on machines without GPUs. CPU inference uses PyTorch's CPU optimizations and optional ONNX Runtime acceleration for faster computation. While significantly slower than GPU inference (10-50x latency increase), CPU deployment is valuable for edge devices, development environments, and cost-sensitive scenarios where GPU access is unavailable.","intents":["Deploy ChatGLM on laptops or servers without GPU hardware","Run the model in development environments for testing without GPU allocation","Enable inference on edge devices or IoT systems with CPU-only constraints"],"best_for":["Solo developers without GPU access","Organizations with CPU-only infrastructure","Edge deployment scenarios on Raspberry Pi or similar devices"],"limitations":["CPU inference is 10-50x slower than GPU; typical latency is 5-30 seconds per response","INT8 quantization is required; FP16 inference on CPU is impractical (requires 26GB+ RAM)","Single-threaded inference is default; multi-threading requires careful PyTorch configuration","Memory-mapped file loading adds startup latency (5-10 seconds) on first inference"],"requires":["Python 3.7+","PyTorch 1.10+ with CPU support","16GB+ RAM for INT8 quantized model","Optional: ONNX Runtime for acceleration"],"input_types":["text prompt","conversation history"],"output_types":["text response","updated conversation history"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_11","uri":"capability://automation.workflow.macos.deployment.with.metal.acceleration","name":"macos deployment with metal acceleration","description":"Enables optimized inference on Apple Silicon (M1/M2/M3) and Intel Macs through PyTorch's Metal Performance Shaders (MPS) backend, which accelerates tensor operations using the GPU without requiring CUDA. The deployment automatically detects Mac hardware and routes computation to Metal when available, providing 2-5x speedup over CPU-only inference while maintaining compatibility with INT8 quantization. This enables ChatGLM deployment on consumer MacBooks without external GPU hardware.","intents":["Deploy ChatGLM on MacBooks for local development and testing","Create Mac-native applications with integrated language model inference","Enable offline ChatGLM usage on Apple Silicon devices"],"best_for":["Mac-based developers building AI applications","Teams with Apple Silicon infrastructure","Organizations requiring offline-capable AI tools on MacOS"],"limitations":["Metal acceleration is limited to Apple Silicon (M1+) and newer Intel Macs; older Macs use CPU-only inference","Metal performance is 2-5x faster than CPU but still 5-10x slower than NVIDIA GPUs","Some PyTorch operations may not have Metal implementations; fallback to CPU adds latency","Memory bandwidth on MacBooks limits batch size; typical batch size is 1-2"],"requires":["Python 3.9+ (Metal support requires newer PyTorch versions)","PyTorch 1.12+ with Metal support","MacOS 12.3+ for Metal acceleration","16GB+ RAM for INT8 quantized model"],"input_types":["text prompt","conversation history"],"output_types":["text response","updated conversation history"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_12","uri":"capability://memory.knowledge.conversation.history.state.management.for.multi.turn.dialogue","name":"conversation history state management for multi-turn dialogue","description":"Manages conversation state through a list of (prompt, response) tuples that are passed to model.chat() as the history parameter, enabling the model to condition responses on prior exchanges. The history is maintained by the application layer (not the model), allowing flexible storage backends (in-memory, database, file system). Each inference call returns both the response and updated history, enabling stateless API design where clients manage history explicitly.","intents":["Maintain conversation context across multiple API calls without server-side session storage","Implement conversation persistence by storing history in a database or file system","Support concurrent conversations for multiple users with independent history tracking"],"best_for":["Stateless API designs where clients manage conversation state","Multi-user systems requiring isolated conversation histories","Applications needing persistent conversation logging for audit or analysis"],"limitations":["History must be managed explicitly by the application; no automatic persistence","Growing history increases memory usage and inference latency (linear with history length)","No built-in deduplication or compression of history; redundant exchanges accumulate","History format is application-specific; no standardized serialization format provided"],"requires":["Application-level state management (in-memory list, database, file system)","Serialization mechanism for history persistence (JSON, pickle, etc.)","Client-side history tracking for stateless API calls"],"input_types":["current prompt (string)","conversation history (list of (prompt, response) tuples)"],"output_types":["response (string)","updated history (list of tuples with new exchange appended)"],"categories":["memory-knowledge","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_2","uri":"capability://code.generation.editing.parameter.efficient.fine.tuning.via.p.tuning.v2","name":"parameter-efficient fine-tuning via p-tuning v2","description":"Enables domain-specific model adaptation through P-Tuning v2 implementation in the ptuning/ directory, which adds learnable soft prompts to the model without modifying base weights. During fine-tuning, only the prompt embeddings and a small adapter layer are trained (typically <1% of model parameters), while the 6.2B base model parameters remain frozen. This approach reduces fine-tuning memory from 14GB (full fine-tuning) to 7GB while maintaining task-specific performance through prompt optimization.","intents":["Adapt ChatGLM to domain-specific tasks (customer support, medical QA, legal document analysis) without full retraining","Fine-tune the model on limited hardware with 7GB GPU memory instead of 14GB","Create multiple task-specific variants from a single base model with minimal storage overhead"],"best_for":["Teams with domain-specific datasets but limited GPU budgets","Organizations needing rapid model customization for multiple use cases","Researchers exploring prompt-based adaptation techniques"],"limitations":["P-Tuning v2 typically requires 500+ labeled examples per task for convergence; smaller datasets may overfit","Fine-tuned prompts are not transferable across different base model versions or architectures","Soft prompts add ~50-100ms latency per inference due to additional embedding lookups","No built-in evaluation metrics — requires external validation framework to assess task performance"],"requires":["Python 3.7+","PyTorch 1.10+","7GB GPU memory minimum for fine-tuning","Labeled training dataset in JSON format with prompt-response pairs","ptuning/ module from ChatGLM repository"],"input_types":["base ChatGLMForConditionalGeneration model","training dataset (JSON with prompt/response pairs)","hyperparameters (learning rate, batch size, epochs)"],"output_types":["fine-tuned prompt embeddings checkpoint","adapter weights for task-specific inference","evaluation metrics (loss, perplexity)"],"categories":["code-generation-editing","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_3","uri":"capability://tool.use.integration.rest.api.service.for.remote.model.inference","name":"rest api service for remote model inference","description":"Exposes the model through an HTTP API via api.py that accepts JSON requests and returns JSON responses, enabling integration with web applications and microservices without direct Python dependencies. The API wraps the model.chat() interface, accepting prompt and history as JSON payload and returning generated responses with updated conversation history. Supports concurrent requests through standard Python async/await patterns, making it suitable for production deployments behind load balancers.","intents":["Integrate ChatGLM into web applications or mobile apps via HTTP endpoints","Deploy the model as a microservice accessible to non-Python applications","Build a multi-user chatbot backend with concurrent request handling"],"best_for":["Full-stack developers building web applications with separate frontend/backend","Teams deploying models in containerized environments (Docker, Kubernetes)","Organizations needing language-agnostic model access (JavaScript, Go, Java clients)"],"limitations":["HTTP overhead adds 50-200ms latency per request compared to direct Python calls","No built-in authentication or rate limiting — requires external API gateway for production security","Conversation history must be managed by the client; server does not persist sessions","Concurrent request handling is limited by GPU memory; exceeding capacity causes request queueing"],"requires":["Python 3.7+","Flask or FastAPI framework (api.py uses Flask by default)","6GB+ GPU memory for model loading","Port availability (default 8000 or configurable)"],"input_types":["JSON POST request with keys: prompt (string), history (list of tuples)"],"output_types":["JSON response with keys: response (string), history (list of tuples)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_4","uri":"capability://automation.workflow.interactive.command.line.interface.for.local.testing","name":"interactive command-line interface for local testing","description":"Provides a cli_demo.py script that implements an interactive REPL for real-time model testing without code changes. The CLI maintains conversation history across turns, displays token counts and generation time, and supports configuration flags for quantization level, device selection (GPU/CPU), and model path. Users type prompts at a command prompt and receive responses with latency metrics, making it ideal for rapid prototyping and debugging model behavior.","intents":["Quickly test model responses without writing Python code","Debug conversation history handling and multi-turn behavior","Benchmark inference latency and memory usage on local hardware"],"best_for":["Researchers and developers prototyping conversational AI features","Non-technical stakeholders evaluating model quality","DevOps engineers testing model deployment before containerization"],"limitations":["Single-user only — cannot handle concurrent conversations","No persistent conversation logging; history is lost on exit unless manually saved","Terminal-based interface limits formatting options for complex outputs","Requires direct GPU access; not suitable for remote testing without SSH tunneling"],"requires":["Python 3.7+","PyTorch 1.10+","6GB+ GPU memory or CPU with 16GB RAM","Terminal/shell environment (Linux, macOS, Windows PowerShell)"],"input_types":["user text input at command prompt","configuration flags (--quantization-level, --device, --model-path)"],"output_types":["model response text","metadata (generation time, token count, memory usage)"],"categories":["automation-workflow","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_5","uri":"capability://tool.use.integration.web.based.chat.interface.with.gradio","name":"web-based chat interface with gradio","description":"Exposes the model through a browser-based UI via web_demo.py using Gradio framework, which automatically generates an interactive chat interface from the model.chat() function signature. The Gradio interface handles HTML rendering, session management, and client-server communication, allowing users to interact with the model through a web browser without terminal access. Supports real-time streaming of responses and maintains conversation history in the browser session.","intents":["Share the model with non-technical users through a web browser","Create a shareable demo link for stakeholder feedback without deployment infrastructure","Build a quick prototype UI for customer-facing chatbot applications"],"best_for":["Researchers sharing models with collaborators or the public","Product teams creating quick demos for stakeholder review","Solo developers needing a UI without frontend engineering skills"],"limitations":["Gradio generates a basic UI with limited customization for branding or complex layouts","No built-in user authentication or multi-user session isolation","Conversation history is stored only in browser memory; refreshing the page clears history","Gradio's automatic UI generation may not scale well for models with many parameters or complex input schemas"],"requires":["Python 3.7+","Gradio 3.0+","6GB+ GPU memory","Port availability (default 7860)"],"input_types":["text input from browser text field","conversation history maintained by Gradio state"],"output_types":["HTML-rendered chat interface","streamed text responses in browser"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_6","uri":"capability://tool.use.integration.alternative.streamlit.based.web.interface","name":"alternative streamlit-based web interface","description":"Provides web_demo2.py as an alternative to Gradio using Streamlit framework, which renders the chat interface using Streamlit's session state management and reactive component model. Streamlit automatically reruns the entire script on each user interaction, maintaining conversation history through st.session_state dictionary. This approach is more Pythonic for developers familiar with data science workflows, though it introduces latency from full-script reruns.","intents":["Deploy the model using Streamlit for teams already invested in Streamlit dashboards","Create a web interface with more Pythonic state management than Gradio","Integrate the chatbot into existing Streamlit data applications"],"best_for":["Data science teams using Streamlit for analytics dashboards","Organizations with Streamlit infrastructure and deployment pipelines","Developers preferring Python-first UI development over declarative frameworks"],"limitations":["Full-script reruns on each interaction add 200-500ms latency compared to Gradio's event-driven model","Session state is lost when the Streamlit server restarts; no persistent conversation storage","Streamlit's reactive model can cause unexpected behavior with stateful model inference","Scaling to multiple concurrent users requires Streamlit Cloud or custom deployment infrastructure"],"requires":["Python 3.7+","Streamlit 1.0+","6GB+ GPU memory","Port availability (default 8501)"],"input_types":["text input from Streamlit text_input() widget","conversation history from st.session_state"],"output_types":["Streamlit-rendered chat interface","text responses displayed via st.write()"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_7","uri":"capability://code.generation.editing.transformer.based.glm.architecture.with.conditional.generation","name":"transformer-based glm architecture with conditional generation","description":"Implements ChatGLMForConditionalGeneration class using a modified transformer architecture with 6.2 billion parameters that combines bidirectional and autoregressive components from the GLM framework. The architecture uses relative position encoding instead of absolute positions, enabling theoretical unlimited context length while maintaining training efficiency. The model processes input tokens through multi-head self-attention layers with GLM-specific masking patterns that support both understanding and generation tasks in a unified architecture.","intents":["Understand the technical foundation of ChatGLM's language understanding and generation capabilities","Extend or modify the base architecture for specialized tasks","Evaluate model capacity and parameter efficiency compared to standard transformers"],"best_for":["ML researchers studying transformer architectures and GLM variants","Engineers implementing custom model modifications or pruning","Teams evaluating model capacity requirements for deployment"],"limitations":["Relative position encoding adds ~5-10% computational overhead compared to absolute positions","Model was trained on 2048-token sequences; performance degrades for longer contexts despite theoretical support","6.2B parameters is fixed; no built-in mechanisms for dynamic model scaling","Architecture details are partially documented; full implementation requires reading source code"],"requires":["Python 3.7+","PyTorch 1.10+","Understanding of transformer architectures and attention mechanisms","Access to ChatGLM source code for implementation details"],"input_types":["tokenized input sequences (token IDs)","attention masks and position IDs"],"output_types":["logits for next-token prediction","hidden states for downstream tasks"],"categories":["code-generation-editing","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_8","uri":"capability://data.processing.analysis.tokenization.and.detokenization.with.chatglm.vocabulary","name":"tokenization and detokenization with chatglm vocabulary","description":"Handles text encoding/decoding through ChatGLMTokenizer class that maps text to token IDs and vice versa using a learned vocabulary optimized for Chinese-English bilingual text. The tokenizer implements subword tokenization (likely BPE or SentencePiece) with special tokens for dialogue control (e.g., [gMASK], [eos_token]). Tokenization is a required preprocessing step before model inference, and detokenization reconstructs text from token IDs with proper handling of whitespace and special characters.","intents":["Convert raw text input into token IDs for model inference","Decode model output token IDs back into human-readable text","Understand token boundaries and vocabulary coverage for input text"],"best_for":["Developers building inference pipelines that need text preprocessing","Teams analyzing model tokenization behavior for prompt engineering","Researchers studying vocabulary coverage for Chinese-English text"],"limitations":["Vocabulary is fixed at model release; new words or domains may have poor tokenization","Tokenization is lossy for some special characters and formatting (e.g., multiple spaces collapse to one)","Token count varies between Chinese and English text; Chinese typically requires fewer tokens per character","No built-in token counting without full tokenization; requires running tokenizer to estimate prompt length"],"requires":["Python 3.7+","ChatGLMTokenizer from model repository","Model vocabulary file (typically included with model weights)"],"input_types":["text string (Chinese or English)","token ID sequences"],"output_types":["token ID list","reconstructed text string"],"categories":["data-processing-analysis","text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__cap_9","uri":"capability://automation.workflow.multi.gpu.distributed.inference.and.fine.tuning","name":"multi-gpu distributed inference and fine-tuning","description":"Supports scaling model inference and training across multiple GPUs through PyTorch's DataParallel and DistributedDataParallel mechanisms. During multi-GPU deployment, the model is replicated across GPUs with batch splitting, allowing larger batch sizes and faster throughput. Fine-tuning on multiple GPUs uses gradient accumulation and distributed gradient synchronization to maintain training stability while reducing per-GPU memory requirements.","intents":["Increase inference throughput by processing multiple requests in parallel across GPUs","Reduce fine-tuning time by distributing training across multiple GPUs","Scale model serving to handle production traffic with multiple concurrent users"],"best_for":["Production deployments requiring high-throughput inference","Teams with access to multi-GPU clusters (2-8 GPUs typical)","Organizations fine-tuning models on large datasets"],"limitations":["Multi-GPU scaling has diminishing returns beyond 4-8 GPUs due to communication overhead","Requires NCCL or Gloo backend for GPU communication; not all hardware supports efficient distributed training","Batch size must be divisible by number of GPUs; small batches may not fully utilize all GPUs","Distributed training introduces synchronization overhead (~10-20% slower than single-GPU per-GPU throughput)"],"requires":["Python 3.7+","PyTorch 1.10+ with distributed training support","Multiple NVIDIA GPUs (2+) with CUDA 11.0+","NCCL library for GPU communication","Proper CUDA_VISIBLE_DEVICES configuration"],"input_types":["model checkpoint","training dataset (for fine-tuning)","batch of prompts (for inference)"],"output_types":["inference results across batch","fine-tuned model checkpoint"],"categories":["automation-workflow","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"chatglm-4__headline","uri":"capability://text.generation.language.bilingual.conversational.ai.model","name":"bilingual conversational ai model","description":"ChatGLM-6B is an open-source bilingual dialogue model designed for efficient conversational AI applications, excelling in both Chinese and English language understanding and generation.","intents":["best bilingual conversational AI model","conversational AI for Chinese and English","open-source dialogue model for efficient inference","AI model for multi-turn conversations","best model for consumer-grade hardware deployment"],"best_for":["developers seeking bilingual AI solutions","users needing efficient dialogue systems"],"limitations":["may require fine-tuning for specific tasks"],"requires":["6GB GPU memory for optimal performance"],"input_types":["text prompts in Chinese or English"],"output_types":["conversational responses in Chinese or English"],"categories":["text-generation-language"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":57,"verified":false,"data_access_risk":"high","permissions":["Python 3.7+","PyTorch 1.10+","6GB GPU memory minimum (INT4 quantization) or 13GB (FP16)","ChatGLMTokenizer for proper text encoding/decoding","PyTorch 1.10+ with quantization support","CUDA 11.0+ for GPU quantization (CPU quantization supported but slower)","6GB GPU memory for INT4 or 8GB for INT8","PyTorch 1.10+ with CPU support","16GB+ RAM for INT8 quantized model","Optional: ONNX Runtime for acceleration"],"failure_modes":["Performance degrades for inputs exceeding 2048 tokens despite theoretical unlimited context support","Memory usage increases after 2-3 dialogue turns due to history accumulation in context window","No built-in conversation persistence — history must be managed externally by the application layer","Bilingual capability is optimized for Chinese-English pairs; other language combinations not guaranteed","INT4 quantization introduces 2-5% accuracy degradation compared to FP16 baseline","Quantization is post-training only — no fine-tuning after quantization without retraining","Quantized models are not compatible with certain optimization techniques like gradient checkpointing","INT4 inference speed gains are hardware-dependent; older GPUs may not have efficient INT4 kernels","CPU inference is 10-50x slower than GPU; typical latency is 5-30 seconds per response","INT8 quantization is required; FP16 inference on CPU is impractical (requires 26GB+ RAM)","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.7,"quality":0.9,"ecosystem":0.39999999999999997,"match_graph":0.25,"freshness":0.52,"weights":{"adoption":0.35,"quality":0.2,"ecosystem":0.1,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-06-17T09:51:04.690Z","last_scraped_at":null,"last_commit":null},"community":{"stars":null,"forks":null,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=chatglm-4","compare_url":"https://unfragile.ai/compare?artifact=chatglm-4"}},"signature":"3Zc+XnAGSl5ySRrZ+6s1/41d3vYEwsvMCpyLefkp5Rd1EBO7GopneDPNI/yMQAZNReo6W0U3hNdaBLNM2GbiCw==","signedAt":"2026-06-22T04:05:11.801Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/chatglm-4","artifact":"https://unfragile.ai/chatglm-4","verify":"https://unfragile.ai/api/v1/verify?slug=chatglm-4","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}