{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"github-lightricks--comfyui-ltxvideo","slug":"lightricks--comfyui-ltxvideo","name":"ComfyUI-LTXVideo","type":"repo","url":"https://ltx.io/model/ltx-2","page_url":"https://unfragile.ai/lightricks--comfyui-ltxvideo","categories":["image-generation"],"tags":["comfyui","diffusion-models","dit","image-to-video","image-to-video-generation","text-to-image","text-to-image-generation"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"github-lightricks--comfyui-ltxvideo__cap_0","uri":"capability://image.visual.text.to.video.generation.with.ltx.2.diffusion.model","name":"text-to-video generation with ltx-2 diffusion model","description":"Generates video sequences from natural language prompts using the LTX-2 diffusion transformer model integrated into ComfyUI core. The system tokenizes text through a Gemma-based CLIP encoder, processes it through the DiT (Diffusion Transformer) architecture, and applies iterative denoising in latent space to produce video frames. Supports both base sampling and advanced guidance mechanisms (STG/APG) to control quality and semantic adherence during generation.","intents":["Generate short video clips from text descriptions without manual video editing","Create multiple video variations from the same prompt with different random seeds","Control video generation quality and semantic accuracy through guidance parameters","Integrate video generation into automated content creation pipelines"],"best_for":["Content creators building automated video generation workflows","AI researchers experimenting with diffusion-based video synthesis","Teams prototyping video generation features in ComfyUI-based applications"],"limitations":["Requires significant VRAM (24GB+ recommended for full model, 16GB minimum with quantization)","Generation speed depends on number of denoising steps and video length (typically 30-120 seconds per generation)","Text encoder (Gemma) must be loaded separately and cached in memory","Output resolution and frame count fixed by model architecture (typically 768x512 or similar)"],"requires":["ComfyUI installation with LTX-2 model weights (comfy/ldm/lightricks in core)","Gemma text encoder model weights","Python 3.9+","CUDA-capable GPU with minimum 16GB VRAM (24GB+ recommended)","ComfyUI-LTXVideo custom nodes installed via ComfyUI Manager or manual installation"],"input_types":["text (prompt string)","integer (seed for reproducibility)","float (guidance scale for STG/APG)","integer (number of sampling steps)"],"output_types":["latent tensor (compressed video representation)","video frames (after VAE decoding)"],"categories":["image-visual","diffusion-models"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_1","uri":"capability://image.visual.image.to.video.synthesis.with.temporal.extension","name":"image-to-video synthesis with temporal extension","description":"Converts a static image into a video sequence by encoding the image as the first frame and using the LTX-2 model to generate subsequent frames that maintain visual consistency and semantic coherence. The system loads the image through the VAE encoder, optionally applies IC-LoRA (in-context LoRA) for structural control, and uses specialized samplers (LTXVInContextSampler) to condition generation on the initial frame while allowing natural motion and scene evolution.","intents":["Animate static images into short video clips with natural motion","Create video sequences that maintain visual style and composition from a reference image","Apply structural control (camera movement, object trajectories) to image-to-video generation","Extend existing video clips by generating additional frames"],"best_for":["Motion graphics designers creating animated assets from static artwork","Video editors needing to extend or interpolate existing footage","Developers building image-to-video features in creative applications"],"limitations":["Generated motion may not perfectly match real-world physics or expected camera movements without IC-LoRA conditioning","Temporal consistency degrades over longer sequences (typically best for 5-15 second videos)","Requires careful prompt engineering to guide motion direction and style","IC-LoRA control adds computational overhead and requires additional model weights"],"requires":["ComfyUI-LTXVideo with LTXVInContextSampler node","Input image in supported formats (PNG, JPEG, WebP)","VAE model for image encoding/decoding","Optional: IC-LoRA weights for structural control","16GB+ VRAM"],"input_types":["image (initial frame)","text (motion/style prompt)","latent tensor (encoded image)","optional: IC-LoRA conditioning tensor"],"output_types":["video frames (sequence of images)","latent tensor (compressed video representation)"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_10","uri":"capability://image.visual.two.stage.upscaling.workflow.with.quality.preservation","name":"two-stage upscaling workflow with quality preservation","description":"Implements a two-stage video upscaling pipeline that first generates low-resolution video with LTX-2, then applies specialized upscaling models to enhance resolution while preserving temporal coherence and semantic content. The system chains LTX-2 generation with external upscaling models (e.g., RealESRGAN, BSRGAN) through ComfyUI's node system, managing intermediate representations and quality metrics throughout the pipeline.","intents":["Generate high-quality videos by upscaling low-resolution LTX-2 output","Improve video detail and sharpness without retraining the diffusion model","Create 4K video from lower-resolution generation","Balance generation speed (fast low-res generation) with output quality"],"best_for":["Teams needing high-resolution output with fast generation","Content creators prioritizing quality over generation speed","Developers building multi-stage video generation pipelines"],"limitations":["Two-stage approach adds latency (typically 2-3x slower than single-stage generation)","Upscaling models may introduce artifacts or hallucinate details","Temporal coherence depends on upscaling model's temporal awareness","Requires additional model weights and VRAM for upscaling stage"],"requires":["ComfyUI-LTXVideo with LTX-2 generation nodes","Upscaling model (RealESRGAN, BSRGAN, or similar)","24GB+ VRAM for both generation and upscaling stages","Patience for longer generation times"],"input_types":["text prompt","integer (target upscaling factor, typically 2-4x)","optional: upscaling model selection"],"output_types":["upscaled video frames (high-resolution)","quality metrics (PSNR, SSIM, temporal consistency scores)"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_11","uri":"capability://image.visual.camera.control.and.motion.specification.through.ic.lora","name":"camera control and motion specification through ic-lora","description":"Enables precise control over camera movement and object motion in generated videos through in-context LoRA (IC-LoRA) conditioning. The system allows users to specify camera trajectories (pan, zoom, rotate) and object motion paths, which are encoded as conditioning signals and injected into the diffusion process. IC-LoRA weights are loaded through LTXVQ8LoraModelLoader and applied during sampling to guide motion generation without full model retraining.","intents":["Specify exact camera movements (pan, zoom, dolly) in generated videos","Control object motion trajectories and speeds","Create cinematic camera work in AI-generated videos","Maintain consistency between multiple video generations with same camera path"],"best_for":["Filmmakers and cinematographers using AI for shot planning","Motion graphics designers creating camera-controlled animations","Developers building camera-aware video generation tools"],"limitations":["IC-LoRA control requires training or fine-tuning for specific motion types","Motion specification interface may be complex for non-technical users","Generated motion may not perfectly match specified trajectories","Requires additional IC-LoRA model weights (adds to memory footprint)"],"requires":["ComfyUI-LTXVideo with IC-LoRA support","IC-LoRA weights for desired motion types (camera pan, zoom, etc.)","Motion specification format (trajectory coordinates, timing, etc.)","16GB+ VRAM"],"input_types":["text prompt","IC-LoRA weights (quantized or full precision)","motion specification (camera trajectory, object paths)","optional: motion timing and speed parameters"],"output_types":["video frames (with controlled camera/object motion)","motion metadata (actual vs. specified trajectories)"],"categories":["image-visual","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_12","uri":"capability://tool.use.integration.custom.node.registration.and.workflow.composition","name":"custom node registration and workflow composition","description":"Provides a plugin architecture that registers custom nodes with ComfyUI through a dual-registration system (static mappings in __init__.py and runtime-generated nodes from nodes_registry.py). The system enables users to compose complex video generation workflows by connecting nodes in ComfyUI's visual editor, with automatic type checking and data flow validation. NODE_CLASS_MAPPINGS and NODE_DISPLAY_NAME_MAPPINGS enable ComfyUI Manager compatibility and user-friendly node discovery.","intents":["Build complex video generation workflows without coding","Compose multiple generation stages (text-to-video, upscaling, blending) visually","Share and version-control workflows as JSON graphs","Extend ComfyUI-LTXVideo with custom nodes"],"best_for":["Non-technical users building video generation workflows","Teams standardizing video generation processes through workflow templates","Developers extending ComfyUI-LTXVideo with custom nodes"],"limitations":["Visual workflow composition can become complex for large pipelines","Debugging workflow issues requires understanding node connections and data types","Performance optimization requires understanding of node execution order and memory usage","Workflow portability depends on all referenced nodes being installed"],"requires":["ComfyUI installation","ComfyUI-LTXVideo custom nodes installed","ComfyUI Manager (recommended for easy node installation)","Understanding of ComfyUI workflow concepts"],"input_types":["workflow JSON (ComfyUI format)","node configuration parameters"],"output_types":["workflow JSON (saved/exported)","video frames (from workflow execution)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_13","uri":"capability://text.generation.language.gemma.text.encoder.integration.with.caching","name":"gemma text encoder integration with caching","description":"Integrates Lightricks' Gemma-based CLIP text encoder for semantic understanding of prompts, with intelligent caching to avoid redundant encoding of identical prompts. The system implements LTXVGemmaCLIPModelLoader and LTXVGemmaCLIPModelLoaderMGPU that load the encoder, cache embeddings for repeated prompts, and manage encoder lifecycle across multiple generation calls. Supports both single-GPU and multi-GPU loading strategies.","intents":["Encode text prompts into semantic embeddings for video generation","Improve semantic understanding compared to generic CLIP encoders","Reduce generation latency by caching prompt embeddings","Support multi-GPU text encoding for faster batch processing"],"best_for":["Users generating multiple videos from similar prompts","Teams running batch video generation with prompt reuse","Developers optimizing video generation pipelines for latency"],"limitations":["Gemma encoder adds 2-5GB to memory footprint","Encoder loading time (typically 5-10 seconds) amortized across generations","Cache invalidation requires manual management or prompt hashing","Multi-GPU encoder loading adds complexity without proportional speedup for single prompts"],"requires":["ComfyUI-LTXVideo with LTXVGemmaCLIPModelLoader or LTXVGemmaCLIPModelLoaderMGPU","Gemma text encoder model weights","4GB+ VRAM for encoder (additional to LTX-2 model)","Optional: Multi-GPU setup for LTXVGemmaCLIPModelLoaderMGPU"],"input_types":["text prompt (string)","optional: encoder configuration parameters"],"output_types":["text embedding (tensor, typically 768-1024 dimensions)","embedding cache (for repeated prompts)"],"categories":["text-generation-language","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_2","uri":"capability://image.visual.video.frame.extension.and.temporal.blending","name":"video frame extension and temporal blending","description":"Extends existing video sequences by generating additional frames that seamlessly blend with original footage. The system uses LTXVExtendSampler to process latent representations of video clips, applies temporal blending operations (LTXVBlendLatents) to smooth transitions between original and generated frames, and supports looping generation (LTXVLoopingSampler) for continuous video synthesis. Latent normalization (LTXVNormalizeLatents) ensures consistent quality across extended sequences.","intents":["Extend short video clips to longer durations without quality degradation","Create seamless transitions between multiple video segments","Generate looping video content for backgrounds or animations","Interpolate frames between existing video keyframes"],"best_for":["Video editors needing to extend footage without re-shooting","Motion designers creating looping background videos","Developers building video extension features in editing software"],"limitations":["Blending quality depends on temporal overlap between original and generated frames","Looping generation may accumulate artifacts over many iterations without normalization","Requires careful tuning of blend weights and latent normalization parameters","Cannot recover lost information from original video — only generates forward in time"],"requires":["ComfyUI-LTXVideo with LTXVExtendSampler, LTXVBlendLatents, LTXVLoopingSampler nodes","Existing video encoded as latent tensors","VAE model for decoding extended sequences","16GB+ VRAM"],"input_types":["latent tensor (original video frames)","latent tensor (generated frames to blend)","float (blend weight, 0.0-1.0)","integer (number of extension frames)"],"output_types":["latent tensor (blended video representation)","video frames (decoded extended sequence)"],"categories":["image-visual","data-processing-analysis"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_3","uri":"capability://planning.reasoning.structural.guidance.with.stg.and.apg.control.systems","name":"structural guidance with stg and apg control systems","description":"Applies spatial and temporal guidance during video generation to improve quality and semantic adherence without retraining the model. The system implements two guidance mechanisms: STG (Spatial-Temporal Guidance) for general quality improvement and APG (Adaptive Prompt Guidance) for semantic control. Nodes (STGGuiderNode, STGGuiderAdvancedNode, MultimodalGuiderNode) inject guidance signals into the diffusion process at configurable timesteps, modulating the denoising direction toward desired outputs while maintaining diversity.","intents":["Improve video quality and reduce artifacts during generation","Enforce semantic consistency with input prompts or reference images","Control generation style through advanced guidance parameters","Balance quality improvement with creative variation"],"best_for":["Users requiring high-quality video output with minimal artifacts","Developers building quality-controlled video generation pipelines","Researchers experimenting with guidance mechanisms in diffusion models"],"limitations":["Excessive guidance scale can reduce diversity and create unrealistic artifacts","Guidance adds computational overhead (typically 10-20% slower generation)","Requires careful tuning of guidance parameters for different prompts and styles","APG requires additional multimodal encoder weights and inference"],"requires":["ComfyUI-LTXVideo with STGGuiderNode or STGGuiderAdvancedNode","Base LTX-2 model and sampler","Optional: Multimodal encoder for APG (additional model weights)","16GB+ VRAM"],"input_types":["float (guidance scale, typically 1.0-15.0)","integer (guidance start/end timesteps)","text (prompt for APG)","image (reference for multimodal guidance)"],"output_types":["guided latent tensor (modified diffusion trajectory)","video frames (with improved quality)"],"categories":["planning-reasoning","safety-moderation"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_4","uri":"capability://automation.workflow.q8.quantization.for.low.vram.model.loading","name":"q8 quantization for low-vram model loading","description":"Reduces model memory footprint through 8-bit quantization, enabling LTX-2 inference on GPUs with limited VRAM (16GB or less). The system implements LTXVQ8LoraModelLoader and LowVRAMCheckpointLoader nodes that load model weights in quantized format, apply dynamic dequantization during inference, and optionally load LoRA adapters in quantized form. This approach trades minimal quality loss for significant memory savings (typically 40-50% reduction).","intents":["Run LTX-2 video generation on consumer-grade GPUs with 16GB VRAM","Reduce memory pressure when running multiple models simultaneously","Deploy video generation on cost-constrained hardware","Enable longer video generation without running out of memory"],"best_for":["Individual developers with consumer GPUs (RTX 4060, RTX 3080, etc.)","Teams deploying video generation on edge devices or cloud instances with limited VRAM","Researchers studying quantization effects on diffusion model quality"],"limitations":["Q8 quantization introduces minor quality degradation (typically imperceptible but measurable)","Dequantization adds ~5-10% inference latency overhead","Not compatible with all LoRA adapters — requires quantization-aware LoRA weights","Requires careful tuning of quantization parameters for different model architectures"],"requires":["ComfyUI-LTXVideo with LTXVQ8LoraModelLoader or LowVRAMCheckpointLoader","Quantized model weights (or quantization script to convert existing weights)","GPU with 16GB+ VRAM (8GB possible with aggressive settings)","bitsandbytes or similar quantization library"],"input_types":["model checkpoint path (quantized format)","optional: LoRA adapter path (quantized)","integer (quantization bits, typically 8)"],"output_types":["loaded model (in quantized format)","LoRA adapter (quantized)"],"categories":["automation-workflow","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_5","uri":"capability://automation.workflow.multi.gpu.model.distribution.and.memory.management","name":"multi-gpu model distribution and memory management","description":"Distributes model components across multiple GPUs to enable larger batch sizes and longer video generation on multi-GPU systems. The system implements LTXVGemmaCLIPModelLoaderMGPU and memory optimization nodes that partition the text encoder, diffusion model, and VAE across available devices, managing inter-device communication and synchronization. Automatic memory profiling (LowVRAMCheckpointLoader) detects available VRAM and adjusts model placement accordingly.","intents":["Generate longer videos or larger batches on multi-GPU systems","Distribute computational load across multiple GPUs for faster inference","Optimize memory usage across heterogeneous GPU setups","Enable batch video generation on enterprise hardware"],"best_for":["Teams with multi-GPU setups (2+ GPUs) running production video generation","Data centers deploying video generation services","Researchers running large-scale video generation experiments"],"limitations":["Inter-GPU communication overhead can reduce speedup below theoretical maximum (typically 1.5-1.8x for 2 GPUs)","Requires careful tuning of model partitioning for different GPU configurations","Not all model components benefit equally from distribution (bottlenecks may emerge)","Synchronization overhead increases with number of GPUs (diminishing returns beyond 4 GPUs)"],"requires":["ComfyUI-LTXVideo with LTXVGemmaCLIPModelLoaderMGPU","Multiple CUDA-capable GPUs (2+)","NVIDIA NCCL or similar multi-GPU communication library","Sufficient PCIe bandwidth for inter-GPU communication"],"input_types":["model checkpoint path","list of GPU device IDs","optional: memory allocation hints"],"output_types":["distributed model (components on different GPUs)","video frames (generated with multi-GPU acceleration)"],"categories":["automation-workflow","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_6","uri":"capability://data.processing.analysis.latent.space.manipulation.and.normalization","name":"latent space manipulation and normalization","description":"Provides low-level operations on compressed video representations (latent tensors) to enable advanced workflows without decoding to pixel space. The system implements nodes (LTXVSelectLatents, LTXVBlendLatents, LTXVNormalizeLatents, LTXVConcatenateLatents) that manipulate latent dimensions, blend multiple latent sequences, normalize distributions, and concatenate temporal sequences. These operations work directly in compressed space, enabling efficient composition of video generation results.","intents":["Select specific frames or frame ranges from generated video latents","Blend multiple generated videos in latent space for smooth transitions","Normalize latent distributions to prevent quality degradation in extended generation","Concatenate video segments without decoding to pixel space"],"best_for":["Advanced users building complex video composition workflows","Developers creating video editing tools with latent-space operations","Researchers studying latent space properties of video diffusion models"],"limitations":["Requires understanding of latent space structure and dimensions","Blending in latent space may produce artifacts not visible until decoding","Normalization parameters must be tuned for specific model architectures","Limited visibility into results until final VAE decoding (no real-time preview)"],"requires":["ComfyUI-LTXVideo with latent operation nodes (LTXVSelectLatents, LTXVBlendLatents, etc.)","Video encoded as latent tensors (from LTX-2 sampling or VAE encoding)","Understanding of latent tensor shapes and dimensions"],"input_types":["latent tensor (video representation)","integer (frame indices for selection)","float (blend weights)","optional: normalization parameters"],"output_types":["latent tensor (manipulated video representation)","video frames (after VAE decoding)"],"categories":["data-processing-analysis","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_7","uri":"capability://data.processing.analysis.vae.encoding.and.decoding.with.video.support","name":"vae encoding and decoding with video support","description":"Converts between pixel-space video frames and compressed latent representations using a variational autoencoder optimized for temporal coherence. The system provides VAE encoder/decoder nodes that process video sequences frame-by-frame or in temporal chunks, maintaining consistency across frames while achieving 8-16x compression. Supports both standard VAE decoding and tiled decoding for memory-constrained scenarios.","intents":["Encode input videos into latent space for conditioning or analysis","Decode generated latent videos back to viewable pixel-space frames","Compress video sequences for efficient storage and processing","Enable memory-efficient video processing on limited-VRAM systems"],"best_for":["Users working with existing video footage as input to video generation","Developers building video processing pipelines","Teams needing efficient video compression for storage"],"limitations":["VAE decoding introduces minor quality loss (typically imperceptible)","Temporal consistency in VAE encoding depends on frame overlap and chunk size","Tiled decoding adds complexity and may introduce seams at tile boundaries","VAE model weights add to total memory footprint (typically 1-2GB)"],"requires":["ComfyUI-LTXVideo with VAE nodes","VAE model weights (typically included with LTX-2 distribution)","Input video in supported formats (MP4, WebM, PNG sequence, etc.)","8GB+ VRAM for standard VAE, 4GB+ for tiled VAE"],"input_types":["video frames (pixel-space, typically uint8 or float32)","optional: tiling parameters for memory-constrained encoding"],"output_types":["latent tensor (compressed video representation)","video frames (decoded from latents)"],"categories":["data-processing-analysis","image-visual"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_8","uri":"capability://image.visual.tiled.sampling.for.high.resolution.video.generation","name":"tiled sampling for high-resolution video generation","description":"Generates high-resolution videos by dividing the spatial domain into overlapping tiles, sampling each tile independently, and blending results at tile boundaries. The system implements LTXVTiledSampler that manages tile generation, overlap regions, and boundary blending to produce seamless high-resolution output without requiring proportional VRAM increases. Tile size and overlap are configurable to balance quality and memory usage.","intents":["Generate videos at resolutions higher than model's native output","Create high-resolution video on systems with limited VRAM","Improve spatial detail in generated videos","Enable 4K or higher video generation on consumer hardware"],"best_for":["Content creators needing high-resolution video output","Teams with memory-constrained systems requiring high-quality video","Developers building resolution-agnostic video generation services"],"limitations":["Tiling introduces potential artifacts at tile boundaries despite blending","Tile overlap reduces effective resolution gain (typically 1.5-2x max practical increase)","Sampling time increases with number of tiles (quadratic with resolution increase)","Requires careful tuning of tile size and overlap for different resolutions"],"requires":["ComfyUI-LTXVideo with LTXVTiledSampler","Base LTX-2 model and text encoder","Minimum 16GB VRAM (tiling reduces but doesn't eliminate VRAM requirements)","Patience for longer generation times"],"input_types":["text prompt","integer (target resolution)","integer (tile size, typically 512-768)","integer (tile overlap, typically 64-128)"],"output_types":["high-resolution latent tensor","high-resolution video frames"],"categories":["image-visual","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-lightricks--comfyui-ltxvideo__cap_9","uri":"capability://text.generation.language.prompt.enhancement.and.dynamic.conditioning","name":"prompt enhancement and dynamic conditioning","description":"Augments user prompts with automatically generated enhancements and applies dynamic conditioning during generation. The system provides utility nodes that expand prompts with style descriptors, quality keywords, and temporal directives, then injects these enhanced prompts into the diffusion process at configurable timesteps. Supports both static prompt enhancement and dynamic prompt scheduling that varies conditioning over generation timesteps.","intents":["Improve video quality by automatically adding quality-enhancing keywords to prompts","Apply style consistency across multiple generations","Control temporal aspects of video (motion speed, camera movement) through prompt scheduling","Enable non-expert users to generate high-quality videos without detailed prompt engineering"],"best_for":["Non-technical users wanting better results without prompt engineering","Teams building user-facing video generation interfaces","Developers creating prompt optimization tools"],"limitations":["Automatic enhancement may not suit all use cases or styles","Dynamic prompt scheduling adds complexity and requires careful tuning","Over-enhancement can reduce diversity and create generic results","Prompt scheduling requires understanding of diffusion timestep semantics"],"requires":["ComfyUI-LTXVideo with prompt enhancement nodes","Base LTX-2 model and text encoder","Optional: Custom prompt enhancement templates or models"],"input_types":["text (base prompt)","optional: style descriptor","optional: quality level (low/medium/high)","optional: prompt schedule (timestep-to-prompt mapping)"],"output_types":["enhanced prompt (text)","video frames (generated with enhanced conditioning)"],"categories":["text-generation-language","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":44,"verified":false,"data_access_risk":"low","permissions":["ComfyUI installation with LTX-2 model weights (comfy/ldm/lightricks in core)","Gemma text encoder model weights","Python 3.9+","CUDA-capable GPU with minimum 16GB VRAM (24GB+ recommended)","ComfyUI-LTXVideo custom nodes installed via ComfyUI Manager or manual installation","ComfyUI-LTXVideo with LTXVInContextSampler node","Input image in supported formats (PNG, JPEG, WebP)","VAE model for image encoding/decoding","Optional: IC-LoRA weights for structural control","16GB+ VRAM"],"failure_modes":["Requires significant VRAM (24GB+ recommended for full model, 16GB minimum with quantization)","Generation speed depends on number of denoising steps and video length (typically 30-120 seconds per generation)","Text encoder (Gemma) must be loaded separately and cached in memory","Output resolution and frame count fixed by model architecture (typically 768x512 or similar)","Generated motion may not perfectly match real-world physics or expected camera movements without IC-LoRA conditioning","Temporal consistency degrades over longer sequences (typically best for 5-15 second videos)","Requires careful prompt engineering to guide motion direction and style","IC-LoRA control adds computational overhead and requires additional model weights","Two-stage approach adds latency (typically 2-3x slower than single-stage generation)","Upscaling models may introduce artifacts or hallucinate details","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.557032185154139,"quality":0.35,"ecosystem":0.6000000000000001,"match_graph":0.25,"freshness":0.75,"weights":{"adoption":0.3,"quality":0.2,"ecosystem":0.15,"match_graph":0.3,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:21.550Z","last_scraped_at":"2026-05-03T13:58:44.860Z","last_commit":"2026-04-26T08:38:17Z"},"community":{"stars":3562,"forks":388,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=lightricks--comfyui-ltxvideo","compare_url":"https://unfragile.ai/compare?artifact=lightricks--comfyui-ltxvideo"}},"signature":"KQaIiGBmZ65oQvgZ55X/NZr3GZhq9VvwP82beRZT+LhdBW0kYkt4hEaLUJEIspmqY8jL4z82qBKTbz7ujKL4BA==","signedAt":"2026-06-21T18:43:48.504Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/lightricks--comfyui-ltxvideo","artifact":"https://unfragile.ai/lightricks--comfyui-ltxvideo","verify":"https://unfragile.ai/api/v1/verify?slug=lightricks--comfyui-ltxvideo","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}