diff --git a/nodes/selva_feature_extractor.py b/nodes/selva_feature_extractor.py index cb9b70b..958875c 100644 --- a/nodes/selva_feature_extractor.py +++ b/nodes/selva_feature_extractor.py @@ -58,15 +58,19 @@ class SelvaFeatureExtractor: "video": ("IMAGE",), "prompt": ("STRING", { "default": "", "multiline": True, - "tooltip": "Text prompt used by TextSynchformer to focus sync features on the relevant sound source. Should match the prompt used in SelvaSampler.", + "tooltip": "Describes the sounds to generate. Used to focus the visual sync features on motion relevant to the prompt — more specific prompts produce cleaner audio sync. Wire the prompt output directly to the Sampler so you only type it once.", }), }, "optional": { - "video_info": ("VHS_VIDEOINFO", {"tooltip": "Connect VHS LoadVideo info to auto-set fps."}), - "fps": ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 0.001}), + "video_info": ("VHS_VIDEOINFO", { + "tooltip": "VHS_VIDEOINFO from VHS LoadVideo. Automatically sets the correct source fps — always connect this when loading video with VHS nodes.", + }), + "fps": ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 0.001, + "tooltip": "Source fps of the input video. Ignored when video_info is connected."}), "duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1, - "tooltip": "Override duration in seconds. 0 = infer from video length and fps."}), - "cache_dir": ("STRING", {"default": "", "tooltip": "Directory for cached .npz features. Empty = temp dir."}), + "tooltip": "Clip duration in seconds. 0 = use the full video length. Clamped to actual video length if too long."}), + "cache_dir": ("STRING", {"default": "", + "tooltip": "Where to store extracted feature files (.npz). Leave empty for the system temp directory. Reusing the same directory enables instant cache hits on re-runs."}), }, } diff --git a/nodes/selva_model_loader.py b/nodes/selva_model_loader.py index dedfab5..2eec4a8 100644 --- a/nodes/selva_model_loader.py +++ b/nodes/selva_model_loader.py @@ -87,9 +87,15 @@ class SelvaModelLoader: def INPUT_TYPES(cls): return { "required": { - "variant": (list(_VARIANTS.keys()),), - "precision": (["bf16", "fp16", "fp32"],), - "offload_strategy": (["auto", "keep_in_vram", "offload_to_cpu"],), + "variant": (list(_VARIANTS.keys()), { + "tooltip": "Model size and output sample rate. small_16k is fastest (16 kHz). 44k variants output 44.1 kHz. larger = better quality, more VRAM.", + }), + "precision": (["bf16", "fp16", "fp32"], { + "tooltip": "Compute dtype. bf16 is recommended on Ampere+ GPUs. fp16 for older NVIDIA hardware. fp32 if you see NaN outputs.", + }), + "offload_strategy": (["auto", "keep_in_vram", "offload_to_cpu"], { + "tooltip": "auto picks keep_in_vram if ≥16 GB VRAM is free, otherwise offload_to_cpu. offload_to_cpu moves weights to RAM between nodes, saving VRAM at the cost of speed.", + }), } } diff --git a/nodes/selva_sampler.py b/nodes/selva_sampler.py index 7f97bf0..fb9a40f 100644 --- a/nodes/selva_sampler.py +++ b/nodes/selva_sampler.py @@ -13,20 +13,20 @@ class SelvaSampler: "features": ("SELVA_FEATURES",), "prompt": ("STRING", { "default": "", "multiline": True, - "tooltip": "CLIP text for audio generation. Leave empty to reuse the prompt from SelvaFeatureExtractor.", + "tooltip": "Sound description for CLIP text conditioning. Leave empty to reuse the prompt from the Feature Extractor (wire its prompt output here). Changing this without re-extracting features shifts CLIP conditioning but not sync features.", }), "negative_prompt": ("STRING", { "default": "", "multiline": False, - "tooltip": "Sounds to steer away from, e.g. 'wind noise, background music'.", + "tooltip": "Sounds to suppress, e.g. 'speech, music, wind noise'. Steered away from via CFG. Leave empty for unconditional guidance baseline.", }), "duration": ("FLOAT", { "default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1, - "tooltip": "Audio duration in seconds. 0 = use duration from features.", + "tooltip": "Output audio length in seconds. 0 = match the video duration stored in features.", }), "steps": ("INT", {"default": 25, "min": 1, "max": 200, - "tooltip": "Euler steps (25 is SelVA default)."}), + "tooltip": "Euler steps for the flow matching ODE. 25 is the SelVA default. Diminishing returns above 50; below 10 may sound rough."}), "cfg_strength": ("FLOAT", {"default": 4.5, "min": 1.0, "max": 20.0, "step": 0.1, - "tooltip": "CFG scale (SelVA default is 4.5)."}), + "tooltip": "Classifier-free guidance scale. Higher values follow the prompt more strictly but can introduce artifacts. SelVA default is 4.5; useful range is roughly 3–7."}), "seed": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFFFF}), }, "optional": {},