docs: improve tooltips on all three SelVA nodes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-04 18:10:05 +02:00
parent 57f56c04e2
commit 429810db5b
3 changed files with 23 additions and 13 deletions
@@ -58,15 +58,19 @@ class SelvaFeatureExtractor:
                "video":  ("IMAGE",),
                "prompt": ("STRING", {
                    "default": "", "multiline": True,
-                    "tooltip": "Text prompt used by TextSynchformer to focus sync features on the relevant sound source. Should match the prompt used in SelvaSampler.",
+                    "tooltip": "Describes the sounds to generate. Used to focus the visual sync features on motion relevant to the prompt — more specific prompts produce cleaner audio sync. Wire the prompt output directly to the Sampler so you only type it once.",
                }),
            },
            "optional": {
-                "video_info": ("VHS_VIDEOINFO", {"tooltip": "Connect VHS LoadVideo info to auto-set fps."}),
-                "fps":      ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 0.001}),
+                "video_info": ("VHS_VIDEOINFO", {
+                    "tooltip": "VHS_VIDEOINFO from VHS LoadVideo. Automatically sets the correct source fps — always connect this when loading video with VHS nodes.",
+                }),
+                "fps":      ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 0.001,
+                                       "tooltip": "Source fps of the input video. Ignored when video_info is connected."}),
                "duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1,
-                                       "tooltip": "Override duration in seconds. 0 = infer from video length and fps."}),
-                "cache_dir": ("STRING", {"default": "", "tooltip": "Directory for cached .npz features. Empty = temp dir."}),
+                                       "tooltip": "Clip duration in seconds. 0 = use the full video length. Clamped to actual video length if too long."}),
+                "cache_dir": ("STRING", {"default": "",
+                                         "tooltip": "Where to store extracted feature files (.npz). Leave empty for the system temp directory. Reusing the same directory enables instant cache hits on re-runs."}),
            },
        }

@@ -87,9 +87,15 @@ class SelvaModelLoader:
    def INPUT_TYPES(cls):
        return {
            "required": {
-                "variant": (list(_VARIANTS.keys()),),
-                "precision": (["bf16", "fp16", "fp32"],),
-                "offload_strategy": (["auto", "keep_in_vram", "offload_to_cpu"],),
+                "variant": (list(_VARIANTS.keys()), {
+                    "tooltip": "Model size and output sample rate. small_16k is fastest (16 kHz). 44k variants output 44.1 kHz. Larger = better quality, more VRAM.",
+                }),
+                "precision": (["bf16", "fp16", "fp32"], {
+                    "tooltip": "Compute dtype. bf16 is recommended on Ampere+ GPUs. fp16 for older NVIDIA hardware. fp32 if you see NaN outputs.",
+                }),
+                "offload_strategy": (["auto", "keep_in_vram", "offload_to_cpu"], {
+                    "tooltip": "auto picks keep_in_vram if ≥16 GB VRAM is free, otherwise offload_to_cpu. offload_to_cpu moves weights to RAM between nodes, saving VRAM at the cost of speed.",
+                }),
            }
        }

@@ -13,20 +13,20 @@ class SelvaSampler:
                "features": ("SELVA_FEATURES",),
                "prompt":   ("STRING", {
                    "default": "", "multiline": True,
-                    "tooltip": "CLIP text for audio generation. Leave empty to reuse the prompt from SelvaFeatureExtractor.",
+                    "tooltip": "Sound description for CLIP text conditioning. Leave empty to reuse the prompt from the Feature Extractor (wire its prompt output here). Changing this without re-extracting features shifts CLIP conditioning but not sync features.",
                }),
                "negative_prompt": ("STRING", {
                    "default": "", "multiline": False,
-                    "tooltip": "Sounds to steer away from, e.g. 'wind noise, background music'.",
+                    "tooltip": "Sounds to suppress, e.g. 'speech, music, wind noise'. Steered away from via CFG. Leave empty for unconditional guidance baseline.",
                }),
                "duration": ("FLOAT", {
                    "default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1,
-                    "tooltip": "Audio duration in seconds. 0 = use duration from features.",
+                    "tooltip": "Output audio length in seconds. 0 = match the video duration stored in features.",
                }),
                "steps":    ("INT",   {"default": 25,  "min": 1,   "max": 200,
-                                       "tooltip": "Euler steps (25 is SelVA default)."}),
+                                       "tooltip": "Euler steps for the flow matching ODE. 25 is the SelVA default. Diminishing returns above 50; below 10 may sound rough."}),
                "cfg_strength": ("FLOAT", {"default": 4.5, "min": 1.0, "max": 20.0, "step": 0.1,
-                                           "tooltip": "CFG scale (SelVA default is 4.5)."}),
+                                           "tooltip": "Classifier-free guidance scale. Higher values follow the prompt more strictly but can introduce artifacts. SelVA default is 4.5; useful range is roughly 3–7."}),
                "seed":     ("INT",   {"default": 0,   "min": 0,   "max": 0xFFFFFFFF}),
            },
            "optional": {},