docs: improve tooltips on all three SelVA nodes
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -58,15 +58,19 @@ class SelvaFeatureExtractor:
|
|||||||
"video": ("IMAGE",),
|
"video": ("IMAGE",),
|
||||||
"prompt": ("STRING", {
|
"prompt": ("STRING", {
|
||||||
"default": "", "multiline": True,
|
"default": "", "multiline": True,
|
||||||
"tooltip": "Text prompt used by TextSynchformer to focus sync features on the relevant sound source. Should match the prompt used in SelvaSampler.",
|
"tooltip": "Describes the sounds to generate. Used to focus the visual sync features on motion relevant to the prompt — more specific prompts produce cleaner audio sync. Wire the prompt output directly to the Sampler so you only type it once.",
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
"optional": {
|
"optional": {
|
||||||
"video_info": ("VHS_VIDEOINFO", {"tooltip": "Connect VHS LoadVideo info to auto-set fps."}),
|
"video_info": ("VHS_VIDEOINFO", {
|
||||||
"fps": ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 0.001}),
|
"tooltip": "VHS_VIDEOINFO from VHS LoadVideo. Automatically sets the correct source fps — always connect this when loading video with VHS nodes.",
|
||||||
|
}),
|
||||||
|
"fps": ("FLOAT", {"default": 30.0, "min": 1.0, "max": 120.0, "step": 0.001,
|
||||||
|
"tooltip": "Source fps of the input video. Ignored when video_info is connected."}),
|
||||||
"duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1,
|
"duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1,
|
||||||
"tooltip": "Override duration in seconds. 0 = infer from video length and fps."}),
|
"tooltip": "Clip duration in seconds. 0 = use the full video length. Clamped to actual video length if too long."}),
|
||||||
"cache_dir": ("STRING", {"default": "", "tooltip": "Directory for cached .npz features. Empty = temp dir."}),
|
"cache_dir": ("STRING", {"default": "",
|
||||||
|
"tooltip": "Where to store extracted feature files (.npz). Leave empty for the system temp directory. Reusing the same directory enables instant cache hits on re-runs."}),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -87,9 +87,15 @@ class SelvaModelLoader:
|
|||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
return {
|
return {
|
||||||
"required": {
|
"required": {
|
||||||
"variant": (list(_VARIANTS.keys()),),
|
"variant": (list(_VARIANTS.keys()), {
|
||||||
"precision": (["bf16", "fp16", "fp32"],),
|
"tooltip": "Model size and output sample rate. small_16k is fastest (16 kHz). 44k variants output 44.1 kHz. larger = better quality, more VRAM.",
|
||||||
"offload_strategy": (["auto", "keep_in_vram", "offload_to_cpu"],),
|
}),
|
||||||
|
"precision": (["bf16", "fp16", "fp32"], {
|
||||||
|
"tooltip": "Compute dtype. bf16 is recommended on Ampere+ GPUs. fp16 for older NVIDIA hardware. fp32 if you see NaN outputs.",
|
||||||
|
}),
|
||||||
|
"offload_strategy": (["auto", "keep_in_vram", "offload_to_cpu"], {
|
||||||
|
"tooltip": "auto picks keep_in_vram if ≥16 GB VRAM is free, otherwise offload_to_cpu. offload_to_cpu moves weights to RAM between nodes, saving VRAM at the cost of speed.",
|
||||||
|
}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,20 +13,20 @@ class SelvaSampler:
|
|||||||
"features": ("SELVA_FEATURES",),
|
"features": ("SELVA_FEATURES",),
|
||||||
"prompt": ("STRING", {
|
"prompt": ("STRING", {
|
||||||
"default": "", "multiline": True,
|
"default": "", "multiline": True,
|
||||||
"tooltip": "CLIP text for audio generation. Leave empty to reuse the prompt from SelvaFeatureExtractor.",
|
"tooltip": "Sound description for CLIP text conditioning. Leave empty to reuse the prompt from the Feature Extractor (wire its prompt output here). Changing this without re-extracting features shifts CLIP conditioning but not sync features.",
|
||||||
}),
|
}),
|
||||||
"negative_prompt": ("STRING", {
|
"negative_prompt": ("STRING", {
|
||||||
"default": "", "multiline": False,
|
"default": "", "multiline": False,
|
||||||
"tooltip": "Sounds to steer away from, e.g. 'wind noise, background music'.",
|
"tooltip": "Sounds to suppress, e.g. 'speech, music, wind noise'. Steered away from via CFG. Leave empty for unconditional guidance baseline.",
|
||||||
}),
|
}),
|
||||||
"duration": ("FLOAT", {
|
"duration": ("FLOAT", {
|
||||||
"default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1,
|
"default": 0.0, "min": 0.0, "max": 30.0, "step": 0.1,
|
||||||
"tooltip": "Audio duration in seconds. 0 = use duration from features.",
|
"tooltip": "Output audio length in seconds. 0 = match the video duration stored in features.",
|
||||||
}),
|
}),
|
||||||
"steps": ("INT", {"default": 25, "min": 1, "max": 200,
|
"steps": ("INT", {"default": 25, "min": 1, "max": 200,
|
||||||
"tooltip": "Euler steps (25 is SelVA default)."}),
|
"tooltip": "Euler steps for the flow matching ODE. 25 is the SelVA default. Diminishing returns above 50; below 10 may sound rough."}),
|
||||||
"cfg_strength": ("FLOAT", {"default": 4.5, "min": 1.0, "max": 20.0, "step": 0.1,
|
"cfg_strength": ("FLOAT", {"default": 4.5, "min": 1.0, "max": 20.0, "step": 0.1,
|
||||||
"tooltip": "CFG scale (SelVA default is 4.5)."}),
|
"tooltip": "Classifier-free guidance scale. Higher values follow the prompt more strictly but can introduce artifacts. SelVA default is 4.5; useful range is roughly 3–7."}),
|
||||||
"seed": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFFFF}),
|
"seed": ("INT", {"default": 0, "min": 0, "max": 0xFFFFFFFF}),
|
||||||
},
|
},
|
||||||
"optional": {},
|
"optional": {},
|
||||||
|
|||||||
Reference in New Issue
Block a user