feat: add Voice Design node + language and guidance_scale to Generate

OmniVoiceVoiceDesign: structured dropdowns for gender/age/pitch/accent
that compose into an instruct string; wire its output to OmniVoiceGenerate's instruct input.

OmniVoiceGenerate: new optional language dropdown (auto + 11 languages)
and guidance_scale (CFG, default 2.0) parameters.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-05 20:02:06 +02:00
parent 97ed0f209f
commit c1558efad9
4 changed files with 57 additions and 4 deletions
+20 -2
View File
@@ -62,6 +62,14 @@ class OmniVoiceGenerate:
"default": "",
"tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
}),
"language": (
["auto", "English", "Chinese", "Japanese", "Korean", "French",
"Spanish", "German", "Portuguese", "Russian", "Arabic", "Hindi"],
{
"default": "auto",
"tooltip": "Language of the text to synthesize. 'auto' lets the model detect it.",
},
),
"instruct": ("STRING", {
"default": "",
"tooltip": (
@@ -79,6 +87,13 @@ class OmniVoiceGenerate:
"EXAMPLE: female, high pitch, british accent"
),
}),
"guidance_scale": ("FLOAT", {
"default": 2.0, "min": 0.0, "max": 20.0, "step": 0.1,
"tooltip": (
"Classifier-free guidance scale. Higher = more faithful to the reference/instruct, "
"but can over-saturate. 2.0 is a good default."
),
}),
"speed": ("FLOAT", {
"default": 1.0, "min": 0.1, "max": 3.0, "step": 0.1,
"tooltip": "Playback speed multiplier. 1.0 = normal, >1.0 = faster, <1.0 = slower.",
@@ -104,10 +119,13 @@ class OmniVoiceGenerate:
FUNCTION = "generate"
CATEGORY = "OmniVoice"
def generate(self, model, text, mode, ref_audio=None, ref_text="", instruct="", speed=1.0, num_step=32, seed=0):
def generate(self, model, text, mode, ref_audio=None, ref_text="", language="auto",
instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0):
if seed != 0:
torch.manual_seed(seed)
kwargs = {"text": text, "speed": speed, "num_step": num_step}
kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale}
if language != "auto":
kwargs["language"] = language
if mode == "voice_cloning" and ref_audio is None:
raise ValueError("voice_cloning mode requires ref_audio to be connected")