feat: add Voice Design node + language and guidance_scale to Generate
OmniVoiceVoiceDesign: structured dropdowns for gender/age/pitch/accent that compose into an instruct string — wire to Generate's instruct input. OmniVoiceGenerate: new optional language dropdown (auto + 11 languages) and guidance_scale (CFG, default 2.0) parameters. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+3
-1
@@ -1,4 +1,4 @@
|
|||||||
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset, OmniVoiceMixVoices
|
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset, OmniVoiceMixVoices, OmniVoiceVoiceDesign
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
"OmniVoiceModelLoader": OmniVoiceModelLoader,
|
"OmniVoiceModelLoader": OmniVoiceModelLoader,
|
||||||
@@ -6,6 +6,7 @@ NODE_CLASS_MAPPINGS = {
|
|||||||
"OmniVoiceEpubLoader": OmniVoiceEpubLoader,
|
"OmniVoiceEpubLoader": OmniVoiceEpubLoader,
|
||||||
"OmniVoiceVoicePreset": OmniVoiceVoicePreset,
|
"OmniVoiceVoicePreset": OmniVoiceVoicePreset,
|
||||||
"OmniVoiceMixVoices": OmniVoiceMixVoices,
|
"OmniVoiceMixVoices": OmniVoiceMixVoices,
|
||||||
|
"OmniVoiceVoiceDesign": OmniVoiceVoiceDesign,
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
@@ -14,6 +15,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
|
|||||||
"OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
|
"OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
|
||||||
"OmniVoiceVoicePreset": "OmniVoice Voice Preset",
|
"OmniVoiceVoicePreset": "OmniVoice Voice Preset",
|
||||||
"OmniVoiceMixVoices": "OmniVoice Mix Voices",
|
"OmniVoiceMixVoices": "OmniVoice Mix Voices",
|
||||||
|
"OmniVoiceVoiceDesign": "OmniVoice Voice Design",
|
||||||
}
|
}
|
||||||
|
|
||||||
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
|
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
|
||||||
|
|||||||
+2
-1
@@ -3,5 +3,6 @@ from .generator import OmniVoiceGenerate
|
|||||||
from .epub_loader import OmniVoiceEpubLoader
|
from .epub_loader import OmniVoiceEpubLoader
|
||||||
from .voice_presets import OmniVoiceVoicePreset
|
from .voice_presets import OmniVoiceVoicePreset
|
||||||
from .mix_voices import OmniVoiceMixVoices
|
from .mix_voices import OmniVoiceMixVoices
|
||||||
|
from .voice_design import OmniVoiceVoiceDesign
|
||||||
|
|
||||||
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset", "OmniVoiceMixVoices"]
|
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset", "OmniVoiceMixVoices", "OmniVoiceVoiceDesign"]
|
||||||
|
|||||||
+20
-2
@@ -62,6 +62,14 @@ class OmniVoiceGenerate:
|
|||||||
"default": "",
|
"default": "",
|
||||||
"tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
|
"tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
|
||||||
}),
|
}),
|
||||||
|
"language": (
|
||||||
|
["auto", "English", "Chinese", "Japanese", "Korean", "French",
|
||||||
|
"Spanish", "German", "Portuguese", "Russian", "Arabic", "Hindi"],
|
||||||
|
{
|
||||||
|
"default": "auto",
|
||||||
|
"tooltip": "Language of the text to synthesize. 'auto' lets the model detect it.",
|
||||||
|
},
|
||||||
|
),
|
||||||
"instruct": ("STRING", {
|
"instruct": ("STRING", {
|
||||||
"default": "",
|
"default": "",
|
||||||
"tooltip": (
|
"tooltip": (
|
||||||
@@ -79,6 +87,13 @@ class OmniVoiceGenerate:
|
|||||||
"EXAMPLE: female, high pitch, british accent"
|
"EXAMPLE: female, high pitch, british accent"
|
||||||
),
|
),
|
||||||
}),
|
}),
|
||||||
|
"guidance_scale": ("FLOAT", {
|
||||||
|
"default": 2.0, "min": 0.0, "max": 20.0, "step": 0.1,
|
||||||
|
"tooltip": (
|
||||||
|
"Classifier-free guidance scale. Higher = more faithful to the reference/instruct, "
|
||||||
|
"but can over-saturate. 2.0 is a good default."
|
||||||
|
),
|
||||||
|
}),
|
||||||
"speed": ("FLOAT", {
|
"speed": ("FLOAT", {
|
||||||
"default": 1.0, "min": 0.1, "max": 3.0, "step": 0.1,
|
"default": 1.0, "min": 0.1, "max": 3.0, "step": 0.1,
|
||||||
"tooltip": "Playback speed multiplier. 1.0 = normal, >1.0 = faster, <1.0 = slower.",
|
"tooltip": "Playback speed multiplier. 1.0 = normal, >1.0 = faster, <1.0 = slower.",
|
||||||
@@ -104,10 +119,13 @@ class OmniVoiceGenerate:
|
|||||||
FUNCTION = "generate"
|
FUNCTION = "generate"
|
||||||
CATEGORY = "OmniVoice"
|
CATEGORY = "OmniVoice"
|
||||||
|
|
||||||
def generate(self, model, text, mode, ref_audio=None, ref_text="", instruct="", speed=1.0, num_step=32, seed=0):
|
def generate(self, model, text, mode, ref_audio=None, ref_text="", language="auto",
|
||||||
|
instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0):
|
||||||
if seed != 0:
|
if seed != 0:
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
kwargs = {"text": text, "speed": speed, "num_step": num_step}
|
kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale}
|
||||||
|
if language != "auto":
|
||||||
|
kwargs["language"] = language
|
||||||
|
|
||||||
if mode == "voice_cloning" and ref_audio is None:
|
if mode == "voice_cloning" and ref_audio is None:
|
||||||
raise ValueError("voice_cloning mode requires ref_audio to be connected")
|
raise ValueError("voice_cloning mode requires ref_audio to be connected")
|
||||||
|
|||||||
@@ -0,0 +1,32 @@
|
|||||||
|
class OmniVoiceVoiceDesign:
    """Compose a voice design instruct string from structured dropdowns.

    Each dropdown contributes one comma-separated descriptor to the output
    string. Selecting "none" leaves that attribute out entirely. The result
    is a plain STRING intended to be wired into an ``instruct`` input.
    """

    # Option lists are plain lists because they double as the dropdown
    # definitions returned from INPUT_TYPES. "none" is the sentinel that
    # compose() drops from the output.
    GENDERS = ["none", "male", "female"]
    AGES = ["none", "child", "teenager", "young adult", "middle-aged", "elderly"]
    PITCHES = [
        "none",
        "very low pitch", "low pitch", "moderate pitch", "high pitch", "very high pitch",
        "whisper",
    ]
    ACCENTS = [
        "none",
        "american accent", "british accent", "australian accent", "canadian accent",
        "indian accent", "chinese accent", "japanese accent", "korean accent",
        "portuguese accent", "russian accent",
    ]

    @classmethod
    def INPUT_TYPES(cls):
        """Declare the four required dropdown inputs and their defaults."""
        return {
            "required": {
                "gender": (cls.GENDERS, {
                    "default": "female",
                    "tooltip": "Voice gender. 'none' leaves it out of the instruct string.",
                }),
                "age": (cls.AGES, {
                    "default": "none",
                    "tooltip": "Apparent speaker age. 'none' leaves it out of the instruct string.",
                }),
                "pitch": (cls.PITCHES, {
                    "default": "none",
                    "tooltip": "Voice pitch or delivery. 'none' leaves it out of the instruct string.",
                }),
                "accent": (cls.ACCENTS, {
                    "default": "none",
                    "tooltip": "Speaker accent. 'none' leaves it out of the instruct string.",
                }),
            },
        }

    RETURN_TYPES = ("STRING",)
    RETURN_NAMES = ("instruct",)
    FUNCTION = "compose"
    CATEGORY = "OmniVoice"

    def compose(self, gender, age, pitch, accent):
        """Join the selected attributes into one comma-separated string.

        Args:
            gender: Selected gender option, or "none".
            age: Selected age option, or "none".
            pitch: Selected pitch option, or "none".
            accent: Selected accent option, or "none".

        Returns:
            A 1-tuple holding the instruct string, in the order gender, age,
            pitch, accent. The string is empty when nothing is selected.
        """
        # Normalize before filtering so that empty, whitespace-only or None
        # values never leave dangling ", " separators in the result.
        cleaned = ((value or "").strip() for value in (gender, age, pitch, accent))
        parts = [value for value in cleaned if value and value != "none"]
        return (", ".join(parts),)
|
||||||
Reference in New Issue
Block a user