Add the OmniVoiceVoiceDesign node; add language and guidance_scale inputs to OmniVoiceGenerate.

  * __init__.py, nodes/__init__.py: import OmniVoiceVoiceDesign and register it in
    NODE_CLASS_MAPPINGS / NODE_DISPLAY_NAME_MAPPINGS and __all__.
  * nodes/generator.py: new "language" dropdown and "guidance_scale" float input.
    generate() always puts guidance_scale into kwargs and adds language only when
    it is not "auto".
  * nodes/voice_design.py (new file): compose() joins the gender/age/pitch/accent
    choices that are not "none" with ", " and returns the result as a 1-tuple.

NOTE(review): leading indentation of the hunk bodies was reconstructed from the
hunk line counts and conventional 4-space Python nesting. Confirm the context
lines against the target tree before applying, or apply with
`git apply --ignore-whitespace`.

diff --git a/__init__.py b/__init__.py
index 4d810da..b5e605b 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,4 +1,4 @@
-from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset, OmniVoiceMixVoices
+from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset, OmniVoiceMixVoices, OmniVoiceVoiceDesign
 
 NODE_CLASS_MAPPINGS = {
     "OmniVoiceModelLoader": OmniVoiceModelLoader,
@@ -6,6 +6,7 @@ NODE_CLASS_MAPPINGS = {
     "OmniVoiceEpubLoader": OmniVoiceEpubLoader,
     "OmniVoiceVoicePreset": OmniVoiceVoicePreset,
     "OmniVoiceMixVoices": OmniVoiceMixVoices,
+    "OmniVoiceVoiceDesign": OmniVoiceVoiceDesign,
 }
 
 NODE_DISPLAY_NAME_MAPPINGS = {
@@ -14,6 +15,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
     "OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
     "OmniVoiceVoicePreset": "OmniVoice Voice Preset",
     "OmniVoiceMixVoices": "OmniVoice Mix Voices",
+    "OmniVoiceVoiceDesign": "OmniVoice Voice Design",
 }
 
 __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
diff --git a/nodes/__init__.py b/nodes/__init__.py
index 557d7e0..dd8be47 100644
--- a/nodes/__init__.py
+++ b/nodes/__init__.py
@@ -3,5 +3,6 @@ from .generator import OmniVoiceGenerate
 from .epub_loader import OmniVoiceEpubLoader
 from .voice_presets import OmniVoiceVoicePreset
 from .mix_voices import OmniVoiceMixVoices
+from .voice_design import OmniVoiceVoiceDesign
 
-__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset", "OmniVoiceMixVoices"]
+__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset", "OmniVoiceMixVoices", "OmniVoiceVoiceDesign"]
diff --git a/nodes/generator.py b/nodes/generator.py
index 5a89eaa..bc0f2af 100644
--- a/nodes/generator.py
+++ b/nodes/generator.py
@@ -62,6 +62,14 @@ class OmniVoiceGenerate:
                     "default": "",
                     "tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
                 }),
+                "language": (
+                    ["auto", "English", "Chinese", "Japanese", "Korean", "French",
+                     "Spanish", "German", "Portuguese", "Russian", "Arabic", "Hindi"],
+                    {
+                        "default": "auto",
+                        "tooltip": "Language of the text to synthesize. 'auto' lets the model detect it.",
+                    },
+                ),
                 "instruct": ("STRING", {
                     "default": "",
                     "tooltip": (
@@ -79,6 +87,13 @@ class OmniVoiceGenerate:
                         "EXAMPLE: female, high pitch, british accent"
                     ),
                 }),
+                "guidance_scale": ("FLOAT", {
+                    "default": 2.0, "min": 0.0, "max": 20.0, "step": 0.1,
+                    "tooltip": (
+                        "Classifier-free guidance scale. Higher = more faithful to the reference/instruct, "
+                        "but can over-saturate. 2.0 is a good default."
+                    ),
+                }),
                 "speed": ("FLOAT", {
                     "default": 1.0, "min": 0.1, "max": 3.0, "step": 0.1,
                     "tooltip": "Playback speed multiplier. 1.0 = normal, >1.0 = faster, <1.0 = slower.",
@@ -104,10 +119,13 @@ class OmniVoiceGenerate:
     FUNCTION = "generate"
     CATEGORY = "OmniVoice"
 
-    def generate(self, model, text, mode, ref_audio=None, ref_text="", instruct="", speed=1.0, num_step=32, seed=0):
+    def generate(self, model, text, mode, ref_audio=None, ref_text="", language="auto",
+                 instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0):
         if seed != 0:
             torch.manual_seed(seed)
-        kwargs = {"text": text, "speed": speed, "num_step": num_step}
+        kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale}
+        if language != "auto":
+            kwargs["language"] = language
 
         if mode == "voice_cloning" and ref_audio is None:
             raise ValueError("voice_cloning mode requires ref_audio to be connected")
diff --git a/nodes/voice_design.py b/nodes/voice_design.py
new file mode 100644
index 0000000..4ae16ee
--- /dev/null
+++ b/nodes/voice_design.py
@@ -0,0 +1,32 @@
+class OmniVoiceVoiceDesign:
+    """Compose a voice design instruct string from structured dropdowns."""
+
+    GENDERS = ["none", "male", "female"]
+    AGES = ["none", "child", "teenager", "young adult", "middle-aged", "elderly"]
+    PITCHES = ["none", "very low pitch", "low pitch", "moderate pitch", "high pitch", "very high pitch", "whisper"]
+    ACCENTS = [
+        "none",
+        "american accent", "british accent", "australian accent", "canadian accent",
+        "indian accent", "chinese accent", "japanese accent", "korean accent",
+        "portuguese accent", "russian accent",
+    ]
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "gender": (cls.GENDERS, {"default": "female"}),
+                "age": (cls.AGES, {"default": "none"}),
+                "pitch": (cls.PITCHES, {"default": "none"}),
+                "accent": (cls.ACCENTS, {"default": "none"}),
+            },
+        }
+
+    RETURN_TYPES = ("STRING",)
+    RETURN_NAMES = ("instruct",)
+    FUNCTION = "compose"
+    CATEGORY = "OmniVoice"
+
+    def compose(self, gender, age, pitch, accent):
+        parts = [v for v in [gender, age, pitch, accent] if v != "none"]
+        return (", ".join(parts),)