diff --git a/nodes/generator.py b/nodes/generator.py index 9aecb2e..2648f74 100644 --- a/nodes/generator.py +++ b/nodes/generator.py @@ -62,6 +62,18 @@ class OmniVoiceGenerate: "default": "", "tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.", }), + "language": ( + ["auto", "English", "Chinese"], + { + "default": "auto", + "tooltip": ( + "Used in voice_design mode to select the instruct vocabulary.\n" + "'English' uses English instruct items (male, female, british accent …)\n" + "'Chinese' uses Chinese dialect items (男, 女, 四川话, 东北话 …)\n" + "Has no effect in voice_cloning mode (language is inferred from text)." + ), + }, + ), "instruct": ("STRING", { "default": "", "tooltip": ( @@ -113,11 +125,13 @@ class OmniVoiceGenerate: FUNCTION = "generate" CATEGORY = "OmniVoice" - def generate(self, model, text, mode, ref_audio=None, ref_text="", + def generate(self, model, text, mode, ref_audio=None, ref_text="", language="auto", instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0): if seed != 0: torch.manual_seed(seed) kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale} + if mode != "voice_cloning" and language and language != "auto": + kwargs["language"] = language if mode == "voice_cloning" and ref_audio is None: raise ValueError("voice_cloning mode requires ref_audio to be connected") diff --git a/nodes/voice_design.py b/nodes/voice_design.py index fdcad86..2dec9b2 100644 --- a/nodes/voice_design.py +++ b/nodes/voice_design.py @@ -11,7 +11,7 @@ class OmniVoiceVoiceDesign: "high pitch", "very high pitch", "whisper", ] - # Exactly the accents validated by the model's _resolve_instruct() + # Exactly the accents validated by the model's _resolve_instruct() for English ACCENTS = [ "none", "american accent", "australian accent", "british accent", @@ -20,18 +20,42 @@ class OmniVoiceVoiceDesign: "russian accent", ] + # Chinese dialect items validated by the model's _resolve_instruct() + ZH_GENDERS = ["none", "男", "女"] + ZH_AGES = ["none", "儿童", "少年", "青年", "中年", "老年"] + ZH_PITCHES = ["none", "极低音调", "低音调", "中音调", "高音调", "极高音调", "耳语"] + ZH_DIALECTS = [ + "none", + "东北话", "云南话", "四川话", "宁夏话", "桂林话", + "河南话", "济南话", "甘肃话", "石家庄话", "贵州话", + "陕西话", "青岛话", + ] + @classmethod def INPUT_TYPES(cls): return { "required": { + "language": ( + ["English", "Chinese"], + { + "default": "English", + "tooltip": "Selects the instruct vocabulary. Must match the language set in OmniVoice Generate.", + }, + ), "gender": (cls.GENDERS, {"default": "female", - "tooltip": "Voice gender."}), + "tooltip": "Voice gender (English). Ignored when language is Chinese — use zh_gender."}), "age": (cls.AGES, {"default": "none", - "tooltip": "Approximate age of the speaker."}), + "tooltip": "Age of the speaker (English). Ignored when language is Chinese — use zh_age."}), "pitch": (cls.PITCHES, {"default": "none", - "tooltip": "Pitch / register of the voice."}), + "tooltip": "Pitch (English). Ignored when language is Chinese — use zh_pitch."}), "accent": (cls.ACCENTS, {"default": "none", - "tooltip": "Accent validated by the model. Only these 10 are supported."}), + "tooltip": "Accent (English only, 10 supported values)."}), + }, + "optional": { + "zh_gender": (cls.ZH_GENDERS, {"default": "none", "tooltip": "声线性别 (Chinese mode)"}), + "zh_age": (cls.ZH_AGES, {"default": "none", "tooltip": "年龄段 (Chinese mode)"}), + "zh_pitch": (cls.ZH_PITCHES, {"default": "none", "tooltip": "音调 (Chinese mode)"}), + "zh_dialect": (cls.ZH_DIALECTS, {"default": "none", "tooltip": "方言/口音 (Chinese mode)"}), }, } @@ -40,6 +64,11 @@ class OmniVoiceVoiceDesign: FUNCTION = "compose" CATEGORY = "OmniVoice" - def compose(self, gender, age, pitch, accent): - parts = [v for v in [gender, age, pitch, accent] if v != "none"] - return (", ".join(parts),) + def compose(self, language, gender, age, pitch, accent, + zh_gender="none", zh_age="none", zh_pitch="none", zh_dialect="none"): + if language == "Chinese": + parts = [v for v in [zh_gender, zh_age, zh_pitch, zh_dialect] if v != "none"] + return (",".join(parts),) + else: + parts = [v for v in [gender, age, pitch, accent] if v != "none"] + return (", ".join(parts),)