feat: add language selector for voice_design + Chinese instruct support
- Generate: language dropdown (auto/English/Chinese), passed only in voice_design and auto_voice modes, where it selects the instruct vocabulary
- VoiceDesign: Chinese mode with dialect/age/pitch/gender dropdowns using the model's validated Chinese instruct vocabulary (full-width comma, 全角逗号)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+15
-1
@@ -62,6 +62,18 @@ class OmniVoiceGenerate:
|
|||||||
"default": "",
|
"default": "",
|
||||||
"tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
|
"tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
|
||||||
}),
|
}),
|
||||||
|
"language": (
|
||||||
|
["auto", "English", "Chinese"],
|
||||||
|
{
|
||||||
|
"default": "auto",
|
||||||
|
"tooltip": (
|
||||||
|
"Used in voice_design mode to select the instruct vocabulary.\n"
|
||||||
|
"'English' uses English instruct items (male, female, british accent …)\n"
|
||||||
|
"'Chinese' uses Chinese dialect items (男, 女, 四川话, 东北话 …)\n"
|
||||||
|
"Has no effect in voice_cloning mode (language is inferred from text)."
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
"instruct": ("STRING", {
|
"instruct": ("STRING", {
|
||||||
"default": "",
|
"default": "",
|
||||||
"tooltip": (
|
"tooltip": (
|
||||||
@@ -113,11 +125,13 @@ class OmniVoiceGenerate:
|
|||||||
FUNCTION = "generate"
|
FUNCTION = "generate"
|
||||||
CATEGORY = "OmniVoice"
|
CATEGORY = "OmniVoice"
|
||||||
|
|
||||||
def generate(self, model, text, mode, ref_audio=None, ref_text="",
|
def generate(self, model, text, mode, ref_audio=None, ref_text="", language="auto",
|
||||||
instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0):
|
instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0):
|
||||||
if seed != 0:
|
if seed != 0:
|
||||||
torch.manual_seed(seed)
|
torch.manual_seed(seed)
|
||||||
kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale}
|
kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale}
|
||||||
|
if mode != "voice_cloning" and language and language != "auto":
|
||||||
|
kwargs["language"] = language
|
||||||
|
|
||||||
if mode == "voice_cloning" and ref_audio is None:
|
if mode == "voice_cloning" and ref_audio is None:
|
||||||
raise ValueError("voice_cloning mode requires ref_audio to be connected")
|
raise ValueError("voice_cloning mode requires ref_audio to be connected")
|
||||||
|
|||||||
+35
-6
@@ -11,7 +11,7 @@ class OmniVoiceVoiceDesign:
|
|||||||
"high pitch", "very high pitch", "whisper",
|
"high pitch", "very high pitch", "whisper",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Exactly the accents validated by the model's _resolve_instruct()
|
# Exactly the accents validated by the model's _resolve_instruct() for English
|
||||||
ACCENTS = [
|
ACCENTS = [
|
||||||
"none",
|
"none",
|
||||||
"american accent", "australian accent", "british accent",
|
"american accent", "australian accent", "british accent",
|
||||||
@@ -20,18 +20,42 @@ class OmniVoiceVoiceDesign:
|
|||||||
"russian accent",
|
"russian accent",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Chinese dialect items validated by the model's _resolve_instruct()
|
||||||
|
ZH_GENDERS = ["none", "男", "女"]
|
||||||
|
ZH_AGES = ["none", "儿童", "少年", "青年", "中年", "老年"]
|
||||||
|
ZH_PITCHES = ["none", "极低音调", "低音调", "中音调", "高音调", "极高音调", "耳语"]
|
||||||
|
ZH_DIALECTS = [
|
||||||
|
"none",
|
||||||
|
"东北话", "云南话", "四川话", "宁夏话", "桂林话",
|
||||||
|
"河南话", "济南话", "甘肃话", "石家庄话", "贵州话",
|
||||||
|
"陕西话", "青岛话",
|
||||||
|
]
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
return {
|
return {
|
||||||
"required": {
|
"required": {
|
||||||
|
"language": (
|
||||||
|
["English", "Chinese"],
|
||||||
|
{
|
||||||
|
"default": "English",
|
||||||
|
"tooltip": "Selects the instruct vocabulary. Must match the language set in OmniVoice Generate.",
|
||||||
|
},
|
||||||
|
),
|
||||||
"gender": (cls.GENDERS, {"default": "female",
|
"gender": (cls.GENDERS, {"default": "female",
|
||||||
"tooltip": "Voice gender."}),
|
"tooltip": "Voice gender (English). Ignored when language is Chinese — use zh_gender."}),
|
||||||
"age": (cls.AGES, {"default": "none",
|
"age": (cls.AGES, {"default": "none",
|
||||||
"tooltip": "Approximate age of the speaker."}),
|
"tooltip": "Age of the speaker (English). Ignored when language is Chinese — use zh_age."}),
|
||||||
"pitch": (cls.PITCHES, {"default": "none",
|
"pitch": (cls.PITCHES, {"default": "none",
|
||||||
"tooltip": "Pitch / register of the voice."}),
|
"tooltip": "Pitch (English). Ignored when language is Chinese — use zh_pitch."}),
|
||||||
"accent": (cls.ACCENTS, {"default": "none",
|
"accent": (cls.ACCENTS, {"default": "none",
|
||||||
"tooltip": "Accent validated by the model. Only these 10 are supported."}),
|
"tooltip": "Accent (English only, 10 supported values)."}),
|
||||||
|
},
|
||||||
|
"optional": {
|
||||||
|
"zh_gender": (cls.ZH_GENDERS, {"default": "none", "tooltip": "声线性别 (Chinese mode)"}),
|
||||||
|
"zh_age": (cls.ZH_AGES, {"default": "none", "tooltip": "年龄段 (Chinese mode)"}),
|
||||||
|
"zh_pitch": (cls.ZH_PITCHES, {"default": "none", "tooltip": "音调 (Chinese mode)"}),
|
||||||
|
"zh_dialect": (cls.ZH_DIALECTS, {"default": "none", "tooltip": "方言/口音 (Chinese mode)"}),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,6 +64,11 @@ class OmniVoiceVoiceDesign:
|
|||||||
FUNCTION = "compose"
|
FUNCTION = "compose"
|
||||||
CATEGORY = "OmniVoice"
|
CATEGORY = "OmniVoice"
|
||||||
|
|
||||||
def compose(self, gender, age, pitch, accent):
|
def compose(self, language, gender, age, pitch, accent,
|
||||||
|
zh_gender="none", zh_age="none", zh_pitch="none", zh_dialect="none"):
|
||||||
|
if language == "Chinese":
|
||||||
|
parts = [v for v in [zh_gender, zh_age, zh_pitch, zh_dialect] if v != "none"]
|
||||||
|
return (",".join(parts),)
|
||||||
|
else:
|
||||||
parts = [v for v in [gender, age, pitch, accent] if v != "none"]
|
parts = [v for v in [gender, age, pitch, accent] if v != "none"]
|
||||||
return (", ".join(parts),)
|
return (", ".join(parts),)
|
||||||
|
|||||||
Reference in New Issue
Block a user