Files
ComfyUI-Omnivoice/nodes/multi_speaker.py
T
Ethanfel 95cf706b19 feat: add multi-speaker generation with JS-powered dynamic slots
- Add OmniVoiceSpeaker node (label + ref_audio + ref_text → OMNIVOICE_SPEAKER)
- Add OmniVoiceSpeakers node (roster with dynamic speaker_N inputs driven by
  num_speakers INT widget; slots expand/collapse via ComfyUI JS extension)
- Add web/multi_speaker.js: ComfyUI extension that hooks onNodeCreated and
  onConfigure to sync speaker_N inputs in real time (max 8 speakers)
- Extend OmniVoiceGenerate with optional speakers (OMNIVOICE_SPEAKERS) input;
  when connected it routes each paragraph to the assigned speaker and
  concatenates the results — supports alternate_paragraphs and tagged_speakers modes
- Remove OmniVoiceMultiSpeakerGenerate (generation now lives in the existing
  Generate node)
- Refactor generator.py: extract _write_tmp_wav helper, add _tensors_to_audio

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 09:08:23 +02:00

98 lines
3.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
class OmniVoiceSpeaker:
    """Node that packs one voice definition into an OMNIVOICE_SPEAKER value.

    The value is a plain dict with the keys ``label``, ``ref_audio`` and
    ``ref_text``; the inputs are stored as received, without any conversion.
    """

    RETURN_TYPES = ("OMNIVOICE_SPEAKER",)
    RETURN_NAMES = ("speaker",)
    FUNCTION = "build"
    CATEGORY = "OmniVoice"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the input schema: required label and ref_audio, optional ref_text."""
        label_tooltip = (
            "Name used to identify this speaker.\n"
            "In tagged_speakers mode, prefix paragraphs with [Label]:\n"
            " [Narrator] Once upon a time...\n"
            "In alternate_paragraphs mode the label is informational only."
        )
        label_spec = ("STRING", {"default": "Narrator", "tooltip": label_tooltip})
        audio_spec = (
            "AUDIO",
            {"tooltip": "Reference audio clip for this speaker's voice."},
        )
        transcript_spec = (
            "STRING",
            {
                "default": "",
                "tooltip": "Transcript of ref_audio. Improves cloning quality.",
            },
        )

        # Key order is kept as label -> ref_audio -> ref_text.
        return {
            "required": {"label": label_spec, "ref_audio": audio_spec},
            "optional": {"ref_text": transcript_spec},
        }

    def build(self, label, ref_audio, ref_text=""):
        """Wrap the three inputs in a speaker dict and return it as a 1-tuple."""
        speaker = dict(label=label, ref_audio=ref_audio, ref_text=ref_text)
        return (speaker,)
class OmniVoiceSpeakers:
    """Gather the connected speaker slots into one roster for multi-speaker use.

    Slots reach ``build`` as ``speaker_1`` … ``speaker_N`` keyword inputs. The
    OmniVoice web extension adds and removes those inputs on the node whenever
    ``num_speakers`` changes, so ComfyUI must have that extension loaded.
    Each slot is meant to be fed by one OmniVoice Speaker node.
    """

    RETURN_TYPES = ("OMNIVOICE_SPEAKERS",)
    RETURN_NAMES = ("speakers",)
    FUNCTION = "build"
    CATEGORY = "OmniVoice"

    @classmethod
    def INPUT_TYPES(cls):
        """Return the static input schema (slot count and routing mode only)."""
        count_options = {
            "default": 2, "min": 2, "max": 8, "step": 1,
            "tooltip": (
                "Number of active speaker slots.\n"
                "Changing this value adds or removes speaker_N inputs on the node."
            ),
        }
        mode_options = {
            "default": "alternate_paragraphs",
            "tooltip": (
                "alternate_paragraphs paragraphs (separated by blank lines) rotate\n"
                " through speakers in order: 1 → 2 → 3 → 1 → …\n"
                "\n"
                "tagged_speakers prefix each paragraph with [Label] to assign\n"
                " a specific speaker. Labels must match those on the Speaker nodes.\n"
                " Unrecognised tags fall back to speaker 1.\n"
                "\n"
                " Example:\n"
                " [Narrator] The door creaked open.\n"
                "\n"
                " [Alice] Who is there?"
            ),
        }
        mode_choices = ["alternate_paragraphs", "tagged_speakers"]

        # speaker_1 … speaker_8 are intentionally absent: the JS extension adds
        # and removes them at runtime, and listing them here would make ComfyUI
        # render them as static widgets.
        return {
            "required": {
                "num_speakers": ("INT", count_options),
                "mode": (mode_choices, mode_options),
            },
        }

    def build(self, num_speakers, mode, **kwargs):
        """Collect the connected speakers and return the roster as a 1-tuple.

        Only ``speaker_1`` through ``speaker_<num_speakers>`` are looked up in
        ``kwargs``; slots that are missing or ``None`` are skipped, and the
        remaining ones keep their slot order.

        Raises:
            ValueError: if fewer than two speakers were collected.
        """
        slot_values = (
            kwargs.get(f"speaker_{slot}") for slot in range(1, num_speakers + 1)
        )
        roster = [entry for entry in slot_values if entry is not None]

        if len(roster) >= 2:
            return ({"speakers": roster, "mode": mode},)

        raise ValueError(
            f"OmniVoice Speakers: at least 2 speakers must be connected "
            f"(got {len(roster)})."
        )