feat: add multi-speaker generation with JS-powered dynamic slots

- Add OmniVoiceSpeaker node (label + ref_audio + ref_text → OMNIVOICE_SPEAKER)
- Add OmniVoiceSpeakers node (roster with dynamic speaker_N inputs driven by num_speakers INT widget; slots expand/collapse via ComfyUI JS extension)
- Add web/multi_speaker.js: ComfyUI extension that hooks onNodeCreated and onConfigure to sync speaker_N inputs in real time (max 8 speakers)
- Extend OmniVoiceGenerate with optional speakers (OMNIVOICE_SPEAKERS) input; when connected it routes each paragraph to the assigned speaker and concatenates the results — supports alternate_paragraphs and tagged_speakers modes
- Remove OmniVoiceMultiSpeakerGenerate (generation now lives in the existing Generate node)
- Refactor generator.py: extract _write_tmp_wav helper, add _tensors_to_audio

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-06 09:08:23 +02:00
parent 3cbc04d12d
commit 95cf706b19
5 changed files with 272 additions and 16 deletions
@@ -0,0 +1,97 @@
+class OmniVoiceSpeaker:
+    """ComfyUI node that packs one voice into an OMNIVOICE_SPEAKER dict.
+
+    The dict holds the inputs unchanged under "label", "ref_audio", "ref_text".
+    """
+
+    CATEGORY = "OmniVoice"
+    FUNCTION = "build"
+    RETURN_TYPES = ("OMNIVOICE_SPEAKER",)
+    RETURN_NAMES = ("speaker",)
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        """Return the input schema: required label/ref_audio, optional ref_text."""
+        label_tooltip = (
+            "Name used to identify this speaker.\n"
+            "In tagged_speakers mode, prefix paragraphs with [Label]:\n"
+            "  [Narrator] Once upon a time...\n"
+            "In alternate_paragraphs mode the label is informational only."
+        )
+        audio_tooltip = "Reference audio clip for this speaker's voice."
+        text_tooltip = "Transcript of ref_audio. Improves cloning quality."
+        required = {
+            "label": ("STRING", {"default": "Narrator", "tooltip": label_tooltip}),
+            "ref_audio": ("AUDIO", {"tooltip": audio_tooltip}),
+        }
+        optional = {
+            "ref_text": ("STRING", {"default": "", "tooltip": text_tooltip}),
+        }
+        return {"required": required, "optional": optional}
+
+    def build(self, label, ref_audio, ref_text=""):
+        """Wrap the three inputs in a dict and return it as a 1-tuple."""
+        speaker = {"label": label, "ref_audio": ref_audio, "ref_text": ref_text}
+        return (speaker,)
+
+
+class OmniVoiceSpeakers:
+    """Gather connected speakers into an OMNIVOICE_SPEAKERS roster.
+
+    Only num_speakers and mode are declared statically. The speaker_N slots
+    are added and removed by the OmniVoice web extension (which ComfyUI must
+    load); build() reads them from its extra keyword arguments.
+    """
+
+    CATEGORY = "OmniVoice"
+    FUNCTION = "build"
+    RETURN_TYPES = ("OMNIVOICE_SPEAKERS",)
+    RETURN_NAMES = ("speakers",)
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        """Return the static input schema (num_speakers and mode only)."""
+        count_tooltip = (
+            "Number of active speaker slots.\n"
+            "Changing this value adds or removes speaker_N inputs on the node."
+        )
+        mode_tooltip = (
+            "alternate_paragraphs – paragraphs (separated by blank lines) rotate\n"
+            "  through speakers in order: 1 → 2 → 3 → 1 → …\n"
+            "\n"
+            "tagged_speakers – prefix each paragraph with [Label] to assign\n"
+            "  a specific speaker. Labels must match those on the Speaker nodes.\n"
+            "  Unrecognised tags fall back to speaker 1.\n"
+            "\n"
+            "  Example:\n"
+            "  [Narrator] The door creaked open.\n"
+            "\n"
+            "  [Alice] Who is there?"
+        )
+        count_spec = {"default": 2, "min": 2, "max": 8, "step": 1, "tooltip": count_tooltip}
+        mode_spec = {"default": "alternate_paragraphs", "tooltip": mode_tooltip}
+        # speaker_1 … speaker_8 are managed by the JS extension and deliberately
+        # left out here so ComfyUI does not render them as static widgets.
+        return {
+            "required": {
+                "num_speakers": ("INT", count_spec),
+                "mode": (["alternate_paragraphs", "tagged_speakers"], mode_spec),
+            },
+        }
+
+    def build(self, num_speakers, mode, **kwargs):
+        """Return ({"speakers": [...], "mode": mode},) from slots 1..num_speakers.
+
+        Slots absent from kwargs or set to None are skipped; slot order is kept.
+
+        Raises:
+            ValueError: if fewer than two speakers remain.
+        """
+        slots = (kwargs.get(f"speaker_{n}") for n in range(1, num_speakers + 1))
+        roster = [spk for spk in slots if spk is not None]
+        if len(roster) >= 2:
+            return ({"speakers": roster, "mode": mode},)
+        raise ValueError(
+            f"OmniVoice Speakers: at least 2 speakers must be connected "
+            f"(got {len(roster)})."
+        )