feat: add OmniVoiceGenerate node with voice cloning, design, and auto modes
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+61
-1
@@ -1,2 +1,62 @@
|
||||
import tempfile
|
||||
import os
|
||||
import torch
|
||||
import torchaudio
|
||||
|
||||
|
||||
class OmniVoiceGenerate:
    """ComfyUI node that turns text into audio by calling ``model.generate``.

    Three modes are offered:

    * ``voice_cloning`` -- the reference audio is written to a temporary WAV
      file and its path is passed to the model as ``ref_audio`` (plus
      ``ref_text`` when one is supplied).
    * ``voice_design`` -- the ``instruct`` string is passed to the model.
    * ``auto_voice`` -- only ``text``, ``speed`` and ``num_step`` are passed.

    A mode whose extra input is missing (cloning without reference audio,
    design without an instruction) silently falls back to the plain call used
    by ``auto_voice``.
    """

    # Sample rate reported alongside the generated waveform. This is a fixed
    # value, not read from the model -- NOTE(review): presumably it matches
    # the rate the model produces; confirm against the model loader.
    _OUTPUT_SAMPLE_RATE = 24000

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's required and optional input sockets/widgets."""
        return {
            "required": {
                "model": ("OMNIVOICE_MODEL",),
                "text": ("STRING", {"multiline": True, "default": ""}),
                "mode": (
                    ["voice_cloning", "voice_design", "auto_voice"],
                    {"default": "voice_cloning"},
                ),
            },
            "optional": {
                "ref_audio": ("AUDIO",),
                "ref_text": ("STRING", {"default": ""}),
                "instruct": ("STRING", {"default": ""}),
                "speed": ("FLOAT", {"default": 1.0, "min": 0.1, "max": 3.0, "step": 0.1}),
                "num_step": ("INT", {"default": 32, "min": 1, "max": 100}),
            },
        }

    RETURN_TYPES = ("AUDIO",)
    RETURN_NAMES = ("audio",)
    FUNCTION = "generate"
    CATEGORY = "OmniVoice"

    @staticmethod
    def _reference_waveform(ref_audio):
        """Return the reference audio as a 2-D ``(channels, samples)`` CPU tensor.

        A 3-D ``(batch, channels, samples)`` waveform is reduced to its first
        batch item. Plain ``squeeze(0)`` would leave a batch larger than one
        untouched and hand a 3-D tensor to the WAV writer. A 1-D waveform is
        promoted to a single channel; a 2-D waveform is passed through.
        """
        waveform = ref_audio["waveform"]
        if waveform.dim() == 3:
            waveform = waveform[0]
        elif waveform.dim() == 1:
            waveform = waveform.unsqueeze(0)
        # Detach and move to CPU so the tensor can be written to disk even if
        # it arrives on an accelerator or attached to an autograd graph.
        return waveform.detach().cpu()

    def _generate_cloned(self, model, kwargs, ref_audio, ref_text):
        """Run ``model.generate`` with the reference audio staged as a WAV file.

        The temporary file is removed afterwards whether or not generation
        succeeds. Returns the model's output as a list.
        """
        # Extract the waveform first so malformed input fails before any
        # temporary file is created.
        waveform = self._reference_waveform(ref_audio)

        # delete=False + explicit unlink: the file must be closed before it is
        # reopened by path for writing.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            tmp_path = tmp.name
        try:
            torchaudio.save(tmp_path, waveform, ref_audio["sample_rate"])
            kwargs["ref_audio"] = tmp_path
            if ref_text:
                kwargs["ref_text"] = ref_text
            # Materialise the result while the file still exists, in case the
            # model hands back its chunks lazily.
            return list(model.generate(**kwargs))
        finally:
            os.unlink(tmp_path)

    def generate(
        self,
        model,
        text,
        mode,
        ref_audio=None,
        ref_text="",
        instruct="",
        speed=1.0,
        num_step=32,
    ):
        """Generate speech for ``text`` and return it as a ComfyUI AUDIO dict.

        Args:
            model: Object exposing ``generate(**kwargs)`` that returns a
                sequence of tensors, each shaped ``(1, T)``.
            text: Text to synthesise.
            mode: ``"voice_cloning"``, ``"voice_design"`` or ``"auto_voice"``.
            ref_audio: Optional AUDIO dict with ``"waveform"`` and
                ``"sample_rate"`` keys; used only in ``voice_cloning`` mode.
            ref_text: Optional text forwarded as ``ref_text`` when cloning.
            instruct: Instruction forwarded as ``instruct`` in
                ``voice_design`` mode.
            speed: Forwarded to the model as ``speed``.
            num_step: Forwarded to the model as ``num_step``.

        Returns:
            A 1-tuple holding ``{"waveform": tensor, "sample_rate": 24000}``
            where the waveform is shaped ``(1, 1, T_total)``.

        Raises:
            RuntimeError: If the model returns no audio chunks.
        """
        kwargs = {"text": text, "speed": speed, "num_step": num_step}

        if mode == "voice_cloning" and ref_audio is not None:
            chunks = self._generate_cloned(model, kwargs, ref_audio, ref_text)
        else:
            # voice_design with an instruction adds it; every other case
            # (auto_voice, or a mode missing its extra input) is a plain call.
            if mode == "voice_design" and instruct:
                kwargs["instruct"] = instruct
            chunks = list(model.generate(**kwargs))

        if not chunks:
            raise RuntimeError("OmniVoice model returned no audio chunks")

        # Each chunk is (1, T); join along time to get (1, T_total).
        combined = torch.cat(chunks, dim=1)
        # ComfyUI AUDIO format is (batch, channels, samples) -> (1, 1, T_total).
        waveform = combined.unsqueeze(0)

        return ({"waveform": waveform, "sample_rate": self._OUTPUT_SAMPLE_RATE},)
|
||||
|
||||
Reference in New Issue
Block a user