Compare commits
36 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| b4c1cb2955 | |||
| 2e3c357e5a | |||
| aa986fd534 | |||
| 3fee610050 | |||
| d4638aa785 | |||
| e9c947b613 | |||
| 197bcc554e | |||
| 9f2683cd54 | |||
| f8657aca80 | |||
| 0f2bcc4c0e | |||
| 4eabac4c7e | |||
| dd6d5061e4 | |||
| 147030e2af | |||
| aedbe2e7d9 | |||
| 26295e4db7 | |||
| d5f2632c48 | |||
| 33b3d62d02 | |||
| 95cf706b19 | |||
| 3cbc04d12d | |||
| 340c0aa402 | |||
| 2b13e55dc5 | |||
| 86ec8cf3fb | |||
| ae2255d9e4 | |||
| d5a0ebeb9a | |||
| 0d43e5374f | |||
| 2b4b221e88 | |||
| 772f6654d4 | |||
| e26bac3684 | |||
| 194e0b0e09 | |||
| d4bf7c825e | |||
| d2cb5c4249 | |||
| c1558efad9 | |||
| 97ed0f209f | |||
| f7d624799c | |||
| d5000dee11 | |||
| bb1d83578c |
@@ -1,9 +1,12 @@
|
||||
name: Publish to ComfyUI Registry
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
tags:
|
||||
- "v*"
|
||||
branches:
|
||||
- master
|
||||
paths:
|
||||
- "pyproject.toml"
|
||||
|
||||
jobs:
|
||||
publish:
|
||||
|
||||
+10
-2
@@ -1,4 +1,4 @@
|
||||
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset, OmniVoiceMixVoices
|
||||
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset, OmniVoiceMixVoices, OmniVoiceVoiceDesign, OmniVoiceSpeaker, OmniVoiceSpeakers
|
||||
|
||||
NODE_CLASS_MAPPINGS = {
|
||||
"OmniVoiceModelLoader": OmniVoiceModelLoader,
|
||||
@@ -6,6 +6,9 @@ NODE_CLASS_MAPPINGS = {
|
||||
"OmniVoiceEpubLoader": OmniVoiceEpubLoader,
|
||||
"OmniVoiceVoicePreset": OmniVoiceVoicePreset,
|
||||
"OmniVoiceMixVoices": OmniVoiceMixVoices,
|
||||
"OmniVoiceVoiceDesign": OmniVoiceVoiceDesign,
|
||||
"OmniVoiceSpeaker": OmniVoiceSpeaker,
|
||||
"OmniVoiceSpeakers": OmniVoiceSpeakers,
|
||||
}
|
||||
|
||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||
@@ -14,6 +17,11 @@ NODE_DISPLAY_NAME_MAPPINGS = {
|
||||
"OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
|
||||
"OmniVoiceVoicePreset": "OmniVoice Voice Preset",
|
||||
"OmniVoiceMixVoices": "OmniVoice Mix Voices",
|
||||
"OmniVoiceVoiceDesign": "OmniVoice Voice Design",
|
||||
"OmniVoiceSpeaker": "OmniVoice Speaker",
|
||||
"OmniVoiceSpeakers": "OmniVoice Speakers",
|
||||
}
|
||||
|
||||
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
|
||||
WEB_DIRECTORY = "./web"
|
||||
|
||||
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"]
|
||||
|
||||
+3
-1
@@ -3,5 +3,7 @@ from .generator import OmniVoiceGenerate
|
||||
from .epub_loader import OmniVoiceEpubLoader
|
||||
from .voice_presets import OmniVoiceVoicePreset
|
||||
from .mix_voices import OmniVoiceMixVoices
|
||||
from .voice_design import OmniVoiceVoiceDesign
|
||||
from .multi_speaker import OmniVoiceSpeaker, OmniVoiceSpeakers
|
||||
|
||||
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset", "OmniVoiceMixVoices"]
|
||||
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset", "OmniVoiceMixVoices", "OmniVoiceVoiceDesign", "OmniVoiceSpeaker", "OmniVoiceSpeakers"]
|
||||
|
||||
+14
-4
@@ -58,6 +58,12 @@ def _extract_chapters(epub_path):
|
||||
if tag:
|
||||
title = tag.get_text(strip=True)
|
||||
break
|
||||
# Remove title/heading elements so they don't appear in the body text
|
||||
if soup.title:
|
||||
soup.title.decompose()
|
||||
for hn in ['h1', 'h2', 'h3']:
|
||||
for tag in soup.find_all(hn):
|
||||
tag.decompose()
|
||||
for tag in soup.find_all(_BLOCK_TAGS):
|
||||
tag.append(soup.new_string('\n\n'))
|
||||
text = soup.get_text(separator='')
|
||||
@@ -90,8 +96,8 @@ class OmniVoiceEpubLoader:
|
||||
},
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("STRING", "STRING")
|
||||
RETURN_NAMES = ("text", "chapter_list")
|
||||
RETURN_TYPES = ("STRING", "STRING", "STRING")
|
||||
RETURN_NAMES = ("text", "chapter_title", "chapter_list")
|
||||
FUNCTION = "load_epub"
|
||||
CATEGORY = "OmniVoice"
|
||||
|
||||
@@ -100,7 +106,7 @@ class OmniVoiceEpubLoader:
|
||||
n = len(chapters)
|
||||
|
||||
if n == 0:
|
||||
return ("", "")
|
||||
return ("", "", "")
|
||||
|
||||
start = max(1, min(chapter_start, n))
|
||||
end = max(start, min(chapter_end, n))
|
||||
@@ -111,8 +117,12 @@ class OmniVoiceEpubLoader:
|
||||
for i, ch in enumerate(chapters, 1)
|
||||
)
|
||||
|
||||
# chapter_title: title of the first selected chapter (useful for file naming)
|
||||
first = chapters[start - 1]
|
||||
chapter_title = first["title"] if first["title"] else f"Chapter {start}"
|
||||
|
||||
# text: selected range joined by delimiter
|
||||
selected = chapters[start - 1 : end]
|
||||
text = "\n\n---\n\n".join(ch["text"] for ch in selected)
|
||||
|
||||
return (text, chapter_list)
|
||||
return (text, chapter_title, chapter_list)
|
||||
|
||||
+137
-22
@@ -1,8 +1,26 @@
|
||||
import re
|
||||
import tempfile
|
||||
import os
|
||||
import torch
|
||||
import soundfile as sf
|
||||
|
||||
_TAG_RE = re.compile(r'^\[([^\]]+)\]:?\s*(.*)', re.DOTALL)
|
||||
|
||||
|
||||
def _write_tmp_wav(ref_audio):
|
||||
"""Write a ComfyUI AUDIO dict to a temp WAV file. Returns the path (caller must delete)."""
|
||||
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||
tmp_path = tmp.name
|
||||
tmp.close()
|
||||
waveform = ref_audio["waveform"].squeeze(0).cpu() # (channels, samples)
|
||||
audio_np = waveform.numpy()
|
||||
sf.write(
|
||||
tmp_path,
|
||||
audio_np[0] if audio_np.shape[0] == 1 else audio_np.T,
|
||||
int(ref_audio["sample_rate"]),
|
||||
)
|
||||
return tmp_path
|
||||
|
||||
|
||||
class OmniVoiceGenerate:
|
||||
@classmethod
|
||||
@@ -49,12 +67,21 @@ class OmniVoiceGenerate:
|
||||
"tooltip": (
|
||||
"voice_cloning – clone the voice from ref_audio (requires ref_audio)\n"
|
||||
"voice_design – describe a voice with the instruct field (requires instruct)\n"
|
||||
"auto_voice – model picks a voice automatically"
|
||||
"auto_voice – model picks a voice automatically\n"
|
||||
"\n"
|
||||
"Ignored when a Speakers roster is connected."
|
||||
),
|
||||
},
|
||||
),
|
||||
},
|
||||
"optional": {
|
||||
"speakers": ("OMNIVOICE_SPEAKERS", {
|
||||
"tooltip": (
|
||||
"Connect an OmniVoice Speakers node to enable multi-speaker generation.\n"
|
||||
"When connected, ref_audio / instruct / mode are ignored and each paragraph\n"
|
||||
"is routed to its assigned speaker automatically."
|
||||
),
|
||||
}),
|
||||
"ref_audio": ("AUDIO", {
|
||||
"tooltip": "Reference audio clip to clone the voice from. Used in voice_cloning mode.",
|
||||
}),
|
||||
@@ -65,23 +92,32 @@ class OmniVoiceGenerate:
|
||||
"instruct": ("STRING", {
|
||||
"default": "",
|
||||
"tooltip": (
|
||||
"Voice description for voice_design mode. Combine attributes freely.\n"
|
||||
"Voice style description. Required for voice_design mode; optional in voice_cloning\n"
|
||||
"mode to attempt accent/style transfer on top of the cloned voice.\n"
|
||||
"Connect the OmniVoice Voice Design node for structured input.\n"
|
||||
"\n"
|
||||
"GENDER: male, female\n"
|
||||
"AGE: child, teenager, young adult, middle-aged, elderly\n"
|
||||
"PITCH: very low, low, moderate, high, very high\n"
|
||||
"STYLE: whisper\n"
|
||||
"PITCH: very low pitch, low pitch, moderate pitch, high pitch, very high pitch, whisper\n"
|
||||
"\n"
|
||||
"ENGLISH ACCENTS (text must be English):\n"
|
||||
" american, british, australian, canadian,\n"
|
||||
" indian, chinese, korean, japanese, portuguese, russian\n"
|
||||
"ACCENTS (only these are supported by the model):\n"
|
||||
" american accent, australian accent, british accent, canadian accent,\n"
|
||||
" chinese accent, indian accent, japanese accent, korean accent,\n"
|
||||
" portuguese accent, russian accent\n"
|
||||
"\n"
|
||||
"EXAMPLE: female, high pitch, british accent"
|
||||
),
|
||||
}),
|
||||
"guidance_scale": ("FLOAT", {
|
||||
"default": 2.0, "min": 0.0, "max": 20.0, "step": 0.1,
|
||||
"tooltip": (
|
||||
"Classifier-free guidance scale. Higher = more faithful to the reference/instruct, "
|
||||
"but can over-saturate. 2.0 is a good default."
|
||||
),
|
||||
}),
|
||||
"speed": ("FLOAT", {
|
||||
"default": 1.0, "min": 0.1, "max": 3.0, "step": 0.1,
|
||||
"tooltip": "Playback speed multiplier. 1.0 = normal, >1.0 = faster, <1.0 = slower.",
|
||||
"default": 1.0, "min": 0.3, "max": 3.0, "step": 0.1,
|
||||
"tooltip": "Playback speed multiplier. 1.0 = normal, >1.0 = faster, <1.0 = slower. Below 0.3 produces noise and extreme VRAM usage.",
|
||||
}),
|
||||
"num_step": ("INT", {
|
||||
"default": 32, "min": 1, "max": 100,
|
||||
@@ -104,10 +140,17 @@ class OmniVoiceGenerate:
|
||||
FUNCTION = "generate"
|
||||
CATEGORY = "OmniVoice"
|
||||
|
||||
def generate(self, model, text, mode, ref_audio=None, ref_text="", instruct="", speed=1.0, num_step=32, seed=0):
|
||||
def generate(self, model, text, mode, speakers=None, ref_audio=None, ref_text="",
|
||||
instruct="", guidance_scale=2.0, speed=1.0, num_step=32, seed=0):
|
||||
if seed != 0:
|
||||
torch.manual_seed(seed)
|
||||
kwargs = {"text": text, "speed": speed, "num_step": num_step}
|
||||
|
||||
if speakers is not None:
|
||||
return self._generate_multi_speaker(
|
||||
model, text, speakers, guidance_scale, speed, num_step
|
||||
)
|
||||
|
||||
kwargs = {"text": text, "speed": speed, "num_step": num_step, "guidance_scale": guidance_scale}
|
||||
|
||||
if mode == "voice_cloning" and ref_audio is None:
|
||||
raise ValueError("voice_cloning mode requires ref_audio to be connected")
|
||||
@@ -115,17 +158,13 @@ class OmniVoiceGenerate:
|
||||
raise ValueError("voice_design mode requires an instruct string (e.g. 'female, low pitch')")
|
||||
|
||||
if mode == "voice_cloning":
|
||||
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||
tmp_path = tmp.name
|
||||
tmp.close()
|
||||
tmp_path = _write_tmp_wav(ref_audio)
|
||||
try:
|
||||
ref_waveform = ref_audio["waveform"].squeeze(0).cpu() # (channels, samples)
|
||||
audio_np = ref_waveform.numpy()
|
||||
# soundfile expects (samples,) for mono or (samples, channels) for multi-channel
|
||||
sf.write(tmp_path, audio_np[0] if audio_np.shape[0] == 1 else audio_np.T, int(ref_audio["sample_rate"]))
|
||||
kwargs["ref_audio"] = tmp_path
|
||||
if ref_text:
|
||||
kwargs["ref_text"] = ref_text
|
||||
if instruct:
|
||||
kwargs["instruct"] = instruct
|
||||
audio_tensors = model.generate(**kwargs)
|
||||
finally:
|
||||
try:
|
||||
@@ -140,9 +179,85 @@ class OmniVoiceGenerate:
|
||||
else: # auto_voice or fallback
|
||||
audio_tensors = model.generate(**kwargs)
|
||||
|
||||
# Concatenate chunks: each tensor is (1, T) → concat along T → (1, T_total)
|
||||
combined = torch.cat(audio_tensors, dim=1).cpu() # (1, T_total) on CPU
|
||||
# ComfyUI AUDIO format: (batch, channels, samples)
|
||||
waveform = combined.unsqueeze(0) # (1, 1, T_total)
|
||||
return self._tensors_to_audio(audio_tensors)
|
||||
|
||||
def _generate_multi_speaker(self, model, text, speakers_data, guidance_scale, speed, num_step):
|
||||
speaker_list = speakers_data["speakers"]
|
||||
spk_mode = speakers_data["mode"]
|
||||
label_map = {s["label"].lower(): i for i, s in enumerate(speaker_list)}
|
||||
|
||||
if spk_mode == "alternate_paragraphs":
|
||||
paragraphs = [p.strip() for p in text.split("\n\n") if p.strip()]
|
||||
if not paragraphs:
|
||||
raise ValueError("OmniVoice Multi-Speaker: no paragraphs found in text.")
|
||||
segments = [
|
||||
(para, speaker_list[i % len(speaker_list)])
|
||||
for i, para in enumerate(paragraphs)
|
||||
]
|
||||
else: # tagged_speakers
|
||||
# In tagged mode each line that starts with [Tag] begins a new segment.
|
||||
# Continuation lines (no tag) are appended to the previous segment so
|
||||
# multi-line speeches stay together. Both \n and \n\n separators work.
|
||||
raw_segments: list[list[str]] = []
|
||||
current: list[str] = []
|
||||
for line in text.splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
if _TAG_RE.match(line):
|
||||
if current:
|
||||
raw_segments.append(current)
|
||||
current = [line]
|
||||
else:
|
||||
current.append(line)
|
||||
if current:
|
||||
raw_segments.append(current)
|
||||
|
||||
if not raw_segments:
|
||||
raise ValueError("OmniVoice Multi-Speaker: no tagged segments found in text.")
|
||||
|
||||
segments = []
|
||||
for lines in raw_segments:
|
||||
joined = " ".join(lines)
|
||||
m = _TAG_RE.match(joined)
|
||||
if m:
|
||||
tag = m.group(1).strip().lower()
|
||||
body = m.group(2).strip()
|
||||
spk = speaker_list[label_map.get(tag, 0)]
|
||||
else:
|
||||
body = joined
|
||||
spk = speaker_list[0]
|
||||
if body:
|
||||
segments.append((body, spk))
|
||||
|
||||
if not segments:
|
||||
raise ValueError("OmniVoice Multi-Speaker: no text segments to generate.")
|
||||
|
||||
all_chunks = []
|
||||
for para_text, spk in segments:
|
||||
tmp_path = _write_tmp_wav(spk["ref_audio"])
|
||||
try:
|
||||
kwargs = {
|
||||
"text": para_text,
|
||||
"ref_audio": tmp_path,
|
||||
"speed": speed,
|
||||
"num_step": num_step,
|
||||
"guidance_scale": guidance_scale,
|
||||
}
|
||||
if spk["ref_text"]:
|
||||
kwargs["ref_text"] = spk["ref_text"]
|
||||
chunks = model.generate(**kwargs)
|
||||
all_chunks.extend(chunks)
|
||||
finally:
|
||||
try:
|
||||
os.unlink(tmp_path)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
return self._tensors_to_audio(all_chunks)
|
||||
|
||||
@staticmethod
|
||||
def _tensors_to_audio(tensors):
|
||||
combined = torch.cat(tensors, dim=1).cpu() # (1, T_total)
|
||||
waveform = combined.unsqueeze(0) # (1, 1, T_total)
|
||||
return ({"waveform": waveform, "sample_rate": 24000},)
|
||||
|
||||
+11
-4
@@ -1,10 +1,12 @@
|
||||
import os
|
||||
import torch
|
||||
|
||||
_omnivoice_import_error = None
|
||||
try:
|
||||
from omnivoice import OmniVoice
|
||||
except ImportError:
|
||||
OmniVoice = None # deferred; will raise at runtime if package is missing
|
||||
except Exception as e:
|
||||
OmniVoice = None
|
||||
_omnivoice_import_error = e
|
||||
|
||||
try:
|
||||
import folder_paths
|
||||
@@ -55,9 +57,14 @@ class OmniVoiceModelLoader:
|
||||
|
||||
def load_model(self, device, dtype, compile=False):
|
||||
if OmniVoice is None:
|
||||
raise ImportError(
|
||||
"omnivoice is not installed. Run: pip install omnivoice --no-deps"
|
||||
msg = (
|
||||
"omnivoice failed to import. "
|
||||
"Install it with: pip install omnivoice --no-deps\n"
|
||||
"(On Windows embedded Python: .\\python_embeded\\python.exe -m pip install omnivoice --no-deps)\n"
|
||||
)
|
||||
if _omnivoice_import_error is not None:
|
||||
msg += f"\nOriginal error: {_omnivoice_import_error}"
|
||||
raise ImportError(msg)
|
||||
|
||||
model = OmniVoice.from_pretrained(
|
||||
"k2-fsa/OmniVoice",
|
||||
|
||||
@@ -0,0 +1,103 @@
|
||||
class OmniVoiceSpeaker:
|
||||
"""Bundle a label, reference audio, and optional transcript into a speaker slot."""
|
||||
|
||||
@classmethod
|
||||
def INPUT_TYPES(cls):
|
||||
return {
|
||||
"required": {
|
||||
"label": ("STRING", {
|
||||
"default": "Narrator",
|
||||
"tooltip": (
|
||||
"Name used to identify this speaker.\n"
|
||||
"In tagged_speakers mode, prefix paragraphs with [Label]:\n"
|
||||
" [Narrator] Once upon a time...\n"
|
||||
"In alternate_paragraphs mode the label is informational only."
|
||||
),
|
||||
}),
|
||||
"ref_audio": ("AUDIO", {
|
||||
"tooltip": "Reference audio clip for this speaker's voice.",
|
||||
}),
|
||||
},
|
||||
"optional": {
|
||||
"ref_text": ("STRING", {
|
||||
"default": "",
|
||||
"tooltip": "Transcript of ref_audio. Improves cloning quality.",
|
||||
}),
|
||||
},
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("OMNIVOICE_SPEAKER",)
|
||||
RETURN_NAMES = ("speaker",)
|
||||
FUNCTION = "build"
|
||||
CATEGORY = "OmniVoice"
|
||||
|
||||
def build(self, label, ref_audio, ref_text=""):
|
||||
return ({"label": label, "ref_audio": ref_audio, "ref_text": ref_text},)
|
||||
|
||||
|
||||
class OmniVoiceSpeakers:
|
||||
"""Collect multiple speakers into a roster for multi-speaker generation.
|
||||
|
||||
The number of speaker input slots expands dynamically when num_speakers changes
|
||||
(requires the OmniVoice web extension to be loaded by ComfyUI).
|
||||
Connect one OmniVoice Speaker node per slot.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def INPUT_TYPES(cls):
|
||||
# speaker_1…speaker_8 are declared here so ComfyUI validation accepts them.
|
||||
# Visibility is controlled by the JS extension (web/multi_speaker.js):
|
||||
# only the first num_speakers slots are shown as live inputs.
|
||||
optional_speakers = {
|
||||
f"speaker_{i}": ("OMNIVOICE_SPEAKER", {})
|
||||
for i in range(1, 9)
|
||||
}
|
||||
return {
|
||||
"required": {
|
||||
"num_speakers": ("INT", {
|
||||
"default": 2, "min": 2, "max": 8, "step": 1,
|
||||
"tooltip": (
|
||||
"Number of active speaker slots.\n"
|
||||
"Changing this value adds or removes speaker_N inputs on the node."
|
||||
),
|
||||
}),
|
||||
"mode": (
|
||||
["alternate_paragraphs", "tagged_speakers"],
|
||||
{
|
||||
"default": "alternate_paragraphs",
|
||||
"tooltip": (
|
||||
"alternate_paragraphs – paragraphs (separated by blank lines) rotate\n"
|
||||
" through speakers in order: 1 → 2 → 3 → 1 → …\n"
|
||||
"\n"
|
||||
"tagged_speakers – prefix each paragraph with [Label] to assign\n"
|
||||
" a specific speaker. Labels must match those on the Speaker nodes.\n"
|
||||
" Unrecognised tags fall back to speaker 1.\n"
|
||||
"\n"
|
||||
" Example:\n"
|
||||
" [Narrator] The door creaked open.\n"
|
||||
"\n"
|
||||
" [Alice] Who is there?"
|
||||
),
|
||||
},
|
||||
),
|
||||
},
|
||||
"optional": optional_speakers,
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("OMNIVOICE_SPEAKERS",)
|
||||
RETURN_NAMES = ("speakers",)
|
||||
FUNCTION = "build"
|
||||
CATEGORY = "OmniVoice"
|
||||
|
||||
def build(self, num_speakers, mode, **kwargs):
|
||||
speakers = []
|
||||
for i in range(1, num_speakers + 1):
|
||||
spk = kwargs.get(f"speaker_{i}")
|
||||
if spk is not None:
|
||||
speakers.append(spk)
|
||||
if len(speakers) < 2:
|
||||
raise ValueError(
|
||||
f"OmniVoice Speakers: at least 2 speakers must be connected "
|
||||
f"(got {len(speakers)})."
|
||||
)
|
||||
return ({"speakers": speakers, "mode": mode},)
|
||||
@@ -0,0 +1,74 @@
|
||||
class OmniVoiceVoiceDesign:
|
||||
"""Compose a voice design instruct string from structured dropdowns."""
|
||||
|
||||
GENDERS = ["none", "male", "female"]
|
||||
|
||||
AGES = ["none", "child", "teenager", "young adult", "middle-aged", "elderly"]
|
||||
|
||||
PITCHES = [
|
||||
"none",
|
||||
"very low pitch", "low pitch", "moderate pitch",
|
||||
"high pitch", "very high pitch", "whisper",
|
||||
]
|
||||
|
||||
# Exactly the accents validated by the model's _resolve_instruct() for English
|
||||
ACCENTS = [
|
||||
"none",
|
||||
"american accent", "australian accent", "british accent",
|
||||
"canadian accent", "chinese accent", "indian accent",
|
||||
"japanese accent", "korean accent", "portuguese accent",
|
||||
"russian accent",
|
||||
]
|
||||
|
||||
# Chinese dialect items validated by the model's _resolve_instruct()
|
||||
ZH_GENDERS = ["none", "男", "女"]
|
||||
ZH_AGES = ["none", "儿童", "少年", "青年", "中年", "老年"]
|
||||
ZH_PITCHES = ["none", "极低音调", "低音调", "中音调", "高音调", "极高音调", "耳语"]
|
||||
ZH_DIALECTS = [
|
||||
"none",
|
||||
"东北话", "云南话", "四川话", "宁夏话", "桂林话",
|
||||
"河南话", "济南话", "甘肃话", "石家庄话", "贵州话",
|
||||
"陕西话", "青岛话",
|
||||
]
|
||||
|
||||
@classmethod
|
||||
def INPUT_TYPES(cls):
|
||||
return {
|
||||
"required": {
|
||||
"language": (
|
||||
["English", "Chinese"],
|
||||
{
|
||||
"default": "English",
|
||||
"tooltip": "Selects the instruct vocabulary. The language output wires directly into Generate — no need to set it there too.",
|
||||
},
|
||||
),
|
||||
"gender": (cls.GENDERS, {"default": "female",
|
||||
"tooltip": "Voice gender (English). Ignored when language is Chinese — use zh_gender."}),
|
||||
"age": (cls.AGES, {"default": "none",
|
||||
"tooltip": "Age of the speaker (English). Ignored when language is Chinese — use zh_age."}),
|
||||
"pitch": (cls.PITCHES, {"default": "none",
|
||||
"tooltip": "Pitch (English). Ignored when language is Chinese — use zh_pitch."}),
|
||||
"accent": (cls.ACCENTS, {"default": "none",
|
||||
"tooltip": "Accent (English only, 10 supported values)."}),
|
||||
},
|
||||
"optional": {
|
||||
"zh_gender": (cls.ZH_GENDERS, {"default": "none", "tooltip": "声线性别 (Chinese mode)"}),
|
||||
"zh_age": (cls.ZH_AGES, {"default": "none", "tooltip": "年龄段 (Chinese mode)"}),
|
||||
"zh_pitch": (cls.ZH_PITCHES, {"default": "none", "tooltip": "音调 (Chinese mode)"}),
|
||||
"zh_dialect": (cls.ZH_DIALECTS, {"default": "none", "tooltip": "方言/口音 (Chinese mode)"}),
|
||||
},
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("STRING",)
|
||||
RETURN_NAMES = ("instruct",)
|
||||
FUNCTION = "compose"
|
||||
CATEGORY = "OmniVoice"
|
||||
|
||||
def compose(self, language, gender, age, pitch, accent,
|
||||
zh_gender="none", zh_age="none", zh_pitch="none", zh_dialect="none"):
|
||||
if language == "Chinese":
|
||||
parts = [v for v in [zh_gender, zh_age, zh_pitch, zh_dialect] if v != "none"]
|
||||
return (",".join(parts),)
|
||||
else:
|
||||
parts = [v for v in [gender, age, pitch, accent] if v != "none"]
|
||||
return (", ".join(parts),)
|
||||
+46
-9
@@ -53,14 +53,45 @@ PRESETS = {
|
||||
}
|
||||
|
||||
|
||||
def _load_audio(url):
|
||||
"""Download (once) and return (waveform_tensor, sample_rate)."""
|
||||
_AUDIO_EXTS = {".wav", ".flac", ".mp3", ".ogg", ".m4a"}
|
||||
_BUILTIN_FILES = frozenset(os.path.basename(url.split("?")[0]) for url, _ in PRESETS.values())
|
||||
|
||||
|
||||
def _scan_user_presets():
|
||||
"""Return a dict of user presets found in _CACHE_DIR.
|
||||
|
||||
For each audio file that is not a cached built-in, look for a same-stem
|
||||
.txt file for the transcript. Key format: "<stem> (local)".
|
||||
"""
|
||||
user = {}
|
||||
if not os.path.isdir(_CACHE_DIR):
|
||||
return user
|
||||
for fname in sorted(os.listdir(_CACHE_DIR)):
|
||||
stem, ext = os.path.splitext(fname)
|
||||
if ext.lower() not in _AUDIO_EXTS or fname in _BUILTIN_FILES:
|
||||
continue
|
||||
audio_path = os.path.join(_CACHE_DIR, fname)
|
||||
txt_path = os.path.join(_CACHE_DIR, stem + ".txt")
|
||||
transcript = ""
|
||||
if os.path.exists(txt_path):
|
||||
with open(txt_path, "r", encoding="utf-8") as f:
|
||||
transcript = f.read().strip()
|
||||
user[f"{stem} (local)"] = (audio_path, transcript)
|
||||
return user
|
||||
|
||||
|
||||
def _load_audio(source):
|
||||
"""Load audio from a URL (downloading once) or a local file path."""
|
||||
os.makedirs(_CACHE_DIR, exist_ok=True)
|
||||
filename = os.path.basename(url.split("?")[0])
|
||||
if source.startswith("http://") or source.startswith("https://"):
|
||||
filename = os.path.basename(source.split("?")[0])
|
||||
cache_path = os.path.join(_CACHE_DIR, filename)
|
||||
if not os.path.exists(cache_path):
|
||||
urllib.request.urlretrieve(url, cache_path)
|
||||
audio_np, sr = sf.read(cache_path, dtype="float32")
|
||||
urllib.request.urlretrieve(source, cache_path)
|
||||
path = cache_path
|
||||
else:
|
||||
path = source
|
||||
audio_np, sr = sf.read(path, dtype="float32")
|
||||
if audio_np.ndim == 1:
|
||||
audio_np = audio_np[np.newaxis, :] # (1, samples)
|
||||
else:
|
||||
@@ -72,15 +103,20 @@ def _load_audio(url):
|
||||
class OmniVoiceVoicePreset:
|
||||
@classmethod
|
||||
def INPUT_TYPES(cls):
|
||||
all_presets = {**PRESETS, **_scan_user_presets()}
|
||||
return {
|
||||
"required": {
|
||||
"preset": (
|
||||
list(PRESETS.keys()),
|
||||
list(all_presets.keys()),
|
||||
{
|
||||
"tooltip": (
|
||||
"Pre-fetched reference voice for OmniVoice Generate.\n"
|
||||
"Connect ref_audio → ref_audio and ref_text → ref_text.\n"
|
||||
"If ref_text is blank, connect a Whisper node to supply the transcript."
|
||||
"\n"
|
||||
"To add your own presets, drop audio files into:\n"
|
||||
f" {_CACHE_DIR}\n"
|
||||
"Add a same-name .txt file alongside for the transcript.\n"
|
||||
"Restart ComfyUI to pick up new files."
|
||||
),
|
||||
},
|
||||
),
|
||||
@@ -93,6 +129,7 @@ class OmniVoiceVoicePreset:
|
||||
CATEGORY = "OmniVoice"
|
||||
|
||||
def load_preset(self, preset):
|
||||
url, transcript = PRESETS[preset]
|
||||
waveform, sr = _load_audio(url)
|
||||
all_presets = {**PRESETS, **_scan_user_presets()}
|
||||
source, transcript = all_presets[preset]
|
||||
waveform, sr = _load_audio(source)
|
||||
return ({"waveform": waveform, "sample_rate": sr}, transcript)
|
||||
|
||||
+2
-2
@@ -1,7 +1,7 @@
|
||||
[project]
|
||||
name = "comfyui-omnivoice"
|
||||
name = "comfyui-omnivoice-fel"
|
||||
description = "ComfyUI nodes for OmniVoice — multilingual zero-shot TTS with voice cloning, voice design, EPUB loading, and voice mixing."
|
||||
version = "1.0.0"
|
||||
version = "1.0.6"
|
||||
license = { text = "GPL-3.0-only" }
|
||||
dependencies = []
|
||||
|
||||
|
||||
+5
-2
@@ -1,6 +1,9 @@
|
||||
transformers>=4.40.0
|
||||
transformers>=5.3.0
|
||||
accelerate
|
||||
pydub
|
||||
soundfile
|
||||
numpy
|
||||
pydub
|
||||
tensorboardx
|
||||
webdataset
|
||||
beautifulsoup4
|
||||
torchcodec
|
||||
|
||||
@@ -58,14 +58,14 @@ def test_input_types_structure():
|
||||
|
||||
|
||||
def test_return_types():
|
||||
assert OmniVoiceEpubLoader.RETURN_TYPES == ("STRING", "STRING")
|
||||
assert OmniVoiceEpubLoader.RETURN_NAMES == ("text", "chapter_list")
|
||||
assert OmniVoiceEpubLoader.RETURN_TYPES == ("STRING", "STRING", "STRING")
|
||||
assert OmniVoiceEpubLoader.RETURN_NAMES == ("text", "chapter_title", "chapter_list")
|
||||
|
||||
|
||||
def test_chapter_extraction_basic():
|
||||
epub = make_fake_epub([("Intro", "<p>Hello world</p>"), ("Chapter One", "<p>Body here</p>")])
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)):
|
||||
text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 2)
|
||||
text, chapter_title, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 2)
|
||||
assert "Hello world" in text
|
||||
assert "Body here" in text
|
||||
assert "---" in text
|
||||
@@ -75,7 +75,7 @@ def test_chapter_extraction_basic():
|
||||
def test_chapter_range_single():
|
||||
epub = make_fake_epub([("One", "<p>First</p>"), ("Two", "<p>Second</p>"), ("Three", "<p>Third</p>")])
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)):
|
||||
text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
|
||||
text, _, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
|
||||
assert "Second" in text
|
||||
assert "First" not in text
|
||||
assert "Third" not in text
|
||||
@@ -84,7 +84,7 @@ def test_chapter_range_single():
|
||||
def test_chapter_list_contains_all():
|
||||
epub = make_fake_epub([("A", ""), ("B", ""), ("C", "")])
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)):
|
||||
_, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
|
||||
_, _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
|
||||
lines = chapter_list.strip().splitlines()
|
||||
assert len(lines) == 3
|
||||
assert lines[0].startswith("1.")
|
||||
@@ -94,14 +94,14 @@ def test_chapter_list_contains_all():
|
||||
def test_range_clamping_high():
|
||||
epub = make_fake_epub([("A", "<p>aaa</p>"), ("B", "<p>bbb</p>")])
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)):
|
||||
text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 99)
|
||||
text, _, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 99)
|
||||
assert "aaa" in text and "bbb" in text
|
||||
|
||||
|
||||
def test_range_clamping_end_below_start():
|
||||
epub = make_fake_epub([("A", "<p>aaa</p>"), ("B", "<p>bbb</p>")])
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)):
|
||||
text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 1)
|
||||
text, _, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 1)
|
||||
assert "bbb" in text
|
||||
assert "aaa" not in text
|
||||
|
||||
@@ -119,14 +119,14 @@ def test_missing_title_fallback():
|
||||
z.writestr('OEBPS/ch0.xhtml', '<html><body><p>No title here</p></body></html>')
|
||||
buf.seek(0)
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(buf.read())):
|
||||
_, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
|
||||
_, _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
|
||||
assert "1. Chapter 1" in chapter_list
|
||||
|
||||
|
||||
def test_script_style_stripped():
|
||||
epub = make_fake_epub([("Test", '<script>alert("xss")</script><style>color:red</style><p>clean</p>')])
|
||||
with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)):
|
||||
text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
|
||||
text, _, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
|
||||
assert "alert" not in text
|
||||
assert "color" not in text
|
||||
assert "clean" in text
|
||||
|
||||
@@ -46,7 +46,7 @@ def test_generate_auto_voice():
|
||||
assert "sample_rate" in audio
|
||||
assert audio["sample_rate"] == 24000
|
||||
mock_model.generate.assert_called_once_with(
|
||||
text="Hello world", speed=1.0, num_step=32
|
||||
text="Hello world", speed=1.0, num_step=32, guidance_scale=2.0
|
||||
)
|
||||
|
||||
|
||||
@@ -64,7 +64,7 @@ def test_generate_voice_design():
|
||||
audio = result[0]
|
||||
assert audio["sample_rate"] == 24000
|
||||
mock_model.generate.assert_called_once_with(
|
||||
text="Hello world", instruct="female, low pitch", speed=1.0, num_step=32
|
||||
text="Hello world", instruct="female, low pitch", speed=1.0, num_step=32, guidance_scale=2.0
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
import { app } from "../../scripts/app.js";
|
||||
|
||||
const MAX_SPEAKERS = 8;
|
||||
|
||||
app.registerExtension({
|
||||
name: "OmniVoice.MultiSpeaker",
|
||||
|
||||
beforeRegisterNodeDef(nodeType, nodeData) {
|
||||
if (nodeData.name !== "OmniVoiceSpeakers") return;
|
||||
|
||||
/**
|
||||
* Ensure the node has exactly `count` speaker_N inputs.
|
||||
* Safe to call multiple times with the same count (idempotent).
|
||||
*/
|
||||
function syncSpeakerInputs(node, count) {
|
||||
count = Math.max(2, Math.min(MAX_SPEAKERS, Math.floor(count)));
|
||||
|
||||
// Add any missing slots
|
||||
for (let i = 1; i <= count; i++) {
|
||||
const name = `speaker_${i}`;
|
||||
if (!node.inputs?.find(inp => inp.name === name)) {
|
||||
node.addInput(name, "OMNIVOICE_SPEAKER");
|
||||
}
|
||||
}
|
||||
|
||||
// Remove excess slots (high → low so indices stay valid)
|
||||
for (let i = MAX_SPEAKERS; i > count; i--) {
|
||||
const name = `speaker_${i}`;
|
||||
const idx = node.inputs?.findIndex(inp => inp.name === name) ?? -1;
|
||||
if (idx === -1) continue;
|
||||
// Sever any connected link before removing the slot
|
||||
const linkId = node.inputs[idx].link;
|
||||
if (linkId != null) node.graph?.removeLink(linkId);
|
||||
node.removeInput(idx);
|
||||
}
|
||||
|
||||
node.setDirtyCanvas(true, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attach the num_speakers widget callback once per node instance.
|
||||
* Guarded by a flag so configure() can call it safely on reload.
|
||||
*/
|
||||
function attachCallback(node) {
|
||||
if (node._omnivoiceCbAttached) return;
|
||||
const w = node.widgets?.find(w => w.name === "num_speakers");
|
||||
if (!w) return;
|
||||
node._omnivoiceCbAttached = true;
|
||||
w.callback = (value) => syncSpeakerInputs(node, value);
|
||||
}
|
||||
|
||||
// --- Fresh node creation ---
|
||||
const onNodeCreated = nodeType.prototype.onNodeCreated;
|
||||
nodeType.prototype.onNodeCreated = function () {
|
||||
onNodeCreated?.apply(this, arguments);
|
||||
attachCallback(this);
|
||||
const w = this.widgets?.find(w => w.name === "num_speakers");
|
||||
if (w) syncSpeakerInputs(this, w.value);
|
||||
};
|
||||
|
||||
// --- Workflow load: called by LiteGraph after widget values are restored ---
|
||||
const onConfigure = nodeType.prototype.onConfigure;
|
||||
nodeType.prototype.onConfigure = function (data) {
|
||||
onConfigure?.apply(this, arguments);
|
||||
attachCallback(this);
|
||||
const w = this.widgets?.find(w => w.name === "num_speakers");
|
||||
if (w) syncSpeakerInputs(this, w.value);
|
||||
};
|
||||
},
|
||||
});
|
||||
@@ -1,55 +1,115 @@
|
||||
{
|
||||
"id": "85925298-bd01-4df7-b8e5-ac92f9d6582e",
|
||||
"revision": 0,
|
||||
"last_node_id": 6,
|
||||
"last_link_id": 9,
|
||||
"last_node_id": 14,
|
||||
"last_link_id": 30,
|
||||
"nodes": [
|
||||
{
|
||||
"id": 1,
|
||||
"type": "OmniVoiceModelLoader",
|
||||
"id": 6,
|
||||
"type": "PreviewAudio",
|
||||
"pos": [
|
||||
32,
|
||||
928,
|
||||
96
|
||||
],
|
||||
"size": [
|
||||
320,
|
||||
160
|
||||
448,
|
||||
96
|
||||
],
|
||||
"flags": {},
|
||||
"order": 0,
|
||||
"order": 4,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
"inputs": [
|
||||
{
|
||||
"name": "audio",
|
||||
"type": "AUDIO",
|
||||
"link": 27
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"properties": {
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.18.1",
|
||||
"Node name for S&R": "PreviewAudio",
|
||||
"ue_properties": {
|
||||
"widget_ue_connectable": {},
|
||||
"input_ue_unconnectable": {},
|
||||
"version": "7.8"
|
||||
}
|
||||
},
|
||||
"widgets_values": []
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"type": "OmniVoiceGenerate",
|
||||
"pos": [
|
||||
512,
|
||||
96
|
||||
],
|
||||
"size": [
|
||||
384,
|
||||
448
|
||||
],
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "OMNIVOICE_MODEL",
|
||||
"slot_index": 0,
|
||||
"link": 23
|
||||
},
|
||||
{
|
||||
"name": "ref_audio",
|
||||
"shape": 7,
|
||||
"type": "AUDIO",
|
||||
"link": 29
|
||||
},
|
||||
{
|
||||
"name": "ref_text",
|
||||
"shape": 7,
|
||||
"type": "STRING",
|
||||
"widget": {
|
||||
"name": "ref_text"
|
||||
},
|
||||
"link": 30
|
||||
}
|
||||
],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "audio",
|
||||
"type": "AUDIO",
|
||||
"links": [
|
||||
5
|
||||
27
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"aux_id": "ethanfel/ComfyUI-Omnivoice",
|
||||
"ver": "9b62c9bda8469171025709e2fae3ce877ee0f059",
|
||||
"Node name for S&R": "OmniVoiceModelLoader",
|
||||
"cnr_id": "comfyui-omnivoice-fel",
|
||||
"ver": "340c0aa402fdf8b56fad5eb1559ff901c7fc7cfc",
|
||||
"Node name for S&R": "OmniVoiceGenerate",
|
||||
"ue_properties": {
|
||||
"widget_ue_connectable": {},
|
||||
"version": "7.8",
|
||||
"input_ue_unconnectable": {}
|
||||
"input_ue_unconnectable": {},
|
||||
"version": "7.8"
|
||||
}
|
||||
},
|
||||
"widgets_values": [
|
||||
"cuda:0",
|
||||
"float16",
|
||||
false
|
||||
"Hello! This is a test of the OmniVoice text-to-speech system.",
|
||||
"voice_cloning",
|
||||
"",
|
||||
"",
|
||||
2,
|
||||
1,
|
||||
32,
|
||||
42,
|
||||
"fixed"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"type": "OmniVoiceVoicePreset",
|
||||
"pos": [
|
||||
32,
|
||||
64,
|
||||
320
|
||||
],
|
||||
"size": [
|
||||
@@ -57,7 +117,7 @@
|
||||
128
|
||||
],
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"order": 0,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
@@ -66,7 +126,7 @@
|
||||
"type": "AUDIO",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
6
|
||||
29
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -74,14 +134,15 @@
|
||||
"type": "STRING",
|
||||
"slot_index": 1,
|
||||
"links": [
|
||||
7
|
||||
30
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"aux_id": "ethanfel/ComfyUI-Omnivoice",
|
||||
"cnr_id": "comfyui-omnivoice-fel",
|
||||
"ver": "9b62c9bda8469171025709e2fae3ce877ee0f059",
|
||||
"Node name for S&R": "OmniVoiceVoicePreset",
|
||||
"aux_id": "ethanfel/ComfyUI-Omnivoice",
|
||||
"ue_properties": {
|
||||
"widget_ue_connectable": {},
|
||||
"version": "7.8",
|
||||
@@ -93,136 +154,124 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"type": "OmniVoiceGenerate",
|
||||
"id": 1,
|
||||
"type": "OmniVoiceModelLoader",
|
||||
"pos": [
|
||||
416,
|
||||
64,
|
||||
96
|
||||
],
|
||||
"size": [
|
||||
384,
|
||||
448
|
||||
320,
|
||||
160
|
||||
],
|
||||
"flags": {},
|
||||
"order": 1,
|
||||
"mode": 0,
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "OMNIVOICE_MODEL",
|
||||
"slot_index": 0,
|
||||
"links": [
|
||||
23
|
||||
]
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfyui-omnivoice-fel",
|
||||
"ver": "9b62c9bda8469171025709e2fae3ce877ee0f059",
|
||||
"Node name for S&R": "OmniVoiceModelLoader",
|
||||
"aux_id": "ethanfel/ComfyUI-Omnivoice",
|
||||
"ue_properties": {
|
||||
"widget_ue_connectable": {},
|
||||
"version": "7.8",
|
||||
"input_ue_unconnectable": {}
|
||||
}
|
||||
},
|
||||
"widgets_values": [
|
||||
"cuda:0",
|
||||
"float32",
|
||||
false
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"type": "OmniVoiceVoiceDesign",
|
||||
"pos": [
|
||||
64,
|
||||
512
|
||||
],
|
||||
"size": [
|
||||
288,
|
||||
288
|
||||
],
|
||||
"flags": {},
|
||||
"order": 2,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "model",
|
||||
"type": "OMNIVOICE_MODEL",
|
||||
"link": 5
|
||||
},
|
||||
{
|
||||
"name": "ref_audio",
|
||||
"shape": 7,
|
||||
"type": "AUDIO",
|
||||
"link": 6
|
||||
},
|
||||
{
|
||||
"name": "ref_text",
|
||||
"shape": 7,
|
||||
"type": "STRING",
|
||||
"widget": {
|
||||
"name": "ref_text"
|
||||
},
|
||||
"link": 7
|
||||
}
|
||||
],
|
||||
"inputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "audio",
|
||||
"type": "AUDIO",
|
||||
"links": [
|
||||
9
|
||||
]
|
||||
"name": "instruct",
|
||||
"type": "STRING",
|
||||
"links": []
|
||||
}
|
||||
],
|
||||
"properties": {
|
||||
"cnr_id": "comfyui-omnivoice-fel",
|
||||
"ver": "340c0aa402fdf8b56fad5eb1559ff901c7fc7cfc",
|
||||
"Node name for S&R": "OmniVoiceVoiceDesign",
|
||||
"ue_properties": {
|
||||
"widget_ue_connectable": {},
|
||||
"input_ue_unconnectable": {}
|
||||
},
|
||||
"aux_id": "ethanfel/ComfyUI-Omnivoice",
|
||||
"ver": "9b62c9bda8469171025709e2fae3ce877ee0f059",
|
||||
"Node name for S&R": "OmniVoiceGenerate"
|
||||
"input_ue_unconnectable": {},
|
||||
"version": "7.8"
|
||||
}
|
||||
},
|
||||
"widgets_values": [
|
||||
"Hello! This is a test of the OmniVoice text-to-speech system.",
|
||||
"voice_cloning",
|
||||
"",
|
||||
"",
|
||||
1,
|
||||
32,
|
||||
42,
|
||||
"fixed"
|
||||
"English",
|
||||
"none",
|
||||
"none",
|
||||
"none",
|
||||
"american accent",
|
||||
"none",
|
||||
"none",
|
||||
"none",
|
||||
"none"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"type": "PreviewAudio",
|
||||
"pos": [
|
||||
832,
|
||||
96
|
||||
],
|
||||
"size": [
|
||||
270,
|
||||
88
|
||||
],
|
||||
"flags": {},
|
||||
"order": 3,
|
||||
"mode": 0,
|
||||
"inputs": [
|
||||
{
|
||||
"name": "audio",
|
||||
"type": "AUDIO",
|
||||
"link": 9
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"properties": {
|
||||
"ue_properties": {
|
||||
"widget_ue_connectable": {},
|
||||
"input_ue_unconnectable": {}
|
||||
},
|
||||
"cnr_id": "comfy-core",
|
||||
"ver": "0.18.1",
|
||||
"Node name for S&R": "PreviewAudio"
|
||||
},
|
||||
"widgets_values": []
|
||||
}
|
||||
],
|
||||
"links": [
|
||||
[
|
||||
5,
|
||||
23,
|
||||
1,
|
||||
0,
|
||||
5,
|
||||
14,
|
||||
0,
|
||||
"OMNIVOICE_MODEL"
|
||||
],
|
||||
[
|
||||
27,
|
||||
14,
|
||||
0,
|
||||
6,
|
||||
0,
|
||||
"AUDIO"
|
||||
],
|
||||
[
|
||||
29,
|
||||
2,
|
||||
0,
|
||||
5,
|
||||
14,
|
||||
1,
|
||||
"AUDIO"
|
||||
],
|
||||
[
|
||||
7,
|
||||
30,
|
||||
2,
|
||||
1,
|
||||
5,
|
||||
14,
|
||||
2,
|
||||
"STRING"
|
||||
],
|
||||
[
|
||||
9,
|
||||
5,
|
||||
0,
|
||||
6,
|
||||
0,
|
||||
"AUDIO"
|
||||
]
|
||||
],
|
||||
"groups": [],
|
||||
@@ -231,8 +280,8 @@
|
||||
"ds": {
|
||||
"scale": 1.201632379715383,
|
||||
"offset": [
|
||||
1340.797529638862,
|
||||
246.44960712937743
|
||||
1415.539606763376,
|
||||
123.61214765199416
|
||||
]
|
||||
},
|
||||
"ue_links": [],
|
||||
|
||||
Reference in New Issue
Block a user