Add OmniVoice Voice Preset node with two female voice samples
Two built-in presets, auto-downloaded and cached to ComfyUI/models/omnivoice/presets/:
- "Nature – female, warm" (F5-TTS basic_ref_en.wav, transcript included)
- "Shadowheart – female, expressive" (Chatterbox demo, connect Whisper for transcript)

Outputs ref_audio (AUDIO) and ref_text (STRING) — wire directly into OmniVoice Generate.
Updated default workflow to use this node.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+3
-1
@@ -1,15 +1,17 @@
|
|||||||
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader
|
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader, OmniVoiceVoicePreset
|
||||||
|
|
||||||
NODE_CLASS_MAPPINGS = {
|
NODE_CLASS_MAPPINGS = {
|
||||||
"OmniVoiceModelLoader": OmniVoiceModelLoader,
|
"OmniVoiceModelLoader": OmniVoiceModelLoader,
|
||||||
"OmniVoiceGenerate": OmniVoiceGenerate,
|
"OmniVoiceGenerate": OmniVoiceGenerate,
|
||||||
"OmniVoiceEpubLoader": OmniVoiceEpubLoader,
|
"OmniVoiceEpubLoader": OmniVoiceEpubLoader,
|
||||||
|
"OmniVoiceVoicePreset": OmniVoiceVoicePreset,
|
||||||
}
|
}
|
||||||
|
|
||||||
NODE_DISPLAY_NAME_MAPPINGS = {
|
NODE_DISPLAY_NAME_MAPPINGS = {
|
||||||
"OmniVoiceModelLoader": "OmniVoice Model Loader",
|
"OmniVoiceModelLoader": "OmniVoice Model Loader",
|
||||||
"OmniVoiceGenerate": "OmniVoice Generate",
|
"OmniVoiceGenerate": "OmniVoice Generate",
|
||||||
"OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
|
"OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
|
||||||
|
"OmniVoiceVoicePreset": "OmniVoice Voice Preset",
|
||||||
}
|
}
|
||||||
|
|
||||||
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
|
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
|
||||||
|
|||||||
+2
-1
@@ -1,5 +1,6 @@
|
|||||||
from .loader import OmniVoiceModelLoader
|
from .loader import OmniVoiceModelLoader
|
||||||
from .generator import OmniVoiceGenerate
|
from .generator import OmniVoiceGenerate
|
||||||
from .epub_loader import OmniVoiceEpubLoader
|
from .epub_loader import OmniVoiceEpubLoader
|
||||||
|
from .voice_presets import OmniVoiceVoicePreset
|
||||||
|
|
||||||
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader"]
|
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader", "OmniVoiceVoicePreset"]
|
||||||
|
|||||||
@@ -0,0 +1,69 @@
|
|||||||
|
import os
|
||||||
|
import urllib.request
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
import soundfile as sf
|
||||||
|
|
||||||
|
# Resolve the directory where downloaded preset audio is cached.
# Only the import itself is guarded, so the fallback is taken solely when the
# `folder_paths` module is missing (presumably: running outside ComfyUI).
try:
    import folder_paths
except ImportError:
    _CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache", "omnivoice", "presets")
else:
    _CACHE_DIR = os.path.join(folder_paths.models_dir, "omnivoice", "presets")

# Built-in reference voices: display_name -> (url, transcript).
# The display name is the dictionary key; each value is a 2-tuple.
# transcript="" means unknown — connect a Whisper node to ref_text to fill it.
PRESETS = {
    "Nature – female, warm (F5-TTS ref)": (
        "https://raw.githubusercontent.com/SWivid/F5-TTS/main/src/f5_tts/infer/examples/basic/basic_ref_en.wav",
        "Some call me nature, others call me mother nature.",
    ),
    "Shadowheart – female, expressive (Chatterbox ref)": (
        "https://storage.googleapis.com/chatterbox-demo-samples/prompts/female_shadowheart4.flac",
        "",  # transcript unknown — connect Whisper node to ref_text
    ),
}
|
||||||
|
|
||||||
|
|
||||||
|
def _load_audio(url):
    """Download *url* into the preset cache (once) and load it as audio.

    The cache filename is the last path component of the URL, with any
    query string removed. The download is written to a temporary file and
    then moved into place, so an interrupted download never leaves a
    truncated file that later calls would mistake for a valid cache entry.

    Args:
        url: HTTP(S) location of an audio file readable by ``soundfile``.

    Returns:
        A ``(waveform, sample_rate)`` tuple, where ``waveform`` is a float32
        tensor of shape ``(1, channels, samples)`` and ``sample_rate`` is the
        rate reported by ``soundfile``.

    Raises:
        ValueError: If no filename can be derived from *url*.
        urllib.error.URLError: If the download fails.
    """
    os.makedirs(_CACHE_DIR, exist_ok=True)

    filename = os.path.basename(url.split("?")[0])
    if not filename:
        # Without this guard the cache path would be the cache directory itself.
        raise ValueError(f"Cannot derive a cache filename from URL: {url!r}")
    cache_path = os.path.join(_CACHE_DIR, filename)

    if not os.path.exists(cache_path):
        # Per-process temp name in the same directory keeps os.replace atomic
        # and avoids two processes writing to the same partial file.
        tmp_path = f"{cache_path}.{os.getpid()}.part"
        try:
            urllib.request.urlretrieve(url, tmp_path)
            os.replace(tmp_path, cache_path)
        finally:
            # Only present if the download or the move failed.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # always_2d=True yields (samples, channels) even for mono input.
    audio_np, sr = sf.read(cache_path, dtype="float32", always_2d=True)
    # Transpose to (channels, samples); copy so the tensor is contiguous.
    audio_np = np.ascontiguousarray(audio_np.T)
    waveform = torch.from_numpy(audio_np).unsqueeze(0)  # (1, channels, samples)
    return waveform, sr
|
||||||
|
|
||||||
|
|
||||||
|
class OmniVoiceVoicePreset:
    """Node that outputs one of the built-in reference voices in ``PRESETS``.

    Outputs:
        ref_audio (AUDIO): dict with ``"waveform"`` and ``"sample_rate"`` keys,
            filled from the values returned by ``_load_audio``.
        ref_text (STRING): the transcript stored with the preset; may be empty.
    """

    RETURN_TYPES = ("AUDIO", "STRING")
    RETURN_NAMES = ("ref_audio", "ref_text")
    FUNCTION = "load_preset"
    CATEGORY = "OmniVoice"

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the single required input: a choice among preset names."""
        tooltip = (
            "Pre-fetched reference voice for OmniVoice Generate.\n"
            "Connect ref_audio → ref_audio and ref_text → ref_text.\n"
            "If ref_text is blank, connect a Whisper node to supply the transcript."
        )
        preset_names = list(PRESETS.keys())
        preset_input = (preset_names, {"tooltip": tooltip})
        return {"required": {"preset": preset_input}}

    def load_preset(self, preset):
        """Return ``(audio_dict, transcript)`` for the preset named *preset*.

        Raises ``KeyError`` if *preset* is not a key of ``PRESETS``.
        """
        source_url, ref_text = PRESETS[preset]
        samples, rate = _load_audio(source_url)
        ref_audio = {"waveform": samples, "sample_rate": rate}
        return (ref_audio, ref_text)
|
||||||
@@ -1,84 +1,57 @@
|
|||||||
{
|
{
|
||||||
"last_node_id": 4,
|
"last_node_id": 4,
|
||||||
"last_link_id": 3,
|
"last_link_id": 4,
|
||||||
"nodes": [
|
"nodes": [
|
||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"type": "OmniVoiceModelLoader",
|
"type": "OmniVoiceModelLoader",
|
||||||
"pos": [40, 80],
|
"pos": [40, 80],
|
||||||
"size": {"0": 320, "1": 130},
|
"size": {"0": 300, "1": 100},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 0,
|
"order": 0,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{"name": "model", "type": "OMNIVOICE_MODEL", "links": [1], "shape": 3, "slot_index": 0}
|
||||||
"name": "model",
|
|
||||||
"type": "OMNIVOICE_MODEL",
|
|
||||||
"links": [1],
|
|
||||||
"shape": 3,
|
|
||||||
"slot_index": 0
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "OmniVoiceModelLoader"},
|
"properties": {"Node name for S&R": "OmniVoiceModelLoader"},
|
||||||
"widgets_values": ["cuda:0", "float16"]
|
"widgets_values": ["cuda:0", "float16"]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 2,
|
"id": 2,
|
||||||
"type": "LoadAudio",
|
"type": "OmniVoiceVoicePreset",
|
||||||
"pos": [40, 280],
|
"pos": [40, 240],
|
||||||
"size": {"0": 320, "1": 76},
|
"size": {"0": 300, "1": 80},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 1,
|
"order": 1,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{"name": "ref_audio", "type": "AUDIO", "links": [2], "shape": 3, "slot_index": 0},
|
||||||
"name": "AUDIO",
|
{"name": "ref_text", "type": "STRING", "links": [3], "shape": 3, "slot_index": 1}
|
||||||
"type": "AUDIO",
|
|
||||||
"links": [2],
|
|
||||||
"shape": 3,
|
|
||||||
"slot_index": 0
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "LoadAudio"},
|
"properties": {"Node name for S&R": "OmniVoiceVoicePreset"},
|
||||||
"widgets_values": ["reference_voice.wav"]
|
"widgets_values": ["Nature – female, warm (F5-TTS ref)"]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 3,
|
"id": 3,
|
||||||
"type": "OmniVoiceGenerate",
|
"type": "OmniVoiceGenerate",
|
||||||
"pos": [440, 80],
|
"pos": [420, 80],
|
||||||
"size": {"0": 380, "1": 310},
|
"size": {"0": 380, "1": 310},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 2,
|
"order": 2,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{"name": "model", "type": "OMNIVOICE_MODEL", "link": 1, "slot_index": 0},
|
||||||
"name": "model",
|
{"name": "ref_audio", "type": "AUDIO", "link": 2, "slot_index": 1},
|
||||||
"type": "OMNIVOICE_MODEL",
|
{"name": "ref_text", "type": "STRING", "link": 3, "slot_index": 2}
|
||||||
"link": 1,
|
|
||||||
"slot_index": 0
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ref_audio",
|
|
||||||
"type": "AUDIO",
|
|
||||||
"link": 2,
|
|
||||||
"slot_index": 1
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{"name": "audio", "type": "AUDIO", "links": [4], "shape": 3, "slot_index": 0}
|
||||||
"name": "audio",
|
|
||||||
"type": "AUDIO",
|
|
||||||
"links": [3],
|
|
||||||
"shape": 3,
|
|
||||||
"slot_index": 0
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "OmniVoiceGenerate"},
|
"properties": {"Node name for S&R": "OmniVoiceGenerate"},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"Hello! Connect a Whisper node to ref_text for best results.",
|
"Hello! This is a test of the OmniVoice text-to-speech system.",
|
||||||
"voice_cloning",
|
"voice_cloning",
|
||||||
"",
|
"",
|
||||||
"",
|
|
||||||
1.0,
|
1.0,
|
||||||
32
|
32
|
||||||
]
|
]
|
||||||
@@ -86,18 +59,13 @@
|
|||||||
{
|
{
|
||||||
"id": 4,
|
"id": 4,
|
||||||
"type": "SaveAudio",
|
"type": "SaveAudio",
|
||||||
"pos": [900, 80],
|
"pos": [880, 80],
|
||||||
"size": {"0": 320, "1": 100},
|
"size": {"0": 300, "1": 100},
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 3,
|
"order": 3,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{"name": "audio", "type": "AUDIO", "link": 4, "slot_index": 0}
|
||||||
"name": "audio",
|
|
||||||
"type": "AUDIO",
|
|
||||||
"link": 3,
|
|
||||||
"slot_index": 0
|
|
||||||
}
|
|
||||||
],
|
],
|
||||||
"properties": {"Node name for S&R": "SaveAudio"},
|
"properties": {"Node name for S&R": "SaveAudio"},
|
||||||
"widgets_values": ["omnivoice"]
|
"widgets_values": ["omnivoice"]
|
||||||
@@ -106,12 +74,11 @@
|
|||||||
"links": [
|
"links": [
|
||||||
[1, 1, 0, 3, 0, "OMNIVOICE_MODEL"],
|
[1, 1, 0, 3, 0, "OMNIVOICE_MODEL"],
|
||||||
[2, 2, 0, 3, 1, "AUDIO"],
|
[2, 2, 0, 3, 1, "AUDIO"],
|
||||||
[3, 3, 0, 4, 0, "AUDIO"]
|
[3, 2, 1, 3, 2, "STRING"],
|
||||||
|
[4, 3, 0, 4, 0, "AUDIO"]
|
||||||
],
|
],
|
||||||
"groups": [],
|
"groups": [],
|
||||||
"config": {},
|
"config": {},
|
||||||
"extra": {
|
"extra": {"ds": {"scale": 0.9, "offset": [0, 0]}},
|
||||||
"ds": {"scale": 0.9, "offset": [0, 0]}
|
|
||||||
},
|
|
||||||
"version": 0.4
|
"version": 0.4
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user