feat: auto-discover user presets from the presets folder
Drop any audio file (wav/flac/mp3/ogg/m4a) into the presets cache dir and it will appear as "<name> (local)" in the Voice Preset dropdown on next ComfyUI restart. Add a same-stem .txt file for the transcript.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+48
-11
@@ -53,14 +53,45 @@ PRESETS = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def _load_audio(url):
|
_AUDIO_EXTS = {".wav", ".flac", ".mp3", ".ogg", ".m4a"}
|
||||||
"""Download (once) and return (waveform_tensor, sample_rate)."""
|
_BUILTIN_FILES = frozenset(os.path.basename(url.split("?")[0]) for url, _ in PRESETS.values())
|
||||||
|
|
||||||
|
|
||||||
|
def _scan_user_presets():
    """Return a dict of user presets found in _CACHE_DIR.

    Every regular file in _CACHE_DIR whose extension (compared
    case-insensitively) is in _AUDIO_EXTS and whose name is not in
    _BUILTIN_FILES becomes one entry:

        "<stem> (local)" -> (audio_path, transcript)

    The transcript is the stripped content of a same-stem .txt file in the
    same directory, or "" when that file is missing or cannot be read.

    Names are visited in sorted order, so when two audio files share a stem
    (e.g. voice.mp3 and voice.wav) the name that sorts last owns the key.

    Returns an empty dict when _CACHE_DIR is not an existing directory.
    """
    user = {}
    if not os.path.isdir(_CACHE_DIR):
        return user
    for fname in sorted(os.listdir(_CACHE_DIR)):
        stem, ext = os.path.splitext(fname)
        if ext.lower() not in _AUDIO_EXTS or fname in _BUILTIN_FILES:
            continue
        audio_path = os.path.join(_CACHE_DIR, fname)
        # A sub-directory that happens to be named like an audio file
        # (e.g. "samples.wav/") is not a loadable preset.
        if not os.path.isfile(audio_path):
            continue
        txt_path = os.path.join(_CACHE_DIR, stem + ".txt")
        try:
            # utf-8-sig reads plain UTF-8 unchanged but drops a leading BOM,
            # which str.strip() would otherwise leave in the transcript.
            # errors="replace" keeps one mis-encoded file from aborting the
            # whole scan.
            with open(txt_path, "r", encoding="utf-8-sig", errors="replace") as f:
                transcript = f.read().strip()
        except OSError:
            # Missing or unreadable transcript: use the same blank value
            # as when no .txt file exists at all.
            transcript = ""
        user[f"{stem} (local)"] = (audio_path, transcript)
    return user
|
||||||
|
|
||||||
|
|
||||||
|
def _load_audio(source):
|
||||||
|
"""Load audio from a URL (downloading once) or a local file path."""
|
||||||
os.makedirs(_CACHE_DIR, exist_ok=True)
|
os.makedirs(_CACHE_DIR, exist_ok=True)
|
||||||
filename = os.path.basename(url.split("?")[0])
|
if source.startswith("http://") or source.startswith("https://"):
|
||||||
cache_path = os.path.join(_CACHE_DIR, filename)
|
filename = os.path.basename(source.split("?")[0])
|
||||||
if not os.path.exists(cache_path):
|
cache_path = os.path.join(_CACHE_DIR, filename)
|
||||||
urllib.request.urlretrieve(url, cache_path)
|
if not os.path.exists(cache_path):
|
||||||
audio_np, sr = sf.read(cache_path, dtype="float32")
|
urllib.request.urlretrieve(source, cache_path)
|
||||||
|
path = cache_path
|
||||||
|
else:
|
||||||
|
path = source
|
||||||
|
audio_np, sr = sf.read(path, dtype="float32")
|
||||||
if audio_np.ndim == 1:
|
if audio_np.ndim == 1:
|
||||||
audio_np = audio_np[np.newaxis, :] # (1, samples)
|
audio_np = audio_np[np.newaxis, :] # (1, samples)
|
||||||
else:
|
else:
|
||||||
@@ -72,15 +103,20 @@ def _load_audio(url):
|
|||||||
class OmniVoiceVoicePreset:
|
class OmniVoiceVoicePreset:
|
||||||
@classmethod
|
@classmethod
|
||||||
def INPUT_TYPES(cls):
|
def INPUT_TYPES(cls):
|
||||||
|
all_presets = {**PRESETS, **_scan_user_presets()}
|
||||||
return {
|
return {
|
||||||
"required": {
|
"required": {
|
||||||
"preset": (
|
"preset": (
|
||||||
list(PRESETS.keys()),
|
list(all_presets.keys()),
|
||||||
{
|
{
|
||||||
"tooltip": (
|
"tooltip": (
|
||||||
"Pre-fetched reference voice for OmniVoice Generate.\n"
|
"Pre-fetched reference voice for OmniVoice Generate.\n"
|
||||||
"Connect ref_audio → ref_audio and ref_text → ref_text.\n"
|
"Connect ref_audio → ref_audio and ref_text → ref_text.\n"
|
||||||
"If ref_text is blank, connect a Whisper node to supply the transcript."
|
"\n"
|
||||||
|
"To add your own presets, drop audio files into:\n"
|
||||||
|
f" {_CACHE_DIR}\n"
|
||||||
|
"Add a same-name .txt file alongside for the transcript.\n"
|
||||||
|
"Restart ComfyUI to pick up new files."
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
@@ -93,6 +129,7 @@ class OmniVoiceVoicePreset:
|
|||||||
CATEGORY = "OmniVoice"
|
CATEGORY = "OmniVoice"
|
||||||
|
|
||||||
def load_preset(self, preset):
|
def load_preset(self, preset):
|
||||||
url, transcript = PRESETS[preset]
|
all_presets = {**PRESETS, **_scan_user_presets()}
|
||||||
waveform, sr = _load_audio(url)
|
source, transcript = all_presets[preset]
|
||||||
|
waveform, sr = _load_audio(source)
|
||||||
return ({"waveform": waveform, "sample_rate": sr}, transcript)
|
return ({"waveform": waveform, "sample_rate": sr}, transcript)
|
||||||
|
|||||||
Reference in New Issue
Block a user