From 2c71d4c1847ed22e273470a646470424af16552a Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Thu, 9 Apr 2026 14:09:43 +0200
Subject: [PATCH] feat: add SelvaDatasetLoader node

Co-Authored-By: Claude Sonnet 4.6
---
Review note: the added block below was revised during review (empty-path
guard, directory check, regular-file filter, docstrings). The post-image blob
id on the "index" line predates that revision.

 nodes/selva_dataset_pipeline.py | 75 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/nodes/selva_dataset_pipeline.py b/nodes/selva_dataset_pipeline.py
index d25f4e1..d1d78dd 100644
--- a/nodes/selva_dataset_pipeline.py
+++ b/nodes/selva_dataset_pipeline.py
@@ -24,3 +24,78 @@ from .utils import SELVA_CATEGORY
 
 AUDIO_DATASET = "AUDIO_DATASET"
 _AUDIO_EXTS = {".wav", ".flac", ".mp3", ".ogg", ".aac", ".m4a"}
+
+
+class SelvaDatasetLoader:
+    """Load all audio files in a folder into an in-memory AUDIO_DATASET.
+
+    The dataset is a list of dicts, one per successfully loaded clip, with
+    keys ``waveform`` (float tensor, [1, C, L]), ``sample_rate`` and ``name``.
+    """
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        """Declare the single required ``folder`` string input."""
+        return {
+            "required": {
+                "folder": ("STRING", {
+                    "default": "",
+                    "tooltip": "Absolute path to folder containing audio files. Searched recursively.",
+                }),
+            }
+        }
+
+    RETURN_TYPES = (AUDIO_DATASET,)
+    RETURN_NAMES = ("dataset",)
+    FUNCTION = "load"
+    CATEGORY = SELVA_CATEGORY
+    DESCRIPTION = "Load all audio files from a folder into memory as an AUDIO_DATASET."
+
+    def load(self, folder: str):
+        """Recursively load every audio file under *folder*.
+
+        Files that fail to load are skipped with a console message instead of
+        aborting the whole run.
+
+        Returns:
+            A 1-tuple holding the list of clip dicts.
+
+        Raises:
+            ValueError: *folder* is empty or only whitespace.
+            FileNotFoundError: *folder* does not exist.
+            NotADirectoryError: *folder* exists but is not a directory.
+            RuntimeError: no file with a known audio extension was found.
+        """
+        folder = folder.strip()
+        if not folder:
+            # Path("") equals Path("."), which always exists: without this guard
+            # the default empty input would recursively scan the process CWD.
+            raise ValueError("[DatasetLoader] Folder path is empty")
+
+        root = Path(folder)
+        if not root.exists():
+            raise FileNotFoundError(f"[DatasetLoader] Folder not found: {root}")
+        if not root.is_dir():
+            raise NotADirectoryError(f"[DatasetLoader] Not a folder: {root}")
+
+        # rglob("*") also yields directories; keep regular files only so that a
+        # directory named e.g. "takes.wav" is never handed to the decoder.
+        files = sorted(
+            f for f in root.rglob("*")
+            if f.is_file() and f.suffix.lower() in _AUDIO_EXTS
+        )
+        if not files:
+            raise RuntimeError(f"[DatasetLoader] No audio files found in {root}")
+
+        dataset = []
+        for f in files:
+            try:
+                wav, sr = torchaudio.load(str(f))  # [C, L]
+                wav = wav.unsqueeze(0).float()  # [1, C, L]
+                dataset.append({"waveform": wav, "sample_rate": sr, "name": f.stem})
+            except Exception as e:
+                # Deliberate best effort: one unreadable file must not abort the load.
+                print(f"[DatasetLoader] Skipping {f.name}: {e}", flush=True)
+
+        print(f"[DatasetLoader] Loaded {len(dataset)} clips from {root}", flush=True)
+        return (dataset,)