From d25df10aa5b0b4b9f677690301ed9404248718cd Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Thu, 9 Apr 2026 14:05:31 +0200
Subject: [PATCH] feat: add audio dataset pipeline skeleton

---
 nodes/selva_dataset_pipeline.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 nodes/selva_dataset_pipeline.py

diff --git a/nodes/selva_dataset_pipeline.py b/nodes/selva_dataset_pipeline.py
new file mode 100644
index 0000000..d25f4e1
--- /dev/null
+++ b/nodes/selva_dataset_pipeline.py
@@ -0,0 +1,26 @@
+"""SelVA Audio Dataset Pipeline — chainable in-memory preprocessing nodes.
+
+Typical chain:
+  SelvaDatasetLoader
+      ↓ AUDIO_DATASET
+  SelvaDatasetResampler       (optional)
+      ↓ AUDIO_DATASET
+  SelvaDatasetLUFSNormalizer  (optional)
+      ↓ AUDIO_DATASET
+  SelvaDatasetInspector       (optional)
+      ↓ AUDIO_DATASET  +  STRING report
+  SelvaDatasetItemExtractor   → AUDIO (bridges to save/preview nodes)
+"""
+
+from pathlib import Path
+
+import numpy as np
+import torch
+import torchaudio
+
+from .utils import SELVA_CATEGORY
+
+# ComfyUI custom type name — passed between all dataset pipeline nodes (see the chain in the module docstring)
+AUDIO_DATASET = "AUDIO_DATASET"
+
+_AUDIO_EXTS = {".wav", ".flac", ".mp3", ".ogg", ".aac", ".m4a"}  # lowercase, dot-prefixed; NOTE(review): presumably matched against file suffixes — confirm at use site