From 15fc5f07932cc24e692951b320e7a6ec8fd1a848 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Thu, 9 Apr 2026 15:36:27 +0200
Subject: [PATCH] feat: add SelvaDatasetCompressor node for parallel
 compression

Mild 2:1-3:1 parallel compression via pedalboard.Compressor to reduce
within-clip loudness variance after LUFS normalization. Blend ratio
keeps transients intact while tightening dynamics.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 nodes/__init__.py               |  1 +
 nodes/selva_dataset_pipeline.py | 79 +++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+)

diff --git a/nodes/__init__.py b/nodes/__init__.py
index 1e96234..d03d60e 100644
--- a/nodes/__init__.py
+++ b/nodes/__init__.py
@@ -25,6 +25,7 @@ _NODES = {
     "SelvaDatasetLoader":          (".selva_dataset_pipeline", "SelvaDatasetLoader",          "SelVA Dataset Loader"),
     "SelvaDatasetResampler":       (".selva_dataset_pipeline", "SelvaDatasetResampler",       "SelVA Dataset Resampler"),
     "SelvaDatasetLUFSNormalizer":  (".selva_dataset_pipeline", "SelvaDatasetLUFSNormalizer",  "SelVA Dataset LUFS Normalizer"),
+    "SelvaDatasetCompressor":      (".selva_dataset_pipeline", "SelvaDatasetCompressor",      "SelVA Dataset Compressor"),
     "SelvaDatasetInspector":       (".selva_dataset_pipeline", "SelvaDatasetInspector",       "SelVA Dataset Inspector"),
     "SelvaDatasetItemExtractor":   (".selva_dataset_pipeline", "SelvaDatasetItemExtractor",   "SelVA Dataset Item Extractor"),
     "SelvaDatasetSaver":           (".selva_dataset_pipeline", "SelvaDatasetSaver",           "SelVA Dataset Saver"),
diff --git a/nodes/selva_dataset_pipeline.py b/nodes/selva_dataset_pipeline.py
index 66e3e6f..9d41391 100644
--- a/nodes/selva_dataset_pipeline.py
+++ b/nodes/selva_dataset_pipeline.py
@@ -7,6 +7,8 @@ Typical chain:
       ↓ AUDIO_DATASET
   SelvaDatasetLUFSNormalizer  (optional)
       ↓ AUDIO_DATASET
+  SelvaDatasetCompressor      (optional)
+      ↓ AUDIO_DATASET
   SelvaDatasetInspector       (optional)
       ↓ AUDIO_DATASET  +  STRING report
   SelvaDatasetItemExtractor   → AUDIO (bridges to save/preview nodes)
@@ -201,6 +203,83 @@ class SelvaDatasetLUFSNormalizer:
         return (out,)
 
 
+class SelvaDatasetCompressor:
+    """Apply mild parallel compression to reduce within-clip loudness variance.
+
+    Uses pedalboard.Compressor (ratio is user-set; 2:1–3:1 is the mild range).
+    Parallel (New York) style: the compressed signal is blended with the dry
+    one to gently tighten dynamics. Apply after LUFS normalization.
+    """
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "dataset":        (AUDIO_DATASET,),
+                "threshold_db":   ("FLOAT", {
+                    "default": -18.0, "min": -40.0, "max": -6.0, "step": 1.0,
+                    "tooltip": "Compression kicks in above this level. -18 dB is a safe starting point after LUFS normalization.",
+                }),
+                "ratio":          ("FLOAT", {
+                    "default": 2.5, "min": 1.5, "max": 4.0, "step": 0.5,
+                    "tooltip": "Compression ratio. 2:1–3:1 is mild; stay below 4:1 to avoid pumping.",
+                }),
+                "attack_ms":      ("FLOAT", {
+                    "default": 10.0, "min": 1.0, "max": 100.0, "step": 1.0,
+                    "tooltip": "Attack time in ms. Slower attack preserves transients.",
+                }),
+                "release_ms":     ("FLOAT", {
+                    "default": 100.0, "min": 20.0, "max": 500.0, "step": 10.0,
+                    "tooltip": "Release time in ms.",
+                }),
+                "mix":            ("FLOAT", {
+                    "default": 0.4, "min": 0.0, "max": 1.0, "step": 0.05,
+                    "tooltip": "Parallel blend: 0.0 = dry only, 1.0 = fully compressed. 0.3–0.5 is typical.",
+                }),
+            }
+        }
+
+    RETURN_TYPES  = (AUDIO_DATASET,)
+    RETURN_NAMES  = ("dataset",)
+    FUNCTION      = "compress"
+    CATEGORY      = SELVA_CATEGORY
+    DESCRIPTION   = (
+        "Mild parallel compression to reduce within-clip dynamic range. "
+        "Blends compressed signal with dry at the given mix ratio. "
+        "Apply after LUFS normalization."
+    )
+
+    def compress(self, dataset, threshold_db: float, ratio: float,
+                 attack_ms: float, release_ms: float, mix: float):
+        """Compress each clip and return a 1-tuple holding the new dataset list."""
+        from pedalboard import Compressor, Pedalboard  # imported only when the node runs
+        board = Pedalboard([Compressor(
+            threshold_db=threshold_db,
+            ratio=ratio,
+            attack_ms=attack_ms,
+            release_ms=release_ms,
+        )])
+
+        out = []
+        for item in dataset:
+            wav = item["waveform"][0]   # [C, L]
+            sr  = item["sample_rate"]
+            # pedalboard expects [C, L] float32 numpy; Tensor.numpy() raises on CUDA
+            # or grad-tracking tensors, so detach and move to CPU before converting.
+            wav_np = wav.detach().cpu().float().numpy()         # [C, L]
+            compressed = board(wav_np, sr)                      # [C, L]
+            mixed = (1.0 - mix) * wav_np + mix * compressed
+            wav_out = torch.from_numpy(mixed).unsqueeze(0)      # [1, C, L]
+            out.append({"waveform": wav_out, "sample_rate": sr, "name": item["name"]})
+
+        print(
+            f"[DatasetCompressor] {len(out)} clips compressed  "
+            f"thr={threshold_db}dB  ratio={ratio}:1  mix={mix:.0%}",
+            flush=True,
+        )
+        return (out,)
+
+
 def _check_hf_shelf(wav: torch.Tensor, sr: int) -> bool:
     """Return True if clip looks codec-compressed (hard HF shelf above 15 kHz).