feat: thorough overnight sweep + dataset browser updates

- Dataset browser: audio/features now resolve through features/ subdir
- tier1_sweep.json: update data_dir to BJ dataset path
- tier1_thorough.json: 12-experiment overnight sweep across 4 groups (rank 16/32/64, alpha scaling, LoRA+/dropout/curriculum isolation, full Tier 1 stack at r16 and r64) — output to BJ/experiment/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:38:19 +02:00
parent 675644189d
commit 0000878e76
3 changed files with 126 additions and 11 deletions
@@ -1,10 +1,10 @@
 {
  "name": "tier1_sweep",
  "description": "Ablation of Tier 1 improvements: LoRA+, dropout, curriculum sampling. Baseline = uniform, no regularisation.",
-  "data_dir": "dataset/my_sound",
+  "data_dir": "/media/unraid/davinci/Selva/BJ",
  "output_root": "lora_sweeps/tier1_sweep",
  "base": {
-    "steps": 2000,
+    "steps": 4000,
    "rank": 16,
    "alpha": 0.0,
    "lr": 1e-4,
@@ -32,7 +32,7 @@
    },
    {
      "id": "dropout_0.05",
-      "description": "LoRA dropout 0.05 only. Light regularisation for 10-clip dataset.",
+      "description": "LoRA dropout 0.05 only. Light regularisation for 49-clip dataset.",
      "lora_dropout": 0.05
    },
    {
@@ -42,7 +42,7 @@
    },
    {
      "id": "curriculum",
-      "description": "Curriculum sampling only: logit_normal for steps 1-1200, then uniform. Should improve convergence vs pure uniform.",
+      "description": "Curriculum sampling only: logit_normal for steps 1-2400, then uniform. Should improve convergence vs pure uniform.",
      "timestep_mode": "curriculum"
    },
    {
@@ -51,6 +51,11 @@
      "lora_plus_ratio": 16.0,
      "lora_dropout": 0.05,
      "timestep_mode": "curriculum"
+    },
+    {
+      "id": "rank_64",
+      "description": "Rank 64 baseline — MMAudio LoRA guide default. More expressive adapter for 49-clip dataset.",
+      "rank": 64
    }
  ]
 }
@@ -0,0 +1,103 @@
+{
+  "name": "tier1_thorough",
+  "description": "Full overnight Tier 1 ablation on 49-clip BJ dataset. 4 groups: rank, alpha, regularisation, and best combinations. ~10-12h depending on GPU.",
+  "data_dir": "/media/unraid/davinci/Selva/BJ",
+  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/tier1_thorough",
+  "base": {
+    "steps": 4000,
+    "rank": 16,
+    "alpha": 0.0,
+    "lr": 1e-4,
+    "batch_size": 16,
+    "warmup_steps": 100,
+    "grad_accum": 1,
+    "save_every": 1000,
+    "seed": 42,
+    "target": "attn.qkv",
+    "timestep_mode": "uniform",
+    "logit_normal_sigma": 1.0,
+    "curriculum_switch": 0.6,
+    "lora_dropout": 0.0,
+    "lora_plus_ratio": 1.0
+  },
+  "experiments": [
+    {
+      "id": "g1_rank_16",
+      "group": "rank",
+      "description": "Rank 16 baseline — reference point for all groups."
+    },
+    {
+      "id": "g1_rank_32",
+      "group": "rank",
+      "description": "Rank 32 — midpoint. Does doubling rank improve quality without overfitting?",
+      "rank": 32
+    },
+    {
+      "id": "g1_rank_64",
+      "group": "rank",
+      "description": "Rank 64 — MMAudio LoRA guide default. Maximum expressiveness at 49 clips.",
+      "rank": 64
+    },
+    {
+      "id": "g2_alpha_half_r16",
+      "group": "alpha",
+      "description": "Alpha=8 with rank 16 (scale=0.5). Reduces intruder singular dimensions (arXiv:2410.21228).",
+      "alpha": 8.0
+    },
+    {
+      "id": "g2_alpha_half_r64",
+      "group": "alpha",
+      "description": "Alpha=32 with rank 64 (scale=0.5). Best-practice scaling for high-rank adapters.",
+      "rank": 64,
+      "alpha": 32.0
+    },
+    {
+      "id": "g3_lora_plus_4",
+      "group": "regularisation",
+      "description": "LoRA+ ratio=4 — conservative asymmetric LR. Lower bound for the technique.",
+      "lora_plus_ratio": 4.0
+    },
+    {
+      "id": "g3_lora_plus_16",
+      "group": "regularisation",
+      "description": "LoRA+ ratio=16 — standard from FLUX LoRA literature. Faster early convergence.",
+      "lora_plus_ratio": 16.0
+    },
+    {
+      "id": "g3_dropout_0.05",
+      "group": "regularisation",
+      "description": "LoRA dropout 0.05 only. Light sparsity regularisation (arXiv:2404.09610).",
+      "lora_dropout": 0.05
+    },
+    {
+      "id": "g3_dropout_0.1",
+      "group": "regularisation",
+      "description": "LoRA dropout 0.1 only. Stronger regularisation — may prevent overfitting past step 2000.",
+      "lora_dropout": 0.1
+    },
+    {
+      "id": "g3_curriculum",
+      "group": "regularisation",
+      "description": "Curriculum sampling only: logit_normal steps 1-2400, then uniform (arXiv:2603.12517).",
+      "timestep_mode": "curriculum"
+    },
+    {
+      "id": "g4_full_r16",
+      "group": "combined",
+      "description": "All Tier 1 at rank 16: LoRA+ 16 + dropout 0.05 + curriculum.",
+      "lora_plus_ratio": 16.0,
+      "lora_dropout": 0.05,
+      "timestep_mode": "curriculum"
+    },
+    {
+      "id": "g4_full_r64",
+      "group": "combined",
+      "description": "All Tier 1 at rank 64 + alpha=32. Best expressiveness + best regularisation.",
+      "rank": 64,
+      "alpha": 32.0,
+      "lora_plus_ratio": 16.0,
+      "lora_dropout": 0.05,
+      "timestep_mode": "curriculum"
+    }
+  ]
+}
@@ -43,9 +43,9 @@ class SelvaDatasetBrowser:
    RETURN_NAMES  = ("video_path", "audio_wav", "audio_flac", "features_path", "frames_dir", "mask_dir", "label", "max_index")
    OUTPUT_TOOLTIPS = (
        "path + '.mp4'",
-        "path + '.wav'",
-        "path + '.flac'",
-        "path + '.npz'  (pre-extracted SelVA features)",
+        "features/ + name + '.wav'",
+        "features/ + name + '.flac'",
+        "features/ + name + '.npz'  (pre-extracted SelVA features)",
        "path  (image-sequence directory)",
        "path + '_mask'  (mask image-sequence directory)",
        "Text label for this clip",
@@ -76,12 +76,19 @@ class SelvaDatasetBrowser:
            raise ValueError(f"[SelVA Dataset Browser] Expected a non-empty JSON array in {p}")

        count = len(data)
-        index = max(0, min(index, count - 1))   # clamp silently
+        if index >= count:
+            raise IndexError(
+                f"[SelVA Dataset Browser] index {index} is out of range "
+                f"(dataset has {count} entries, last index is {count - 1})"
+            )
        entry = data[index]

        base  = entry["path"]
        label = entry.get("label", "")

+        p_base    = Path(base)
+        feat_base = str(p_base.parent / "features" / p_base.name)
+
        print(
            f"[SelVA Dataset Browser] {index + 1}/{count}  label='{label}'  base={base}",
            flush=True,
@@ -89,9 +96,9 @@ class SelvaDatasetBrowser:

        return (
            base + ".mp4",
-            base + ".wav",
-            base + ".flac",
-            base + ".npz",
+            feat_base + ".wav",
+            feat_base + ".flac",
+            feat_base + ".npz",
            base,
            base + "_mask",
            label,