From c8e6b91f67caf6586be3e6161f0ba1d1dd7a9cd0 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Wed, 8 Apr 2026 17:55:05 +0200
Subject: [PATCH] feat: add alpha_scale_sweep to fix LoRA noise contamination
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous sweep used alpha=rank (scale=1.0), which at rank 128/256 drowned
base model priors — spectral flatness went from 0.013 (baseline) to 0.094.
This sweep tests alpha dramatically below rank across r16/r32/r128 (plus
alpha=rank reference runs at r16 and r32) to find the scale where LoRA
nudges rather than overwrites.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 experiments/alpha_scale_sweep.json | 77 ++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 experiments/alpha_scale_sweep.json

diff --git a/experiments/alpha_scale_sweep.json b/experiments/alpha_scale_sweep.json
new file mode 100644
index 0000000..5a91569
--- /dev/null
+++ b/experiments/alpha_scale_sweep.json
@@ -0,0 +1,77 @@
+{
+  "name": "alpha_scale_sweep",
+  "description": "Fix LoRA noise contamination (spectral flatness rose from 0.013 baseline to 0.094 at alpha=rank). Root cause: alpha=rank (scale=alpha/rank=1.0) at high rank drowns base model priors. Testing dramatically lower alpha so the LoRA nudges rather than overwrites the base model. All runs at lr=3e-4 (best stable LR from r128_sweet_spot).",
+  "data_dir": "/media/unraid/davinci/Selva/BJ/features",
+  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/alpha_scale_sweep",
+  "base": {
+    "steps": 6000,
+    "lr": 3e-4,
+    "batch_size": 16,
+    "warmup_steps": 200,
+    "grad_accum": 1,
+    "save_every": 2000,
+    "seed": 42,
+    "target": "attn.qkv",
+    "timestep_mode": "uniform",
+    "logit_normal_sigma": 1.0,
+    "curriculum_switch": 0.6,
+    "lora_dropout": 0.0,
+    "lora_plus_ratio": 1.0,
+    "lr_schedule": "constant"
+  },
+  "experiments": [
+
+    {
+      "id": "g1_r16_alpha4",
+      "group": "conservative",
+      "description": "Back to basics: rank=16 alpha=4 (scale=0.25). Small adapter, gentle scale — cleanest possible LoRA signal.",
+      "rank": 16,
+      "alpha": 4.0
+    },
+    {
+      "id": "g1_r16_alpha16",
+      "group": "conservative",
+      "description": "rank=16 alpha=16 (scale=1.0) — the original default. Reference point: is the noise issue rank-specific or universal?",
+      "rank": 16,
+      "alpha": 16.0
+    },
+
+    {
+      "id": "g2_r32_alpha8",
+      "group": "mid",
+      "description": "rank=32 alpha=8 (scale=0.25). More capacity than r16 but still gentle scale.",
+      "rank": 32,
+      "alpha": 8.0
+    },
+    {
+      "id": "g2_r32_alpha32",
+      "group": "mid",
+      "description": "rank=32 alpha=32 (scale=1.0). Same rank as g2_r32_alpha8 but full scale — isolates whether scale (rather than rank) is causing noise.",
+      "rank": 32,
+      "alpha": 32.0
+    },
+
+    {
+      "id": "g3_r128_alpha8",
+      "group": "high_rank_low_alpha",
+      "description": "rank=128 alpha=8 (scale=0.0625). High capacity, very gentle contribution — can r128 stay clean at low alpha?",
+      "rank": 128,
+      "alpha": 8.0
+    },
+    {
+      "id": "g3_r128_alpha16",
+      "group": "high_rank_low_alpha",
+      "description": "rank=128 alpha=16 (scale=0.125). Slightly more signal than alpha=8.",
+      "rank": 128,
+      "alpha": 16.0
+    },
+    {
+      "id": "g3_r128_alpha32",
+      "group": "high_rank_low_alpha",
+      "description": "rank=128 alpha=32 (scale=0.25). Same scale as g1_r16_alpha4 and g2_r32_alpha8 — comparable contribution across ranks.",
+      "rank": 128,
+      "alpha": 32.0
+    }
+
+  ]
+}