feat: add PiSSA/rsLoRA support to scheduler and PiSSA sweep experiment

Thread init_mode and use_rslora through the scheduler's config parsing, experiment record, and _train_inner call. The default alpha (used when the configured alpha is not positive) changes from rank to 2*rank to match the trainer. Add pissa_sweep.json with 7 experiments at rank 128 ablating PiSSA vs. standard init, rsLoRA vs. classic alpha/rank scaling, learning rate, and dropout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-09 22:07:27 +02:00
parent 784fb2753f
commit e16480b4c9
2 changed files with 69 additions and 2 deletions
@@ -0,0 +1,62 @@
+{
+  "name": "pissa_sweep",
+  "description": "PiSSA vs standard init ablation at rank 128. Best prior config (lr=3e-4, bs=16, 10k steps) as baseline. PiSSA starts on-manifold via SVD init — should eliminate intruder dimensions. rsLoRA stabilises scaling at high rank.",
+  "data_dir": "/media/unraid/davinci/Selva/BJ/features",
+  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/pissa_sweep",
+  "base": {
+    "steps": 10000,
+    "rank": 128,
+    "alpha": 0.0,
+    "lr": 3e-4,
+    "batch_size": 16,
+    "warmup_steps": 200,
+    "grad_accum": 1,
+    "save_every": 2000,
+    "seed": 42,
+    "target": "attn.qkv",
+    "timestep_mode": "uniform",
+    "lora_dropout": 0.0,
+    "lora_plus_ratio": 1.0,
+    "lr_schedule": "constant",
+    "init_mode": "pissa",
+    "use_rslora": true
+  },
+  "experiments": [
+    {
+      "id": "standard_baseline",
+      "description": "Standard Kaiming init + classic alpha/rank scaling. Replicates best prior config for A/B comparison.",
+      "init_mode": "standard",
+      "use_rslora": false
+    },
+    {
+      "id": "pissa_rslora",
+      "description": "PiSSA init + rsLoRA scaling. Full Tier-S config. Should start on-manifold and avoid intruder dimensions."
+    },
+    {
+      "id": "pissa_classic_scale",
+      "description": "PiSSA init + classic alpha/rank scaling. Isolates PiSSA contribution from rsLoRA.",
+      "use_rslora": false
+    },
+    {
+      "id": "standard_rslora",
+      "description": "Standard init + rsLoRA only. Isolates rsLoRA contribution from PiSSA.",
+      "init_mode": "standard"
+    },
+    {
+      "id": "pissa_rslora_lr1e-4",
+      "description": "PiSSA+rsLoRA at lower lr=1e-4. PiSSA starts closer to optimum — may need less aggressive lr.",
+      "lr": 1e-4
+    },
+    {
+      "id": "pissa_rslora_lr5e-4",
+      "description": "PiSSA+rsLoRA at higher lr=5e-4. Test if on-manifold start tolerates faster learning.",
+      "lr": 5e-4
+    },
+    {
+      "id": "pissa_rslora_dropout",
+      "description": "Standard init + rsLoRA with dropout 0.05. Note: PiSSA forces dropout=0 (principal components should not be dropped), so despite the id this run uses standard init to test rsLoRA + dropout.",
+      "init_mode": "standard",
+      "lora_dropout": 0.05
+    }
+  ]
+}
@@ -79,6 +79,8 @@ _PARAM_DEFAULTS = {
    "lora_dropout":        0.0,
    "lora_plus_ratio":     1.0,
    "lr_schedule":         "constant",
+    "init_mode":           "pissa",
+    "use_rslora":          True,
 }

 # Palette for comparison chart: one color per experiment (cycles if > 8)
@@ -388,7 +390,9 @@ class SelvaLoraScheduler:
            dropout     = float(cfg.get("lora_dropout",       0.0))
            plus_ratio  = float(cfg.get("lora_plus_ratio",    1.0))
            lr_schedule = str(cfg.get("lr_schedule",          "constant"))
-            alpha_val   = alpha if alpha > 0.0 else float(rank)
+            init_mode   = str(cfg.get("init_mode",            "pissa"))
+            use_rslora  = bool(cfg.get("use_rslora",          True))
+            alpha_val   = alpha if alpha > 0.0 else float(2 * rank)
            target_suffixes = tuple(target.strip().split())

            output_dir = output_root / exp_id
@@ -410,6 +414,7 @@ class SelvaLoraScheduler:
                    "curriculum_switch": curr_switch,
                    "lora_dropout": dropout, "lora_plus_ratio": plus_ratio,
                    "lr_schedule": lr_schedule,
+                    "init_mode": init_mode, "use_rslora": use_rslora,
                },
                "results":     {"status": "running"},
                "adapter_path": None,
@@ -428,7 +433,7 @@ class SelvaLoraScheduler:
                        alpha_val, target_suffixes, batch_size, warmup,
                        grad_accum, save_every, resume_path, seed,
                        ts_mode, ln_sigma, curr_switch, dropout, plus_ratio,
-                        lr_schedule,
+                        lr_schedule, init_mode, use_rslora,
                    )

                duration          = time.monotonic() - t_start