diff --git a/experiments/pissa_sweep.json b/experiments/pissa_sweep.json new file mode 100644 index 0000000..6a792dc --- /dev/null +++ b/experiments/pissa_sweep.json @@ -0,0 +1,62 @@ +{ + "name": "pissa_sweep", + "description": "PiSSA vs standard init ablation at rank 128. Best prior config (lr=3e-4, bs=16, 10k steps) as baseline. PiSSA starts on-manifold via SVD init — should eliminate intruder dimensions. rsLoRA stabilises scaling at high rank.", + "data_dir": "/media/unraid/davinci/Selva/BJ/features", + "output_root": "/media/unraid/davinci/Selva/BJ/experiment/pissa_sweep", + "base": { + "steps": 10000, + "rank": 128, + "alpha": 0.0, + "lr": 3e-4, + "batch_size": 16, + "warmup_steps": 200, + "grad_accum": 1, + "save_every": 2000, + "seed": 42, + "target": "attn.qkv", + "timestep_mode": "uniform", + "lora_dropout": 0.0, + "lora_plus_ratio": 1.0, + "lr_schedule": "constant", + "init_mode": "pissa", + "use_rslora": true + }, + "experiments": [ + { + "id": "standard_baseline", + "description": "Standard Kaiming init + classic alpha/rank scaling. Replicates best prior config for A/B comparison.", + "init_mode": "standard", + "use_rslora": false + }, + { + "id": "pissa_rslora", + "description": "PiSSA init + rsLoRA scaling. Full Tier-S config. Should start on-manifold and avoid intruder dimensions." + }, + { + "id": "pissa_classic_scale", + "description": "PiSSA init + classic alpha/rank scaling. Isolates PiSSA contribution from rsLoRA.", + "use_rslora": false + }, + { + "id": "standard_rslora", + "description": "Standard init + rsLoRA only. Isolates rsLoRA contribution from PiSSA.", + "init_mode": "standard" + }, + { + "id": "pissa_rslora_lr1e-4", + "description": "PiSSA+rsLoRA at lower lr=1e-4. PiSSA starts closer to optimum — may need less aggressive lr.", + "lr": 1e-4 + }, + { + "id": "pissa_rslora_lr5e-4", + "description": "PiSSA+rsLoRA at higher lr=5e-4. 
Test if on-manifold start tolerates faster learning.", + "lr": 5e-4 + }, + { + "id": "pissa_rslora_dropout", + "description": "Standard init + rsLoRA with dropout 0.05. PiSSA forces dropout=0 (principal components should not be dropped), so despite the id this run uses standard init to test rsLoRA + dropout.", + "init_mode": "standard", + "lora_dropout": 0.05 + } + ] +} diff --git a/nodes/selva_lora_scheduler.py b/nodes/selva_lora_scheduler.py index 570eda3..d8609a9 100644 --- a/nodes/selva_lora_scheduler.py +++ b/nodes/selva_lora_scheduler.py @@ -79,6 +79,8 @@ _PARAM_DEFAULTS = { "lora_dropout": 0.0, "lora_plus_ratio": 1.0, "lr_schedule": "constant", + "init_mode": "pissa", + "use_rslora": True, } # Palette for comparison chart: one color per experiment (cycles if > 8) @@ -388,7 +390,9 @@ class SelvaLoraScheduler: dropout = float(cfg.get("lora_dropout", 0.0)) plus_ratio = float(cfg.get("lora_plus_ratio", 1.0)) lr_schedule = str(cfg.get("lr_schedule", "constant")) - alpha_val = alpha if alpha > 0.0 else float(rank) + init_mode = str(cfg.get("init_mode", "pissa")) + use_rslora = bool(cfg.get("use_rslora", True)) + alpha_val = alpha if alpha > 0.0 else float(2 * rank) target_suffixes = tuple(target.strip().split()) output_dir = output_root / exp_id @@ -410,6 +414,7 @@ class SelvaLoraScheduler: "curriculum_switch": curr_switch, "lora_dropout": dropout, "lora_plus_ratio": plus_ratio, "lr_schedule": lr_schedule, + "init_mode": init_mode, "use_rslora": use_rslora, }, "results": {"status": "running"}, "adapter_path": None, @@ -428,7 +433,7 @@ class SelvaLoraScheduler: alpha_val, target_suffixes, batch_size, warmup, grad_accum, save_every, resume_path, seed, ts_mode, ln_sigma, curr_switch, dropout, plus_ratio, - lr_schedule, + lr_schedule, init_mode, use_rslora, ) duration = time.monotonic() - t_start