{ "name": "tier1_sweep", "description": "Ablation of Tier 1 improvements: LoRA+, dropout, curriculum sampling. Baseline = uniform timestep sampling, no regularisation.", "data_dir": "dataset/my_sound", "output_root": "lora_sweeps/tier1_sweep", "base": { "steps": 2000, "rank": 16, "alpha": 0.0, "lr": 1e-4, "batch_size": 16, "warmup_steps": 100, "grad_accum": 1, "save_every": 500, "seed": 42, "target": "attn.qkv", "timestep_mode": "uniform", "logit_normal_sigma": 1.0, "curriculum_switch": 0.6, "lora_dropout": 0.0, "lora_plus_ratio": 1.0 }, "experiments": [ { "id": "baseline", "description": "Standard LoRA — no Tier 1 changes. Reference point." }, { "id": "lora_plus_16", "description": "LoRA+ only: lr_B = 16 * lr_A. Should converge faster in early steps.", "lora_plus_ratio": 16.0 }, { "id": "dropout_0.05", "description": "LoRA dropout 0.05 only. Light regularisation for the 10-clip dataset.", "lora_dropout": 0.05 }, { "id": "dropout_0.1", "description": "LoRA dropout 0.1 only. Stronger regularisation — may prevent overfitting by the end of the 2000-step run.", "lora_dropout": 0.1 }, { "id": "curriculum", "description": "Curriculum sampling only: logit_normal for steps 1-1200 (curriculum_switch 0.6 of 2000 steps), then uniform. Should improve convergence vs pure uniform.", "timestep_mode": "curriculum" }, { "id": "full_tier1", "description": "All Tier 1 combined: LoRA+ + dropout 0.05 + curriculum.", "lora_plus_ratio": 16.0, "lora_dropout": 0.05, "timestep_mode": "curriculum" } ] }