{
  "name": "lora_logit_cosine_combo",
  "description": "Combine the two best findings from optimized dataset sweep: logit-normal timestep sampling + cosine LR schedule. Both individually outperformed baseline by large margins (56% and 68% lower loss). Tests if gains stack.",
  "data_dir": "/media/unraid/davinci/Selva/BJ/features_v2_improved/",
  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/lora_logit_cosine_combo",
  "base": {
    "rank": 128,
    "lr": 3e-4,
    "steps": 5000,
    "batch_size": 16,
    "warmup_steps": 100,
    "save_every": 1000,
    "seed": 42,
    "init_mode": "pissa",
    "use_rslora": true,
    "target": "attn.qkv",
    "timestep_mode": "uniform",
    "lr_schedule": "constant"
  },
  "experiments": [
    {
      "id": "logit_normal_cosine",
      "description": "Logit-normal timesteps + cosine LR decay. Combining the two best individual improvements.",
      "timestep_mode": "logit_normal",
      "lr_schedule": "cosine"
    },
    {
      "id": "logit_normal_control",
      "description": "Control: logit-normal only (constant LR). Reproduces previous winner for direct comparison.",
      "timestep_mode": "logit_normal"
    }
  ]
}