40d29bcaf8
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
34 lines
1.2 KiB
JSON
34 lines
1.2 KiB
JSON
{
  "name": "lora_logit_cosine_combo",
  "description": "Combine the two best findings from optimized dataset sweep: logit-normal timestep sampling + cosine LR schedule. Both individually outperformed baseline by large margins (56% and 68% lower loss). Tests if gains stack.",
  "data_dir": "/media/unraid/davinci/Selva/BJ/features_v2_improved/",
  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/lora_logit_cosine_combo",
  "base": {
    "rank": 128,
    "lr": 3e-4,
    "steps": 5000,
    "batch_size": 16,
    "warmup_steps": 100,
    "save_every": 1000,
    "seed": 42,
    "init_mode": "pissa",
    "use_rslora": true,
    "target": "attn.qkv",
    "timestep_mode": "uniform",
    "lr_schedule": "constant"
  },
  "experiments": [
    {
      "id": "logit_normal_cosine",
      "description": "Logit-normal timesteps + cosine LR decay. Combining the two best individual improvements.",
      "timestep_mode": "logit_normal",
      "lr_schedule": "cosine"
    },
    {
      "id": "logit_normal_control",
      "description": "Control: logit-normal only (constant LR). Reproduces previous winner for direct comparison.",
      "timestep_mode": "logit_normal"
    }
  ]
}