Files
ComfyUI-SelVA/experiments/r128_sweet_spot.json
T
Ethanfel 94610b8943 feat: r128_sweet_spot sweep — noise-free LR search + rank 256
9 experiments targeting loss 0.25-0.35 without LoRA+ noise.
Tests higher base LR (2e-4/3e-4/5e-4), curriculum combos, conservative
LoRA+ ratio=4, and rank 256 baseline + lr=3e-4.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 10:46:08 +02:00

89 lines
2.7 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"name": "r128_sweet_spot",
"description": "Find the noise-free sweet spot on rank 128. LoRA+ ratio=16 caused noise — testing higher base LR without LoRA+ as a cleaner alternative. Target loss range 0.25-0.35. Also probing rank 256 since 102GB VRAM allows it.",
"data_dir": "/media/unraid/davinci/Selva/BJ/features",
"output_root": "/media/unraid/davinci/Selva/BJ/experiment/r128_sweet_spot",
"base": {
"steps": 10000,
"rank": 128,
"alpha": 0.0,
"lr": 1e-4,
"batch_size": 16,
"warmup_steps": 200,
"grad_accum": 1,
"save_every": 2000,
"seed": 42,
"target": "attn.qkv",
"timestep_mode": "uniform",
"logit_normal_sigma": 1.0,
"curriculum_switch": 0.6,
"lora_dropout": 0.0,
"lora_plus_ratio": 1.0
},
"experiments": [
{
"id": "g1_r128_lr_2e4",
"group": "lr",
"description": "LR=2e-4. Conservative 2× step up from baseline — noise-free descent toward sweet spot.",
"lr": 2e-4
},
{
"id": "g1_r128_lr_3e4",
"group": "lr",
"description": "LR=3e-4. 3× baseline — landed at 0.41 on r64, should reach 0.25-0.35 on r128.",
"lr": 3e-4
},
{
"id": "g1_r128_lr_5e4",
"group": "lr",
"description": "LR=5e-4. Aggressive but no LoRA+ B-matrix asymmetry — cleaner noise profile.",
"lr": 5e-4
},
{
"id": "g2_r128_curriculum",
"group": "curriculum",
"description": "Curriculum only at baseline LR. Clean slow descent — reference for what curriculum contributes alone.",
"timestep_mode": "curriculum"
},
{
"id": "g2_r128_lr_3e4_curriculum",
"group": "curriculum",
"description": "LR=3e-4 + curriculum. Speed of higher LR with coverage of curriculum — no LoRA+.",
"lr": 3e-4,
"timestep_mode": "curriculum"
},
{
"id": "g2_r128_lr_3e4_curriculum_dropout",
"group": "curriculum",
"description": "LR=3e-4 + curriculum + dropout=0.05. Full controlled stack without LoRA+.",
"lr": 3e-4,
"timestep_mode": "curriculum",
"lora_dropout": 0.05
},
{
"id": "g3_r128_lora_plus_4",
"group": "lora_plus",
"description": "LoRA+ ratio=4 (lr_B=4e-4). Much more conservative than ratio=16 — tests if noise came from ratio not the technique.",
"lora_plus_ratio": 4.0
},
{
"id": "g4_r256_baseline",
"group": "rank256",
"description": "Rank 256 at baseline LR. 102GB VRAM makes this viable — does more capacity keep helping?",
"rank": 256
},
{
"id": "g4_r256_lr_3e4",
"group": "rank256",
"description": "Rank 256 + LR=3e-4. Best rank + best LR candidate combined.",
"rank": 256,
"lr": 3e-4
}
]
}