Files
ComfyUI-SelVA/experiments/r64_overnight.json
T
Ethanfel a7923d5fb7 feat: r64_overnight sweep — focused rank-64 ablation at 8000 steps
15 experiments across rank (64/128), alpha, regularisation, LR, target
layers, and combined stacks. Based on tier1_thorough early results
confirming rank 64 sounds best perceptually.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 01:32:23 +02:00

131 lines
3.9 KiB
JSON
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"name": "r64_overnight",
"description": "Focused rank-64 overnight sweep. The base config uses rank 64 — confirmed best from tier1_thorough early results — and the rank-128 variants override it. Runs use 8000 steps to reach convergence (none converged at 4000).",
"data_dir": "/media/unraid/davinci/Selva/BJ/features",
"output_root": "/media/unraid/davinci/Selva/BJ/experiment/r64_overnight",
"base": {
"steps": 8000,
"rank": 64,
"alpha": 0.0,
"lr": 1e-4,
"batch_size": 16,
"warmup_steps": 200,
"grad_accum": 1,
"save_every": 2000,
"seed": 42,
"target": "attn.qkv",
"timestep_mode": "uniform",
"logit_normal_sigma": 1.0,
"curriculum_switch": 0.6,
"lora_dropout": 0.0,
"lora_plus_ratio": 1.0
},
"experiments": [
{
"id": "g1_r64_baseline",
"group": "rank",
"description": "Rank 64 baseline — clean reference at 8000 steps."
},
{
"id": "g1_r128_baseline",
"group": "rank",
"description": "Rank 128 — 102GB VRAM makes this free. Does doubling rank from 64 help further?",
"rank": 128
},
{
"id": "g2_r64_alpha_32",
"group": "alpha",
"description": "Rank 64 alpha=32 (scale=0.5). Reduces intruder singular dimensions (arXiv:2410.21228).",
"alpha": 32.0
},
{
"id": "g2_r64_alpha_16",
"group": "alpha",
"description": "Rank 64 alpha=16 (scale=0.25). More aggressive scale reduction — may over-constrain.",
"alpha": 16.0
},
{
"id": "g3_r64_lora_plus",
"group": "regularisation",
"description": "LoRA+ ratio=16. lr_B = 16 × lr_A. Faster convergence at constant step budget.",
"lora_plus_ratio": 16.0
},
{
"id": "g3_r64_dropout_0.05",
"group": "regularisation",
"description": "Dropout=0.05. Light sparsity regularisation on LoRA path.",
"lora_dropout": 0.05
},
{
"id": "g3_r64_dropout_0.1",
"group": "regularisation",
"description": "Dropout=0.1. Stronger regularisation — tests whether 49 clips need a heavier constraint.",
"lora_dropout": 0.1
},
{
"id": "g3_r64_curriculum",
"group": "regularisation",
"description": "Curriculum sampling: logit_normal for steps 1-4800, then uniform (arXiv:2603.12517).",
"timestep_mode": "curriculum"
},
{
"id": "g4_r64_lr_low",
"group": "lr",
"description": "LR=3e-5. 3× lower — checks if 1e-4 is overshooting at rank 64.",
"lr": 3e-5
},
{
"id": "g4_r64_lr_high",
"group": "lr",
"description": "LR=3e-4. 3× higher — may converge faster but risk instability.",
"lr": 3e-4
},
{
"id": "g5_r64_target_full",
"group": "target",
"description": "Rank 64 targeting attn.qkv + linear1 (FFN projections). Doubles LoRA coverage.",
"target": "attn.qkv linear1"
},
{
"id": "g5_r128_target_full",
"group": "target",
"description": "Rank 128 + full target. Maximum possible coverage with available VRAM.",
"rank": 128,
"target": "attn.qkv linear1"
},
{
"id": "g6_r64_full_tier1",
"group": "combined",
"description": "All Tier 1 at rank 64: LoRA+ 16 + dropout 0.05 + curriculum. Full stack at 8000 steps.",
"lora_plus_ratio": 16.0,
"lora_dropout": 0.05,
"timestep_mode": "curriculum"
},
{
"id": "g6_r64_alpha32_full",
"group": "combined",
"description": "Rank 64 alpha=32 + all Tier 1. Best alpha scaling + best regularisation stack.",
"alpha": 32.0,
"lora_plus_ratio": 16.0,
"lora_dropout": 0.05,
"timestep_mode": "curriculum"
},
{
"id": "g6_r128_full_tier1",
"group": "combined",
"description": "Rank 128 + all Tier 1. Tests if more capacity + regularisation beats rank 64 full.",
"rank": 128,
"lora_plus_ratio": 16.0,
"lora_dropout": 0.05,
"timestep_mode": "curriculum"
}
]
}