Files
ComfyUI-SelVA/experiments/lora_optimized_dataset.json
T
Ethanfel 65dc549494 feat: add reference audio comparison metrics to LoRA trainer eval
New _reference_metrics() computes LSD, MCD, and per-band correlation
between eval samples and the original source audio at each checkpoint.
Loads reference audio once before the training loop and logs metrics
alongside existing spectral metrics.

Also fix batch_size in lora_optimized_dataset.json (4 -> 16).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 15:04:07 +02:00

65 lines
2.3 KiB
JSON

{
  "name": "lora_optimized_dataset",
  "description": "LoRA training on optimized dataset (134 clips: resampled 44.1kHz, LUFS-normalized, spectral matched, HF smoothed, gain-augmented). Tests latent augmentation and schedule variants on top of known-best config (PiSSA, rank=128, lr=3e-4).",
  "data_dir": "/media/unraid/davinci/Selva/BJ/features_v2_improved/",
  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/lora_optimized_dataset",
  "base": {
    "rank": 128,
    "lr": 3e-4,
    "steps": 5000,
    "batch_size": 16,
    "warmup_steps": 100,
    "save_every": 1000,
    "seed": 42,
    "init_mode": "pissa",
    "use_rslora": true,
    "target": "attn.qkv",
    "timestep_mode": "uniform",
    "lr_schedule": "constant"
  },
  "experiments": [
    {
      "id": "baseline",
      "description": "Control: known-best config (PiSSA r128 lr=3e-4) on the optimized dataset. No latent augmentation."
    },
    {
      "id": "latent_mixup",
      "description": "Latent mixup alpha=0.4 (MusicLDM). Tests if mixing training latents reduces memorization on 134 clips.",
      "latent_mixup_alpha": 0.4
    },
    {
      "id": "latent_noise",
      "description": "Latent noise sigma=0.02. Mild Gaussian noise on training latents for regularization.",
      "latent_noise_sigma": 0.02
    },
    {
      "id": "mixup_and_noise",
      "description": "Both latent mixup (0.4) and noise (0.02). Combined regularization.",
      "latent_mixup_alpha": 0.4,
      "latent_noise_sigma": 0.02
    },
    {
      "id": "cosine_schedule",
      "description": "Cosine LR decay. lr=3e-4 was stable with constant, but cosine may extract more from 5k steps.",
      "lr_schedule": "cosine"
    },
    {
      "id": "cosine_mixup",
      "description": "Cosine LR + latent mixup. Best regularization combo candidate.",
      "lr_schedule": "cosine",
      "latent_mixup_alpha": 0.4
    },
    {
      "id": "logit_normal",
      "description": "Logit-normal timestep sampling (sigma=1.0). Concentrates training near t=0.5 where flow matching is hardest.",
      "timestep_mode": "logit_normal"
    },
    {
      "id": "curriculum_mixup",
      "description": "Curriculum timesteps (logit_normal first 60%, then uniform) + latent mixup. Full regularization stack.",
      "timestep_mode": "curriculum",
      "latent_mixup_alpha": 0.4
    }
  ]
}