chore: sanitize tooltips/comments + add experiment configs
- Replace all BJ references with generic "target style/audio" in activation steering, DITTO optimizer, and BigVGAN trainer
- Add latent_mixup_alpha/latent_noise_sigma to LoRA scheduler defaults
- Add bigvgan_disc_fm_retest.json and lora_optimized_dataset.json

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
{
  "name": "bigvgan_disc_fm_retest",
  "description": "Retest discriminator feature matching after bfloat16 dtype fix. Uses optimal config from overnight sweep (snake_alpha, GAFilter, lr=1e-4, phase=1.0, L2-SP=1e-3, 5000 steps).",
  "data_dir": "/media/unraid/davinci/Selva/BJ/features",
  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/bigvgan_disc_fm_retest",
  "base": {
    "train_mode": "snake_alpha_only",
    "steps": 5000,
    "lr": 1e-4,
    "batch_size": 8,
    "segment_seconds": 0.5,
    "lambda_l2sp": 1e-3,
    "use_gafilter": true,
    "gafilter_kernel_size": 9,
    "lambda_phase": 1.0,
    "save_every": 1000,
    "seed": 42,
    "lora_adapter": "/media/unraid/davinci/Selva/BJ/experiment/pissa_sweep/standard_baseline/adapter_final.pt"
  },
  "experiments": [
    {
      "id": "snake_5k_control",
      "description": "Control: best config from overnight sweep without discriminator. Baseline for A/B comparison."
    },
    {
      "id": "disc_fm_5k",
      "description": "Discriminator feature matching at 5k steps. Tests if perceptual FM loss improves over mel+phase alone.",
      "discriminator_path": "/media/unraid/davinci/Selva/BJ/experiment/bigvgan_discriminator_optimizer.pt"
    }
  ]
}
|
||||
@@ -0,0 +1,64 @@
|
||||
{
  "name": "lora_optimized_dataset",
  "description": "LoRA training on optimized dataset (134 clips: resampled 44.1kHz, LUFS-normalized, spectral matched, HF smoothed, gain-augmented). Tests latent augmentation and schedule variants on top of known-best config (PiSSA, rank=128, lr=3e-4).",
  "data_dir": "/media/unraid/davinci/Selva/BJ/features_v2_improved/",
  "output_root": "/media/unraid/davinci/Selva/BJ/experiment/lora_optimized_dataset",
  "base": {
    "rank": 128,
    "lr": 3e-4,
    "steps": 5000,
    "batch_size": 4,
    "warmup_steps": 100,
    "save_every": 1000,
    "seed": 42,
    "init_mode": "pissa",
    "use_rslora": true,
    "target": "attn.qkv",
    "timestep_mode": "uniform",
    "lr_schedule": "constant"
  },
  "experiments": [
    {
      "id": "baseline",
      "description": "Control: known-best config (PiSSA r128 lr=3e-4) on the optimized dataset. No latent augmentation."
    },
    {
      "id": "latent_mixup",
      "description": "Latent mixup alpha=0.4 (MusicLDM). Tests if mixing training latents reduces memorization on 134 clips.",
      "latent_mixup_alpha": 0.4
    },
    {
      "id": "latent_noise",
      "description": "Latent noise sigma=0.02. Mild Gaussian noise on training latents for regularization.",
      "latent_noise_sigma": 0.02
    },
    {
      "id": "mixup_and_noise",
      "description": "Both latent mixup (0.4) and noise (0.02). Combined regularization.",
      "latent_mixup_alpha": 0.4,
      "latent_noise_sigma": 0.02
    },
    {
      "id": "cosine_schedule",
      "description": "Cosine LR decay. lr=3e-4 was stable with constant, but cosine may extract more from 5k steps.",
      "lr_schedule": "cosine"
    },
    {
      "id": "cosine_mixup",
      "description": "Cosine LR + latent mixup. Best regularization combo candidate.",
      "lr_schedule": "cosine",
      "latent_mixup_alpha": 0.4
    },
    {
      "id": "logit_normal",
      "description": "Logit-normal timestep sampling (sigma=1.0). Concentrates training near t=0.5 where flow matching is hardest.",
      "timestep_mode": "logit_normal"
    },
    {
      "id": "curriculum_mixup",
      "description": "Curriculum timesteps (logit_normal first 60%, then uniform) + latent mixup. Full regularization stack.",
      "timestep_mode": "curriculum",
      "latent_mixup_alpha": 0.4
    }
  ]
}
|
||||
Reference in New Issue
Block a user