diff --git a/experiments/tier1_sweep.json b/experiments/tier1_sweep.json index b51ea61..46fe0fe 100644 --- a/experiments/tier1_sweep.json +++ b/experiments/tier1_sweep.json @@ -1,10 +1,10 @@ { "name": "tier1_sweep", "description": "Ablation of Tier 1 improvements: LoRA+, dropout, curriculum sampling. Baseline = uniform, no regularisation.", - "data_dir": "dataset/my_sound", + "data_dir": "/media/unraid/davinci/Selva/BJ", "output_root": "lora_sweeps/tier1_sweep", "base": { - "steps": 2000, + "steps": 4000, "rank": 16, "alpha": 0.0, "lr": 1e-4, @@ -32,7 +32,7 @@ }, { "id": "dropout_0.05", - "description": "LoRA dropout 0.05 only. Light regularisation for 10-clip dataset.", + "description": "LoRA dropout 0.05 only. Light regularisation for 49-clip dataset.", "lora_dropout": 0.05 }, { @@ -42,7 +42,7 @@ }, { "id": "curriculum", - "description": "Curriculum sampling only: logit_normal for steps 1-1200, then uniform. Should improve convergence vs pure uniform.", + "description": "Curriculum sampling only: logit_normal for steps 1-2400, then uniform. Should improve convergence vs pure uniform.", "timestep_mode": "curriculum" }, { @@ -51,6 +51,11 @@ "lora_plus_ratio": 16.0, "lora_dropout": 0.05, "timestep_mode": "curriculum" + }, + { + "id": "rank_64", + "description": "Rank 64 baseline — MMAudio LoRA guide default. More expressive adapter for 49-clip dataset.", + "rank": 64 } ] } diff --git a/experiments/tier1_thorough.json b/experiments/tier1_thorough.json new file mode 100644 index 0000000..c32a38b --- /dev/null +++ b/experiments/tier1_thorough.json @@ -0,0 +1,103 @@ +{ + "name": "tier1_thorough", + "description": "Full overnight Tier 1 ablation on 49-clip BJ dataset. 4 groups: rank, alpha, regularisation, and best combinations. ~10-12h depending on GPU.", + "data_dir": "/media/unraid/davinci/Selva/BJ", + "output_root": "/media/unraid/davinci/Selva/BJ/experiment/tier1_thorough", + "base": { + "steps": 4000, + "rank": 16, + "alpha": 0.0, + "lr": 1e-4, + "batch_size": 16, + "warmup_steps": 100, + "grad_accum": 1, + "save_every": 1000, + "seed": 42, + "target": "attn.qkv", + "timestep_mode": "uniform", + "logit_normal_sigma": 1.0, + "curriculum_switch": 0.6, + "lora_dropout": 0.0, + "lora_plus_ratio": 1.0 + }, + "experiments": [ + { + "id": "g1_rank_16", + "group": "rank", + "description": "Rank 16 baseline — reference point for all groups." + }, + { + "id": "g1_rank_32", + "group": "rank", + "description": "Rank 32 — midpoint. Does doubling rank improve quality without overfitting?", + "rank": 32 + }, + { + "id": "g1_rank_64", + "group": "rank", + "description": "Rank 64 — MMAudio LoRA guide default. Maximum expressiveness at 49 clips.", + "rank": 64 + }, + { + "id": "g2_alpha_half_r16", + "group": "alpha", + "description": "Alpha=8 with rank 16 (scale=0.5). Reduces intruder singular dimensions (arXiv:2410.21228).", + "alpha": 8.0 + }, + { + "id": "g2_alpha_half_r64", + "group": "alpha", + "description": "Alpha=32 with rank 64 (scale=0.5). Best-practice scaling for high-rank adapters.", + "rank": 64, + "alpha": 32.0 + }, + { + "id": "g3_lora_plus_4", + "group": "regularisation", + "description": "LoRA+ ratio=4 — conservative asymmetric LR. Lower bound for the technique.", + "lora_plus_ratio": 4.0 + }, + { + "id": "g3_lora_plus_16", + "group": "regularisation", + "description": "LoRA+ ratio=16 — standard from FLUX LoRA literature. Faster early convergence.", + "lora_plus_ratio": 16.0 + }, + { + "id": "g3_dropout_0.05", + "group": "regularisation", + "description": "LoRA dropout 0.05 only. Light sparsity regularisation (arXiv:2404.09610).", + "lora_dropout": 0.05 + }, + { + "id": "g3_dropout_0.1", + "group": "regularisation", + "description": "LoRA dropout 0.1 only. Stronger regularisation — may prevent overfitting past step 2000.", + "lora_dropout": 0.1 + }, + { + "id": "g3_curriculum", + "group": "regularisation", + "description": "Curriculum sampling only: logit_normal steps 1-2400, then uniform (arXiv:2603.12517).", + "timestep_mode": "curriculum" + }, + { + "id": "g4_full_r16", + "group": "combined", + "description": "All Tier 1 at rank 16: LoRA+ 16 + dropout 0.05 + curriculum.", + "lora_plus_ratio": 16.0, + "lora_dropout": 0.05, + "timestep_mode": "curriculum" + }, + { + "id": "g4_full_r64", + "group": "combined", + "description": "All Tier 1 at rank 64 + alpha=32. Best expressiveness + best regularisation.", + "rank": 64, + "alpha": 32.0, + "lora_plus_ratio": 16.0, + "lora_dropout": 0.05, + "timestep_mode": "curriculum" + } + ] +} diff --git a/nodes/selva_dataset_browser.py b/nodes/selva_dataset_browser.py index 33c502c..5f77d70 100644 --- a/nodes/selva_dataset_browser.py +++ b/nodes/selva_dataset_browser.py @@ -43,9 +43,9 @@ class SelvaDatasetBrowser: RETURN_NAMES = ("video_path", "audio_wav", "audio_flac", "features_path", "frames_dir", "mask_dir", "label", "max_index") OUTPUT_TOOLTIPS = ( "path + '.mp4'", - "path + '.wav'", - "path + '.flac'", - "path + '.npz' (pre-extracted SelVA features)", + "features/ + name + '.wav'", + "features/ + name + '.flac'", + "features/ + name + '.npz' (pre-extracted SelVA features)", "path (image-sequence directory)", "path + '_mask' (mask image-sequence directory)", "Text label for this clip", @@ -76,12 +76,19 @@ class SelvaDatasetBrowser: raise ValueError(f"[SelVA Dataset Browser] Expected a non-empty JSON array in {p}") count = len(data) - index = max(0, min(index, count - 1)) # clamp silently + if index >= count: + raise IndexError( + f"[SelVA Dataset Browser] index {index} is out of range " + f"(dataset has {count} entries, last index is {count - 1})" + ) entry = data[index] base = entry["path"] label = entry.get("label", "") + p_base = Path(base) + feat_base = str(p_base.parent / "features" / p_base.name) + print( f"[SelVA Dataset Browser] {index + 1}/{count} label='{label}' base={base}", flush=True, @@ -89,9 +96,9 @@ class SelvaDatasetBrowser: return ( base + ".mp4", - base + ".wav", - base + ".flac", - base + ".npz", + feat_base + ".wav", + feat_base + ".flac", + feat_base + ".npz", base, base + "_mask", label,