feat: thorough overnight sweep + dataset browser updates

- Dataset browser: audio/features now resolve through features/ subdir
- tier1_sweep.json: update data_dir to BJ dataset path
- tier1_thorough.json: 12-experiment overnight sweep across 4 groups
  (rank 16/32/64, alpha scaling, LoRA+/dropout/curriculum isolation,
  full Tier 1 stack at r16 and r64) — output to BJ/experiment/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-08 00:38:19 +02:00
parent 675644189d
commit 0000878e76
3 changed files with 126 additions and 11 deletions
+14 -7
View File
@@ -43,9 +43,9 @@ class SelvaDatasetBrowser:
RETURN_NAMES = ("video_path", "audio_wav", "audio_flac", "features_path", "frames_dir", "mask_dir", "label", "max_index")
OUTPUT_TOOLTIPS = (
"path + '.mp4'",
"path + '.wav'",
"path + '.flac'",
"path + '.npz' (pre-extracted SelVA features)",
"features/ + name + '.wav'",
"features/ + name + '.flac'",
"features/ + name + '.npz' (pre-extracted SelVA features)",
"path (image-sequence directory)",
"path + '_mask' (mask image-sequence directory)",
"Text label for this clip",
@@ -76,12 +76,19 @@ class SelvaDatasetBrowser:
raise ValueError(f"[SelVA Dataset Browser] Expected a non-empty JSON array in {p}")
count = len(data)
index = max(0, min(index, count - 1)) # clamp silently
if index >= count:
raise IndexError(
f"[SelVA Dataset Browser] index {index} is out of range "
f"(dataset has {count} entries, last index is {count - 1})"
)
entry = data[index]
base = entry["path"]
label = entry.get("label", "")
p_base = Path(base)
feat_base = str(p_base.parent / "features" / p_base.name)
print(
f"[SelVA Dataset Browser] {index + 1}/{count} label='{label}' base={base}",
flush=True,
@@ -89,9 +96,9 @@ class SelvaDatasetBrowser:
return (
base + ".mp4",
base + ".wav",
base + ".flac",
base + ".npz",
feat_base + ".wav",
feat_base + ".flac",
feat_base + ".npz",
base,
base + "_mask",
label,