Files
ComfyUI-SelVA/nodes/selva_dataset_browser.py
T
Ethanfel 0000878e76 feat: thorough overnight sweep + dataset browser updates
- Dataset browser: audio/features now resolve through features/ subdir
- tier1_sweep.json: update data_dir to BJ dataset path
- tier1_thorough.json: 12-experiment overnight sweep across 4 groups
  (rank 16/32/64, alpha scaling, LoRA+/dropout/curriculum isolation,
  full Tier 1 stack at r16 and r64) — output to BJ/experiment/

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-08 00:38:19 +02:00

107 lines
3.6 KiB
Python

import json
from pathlib import Path
import folder_paths
from .utils import SELVA_CATEGORY
class SelvaDatasetBrowser:
    """Browse a dataset.json file entry by entry using an integer index.

    The file must contain a non-empty JSON array. Each entry is expected to
    be a JSON object with:

    - "path"  : base path (no extension); also used as the frames directory
    - "label" : text description of the clip (optional, defaults to "")

    Outputs, in order (``name`` is the last component of ``path`` and
    ``features/`` sits next to ``path`` inside the same parent directory):

    - video_path    : path + ".mp4"
    - audio_wav     : features/ + name + ".wav"
    - audio_flac    : features/ + name + ".flac"
    - features_path : features/ + name + ".npz"
    - frames_dir    : path (the directory itself, for image-sequence loaders)
    - mask_dir      : path + "_mask"
    - label         : entry["label"]
    - max_index     : number of entries - 1

    The derived paths are built by string/path manipulation only; none of
    them is checked for existence.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "dataset_json": ("STRING", {
                    "default": "",
                    "tooltip": "Absolute or ComfyUI-relative path to a dataset.json file.",
                }),
                "index": ("INT", {
                    "default": 0,
                    "min": 0,
                    "max": 9999,
                    "step": 1,
                    "tooltip": "Zero-based index of the entry to inspect.",
                }),
            },
        }

    RETURN_TYPES = ("STRING", "STRING", "STRING", "STRING", "STRING", "STRING", "STRING", "INT")
    RETURN_NAMES = ("video_path", "audio_wav", "audio_flac", "features_path", "frames_dir", "mask_dir", "label", "max_index")
    OUTPUT_TOOLTIPS = (
        "path + '.mp4'",
        "features/ + name + '.wav'",
        "features/ + name + '.flac'",
        "features/ + name + '.npz' (pre-extracted SelVA features)",
        "path (image-sequence directory)",
        "path + '_mask' (mask image-sequence directory)",
        "Text label for this clip",
        "count - 1 — wire to a primitive INT's max to constrain the index widget",
    )
    FUNCTION = "browse"
    CATEGORY = SELVA_CATEGORY
    DESCRIPTION = (
        "Reads a dataset.json produced by the SelVA dataset preparation pipeline "
        "and exposes one entry at a time via an integer index. "
        "Outputs the video path, WAV/FLAC audio paths, features path, frames directory, "
        "mask directory, label, and the last valid index (entry count - 1)."
    )

    @classmethod
    def IS_CHANGED(cls, **_):
        # Re-read the file every call so edits are picked up without
        # restarting ComfyUI. NaN never compares equal to itself, so an
        # equality check against the previously returned value always fails.
        return float("nan")

    def browse(self, dataset_json: str, index: int):
        """Return the derived paths and label for entry ``index``.

        Args:
            dataset_json: Path to the dataset file. Surrounding whitespace is
                stripped; a relative path is resolved against
                ``folder_paths.base_path``.
            index: Zero-based position of the entry to read.

        Returns:
            An 8-tuple matching ``RETURN_NAMES``: video path, WAV path, FLAC
            path, features (.npz) path, frames directory, mask directory,
            label, and the last valid index.

        Raises:
            FileNotFoundError: ``dataset_json`` is empty or does not point to
                an existing regular file.
            ValueError: the file does not contain a non-empty JSON array.
            IndexError: ``index`` is negative or past the last entry.
            TypeError: the selected entry is not a JSON object, or its
                "path" value is not a string.
            KeyError: the selected entry has no "path" key.
        """
        raw = dataset_json.strip()
        if not raw:
            # The widget default is "", which would otherwise resolve to the
            # base directory and fail later with an unrelated OS error.
            raise FileNotFoundError(
                "[SelVA Dataset Browser] dataset_json is empty — provide a path to a dataset.json file"
            )

        p = Path(raw)
        if not p.is_absolute():
            p = Path(folder_paths.base_path) / p
        # is_file() (rather than exists()) also rejects directories up front.
        if not p.is_file():
            raise FileNotFoundError(f"[SelVA Dataset Browser] File not found: {p}")

        with p.open("r", encoding="utf-8") as f:
            data = json.load(f)

        if not isinstance(data, list) or len(data) == 0:
            raise ValueError(f"[SelVA Dataset Browser] Expected a non-empty JSON array in {p}")

        count = len(data)
        # Reject negatives explicitly: list indexing would silently wrap
        # around to the end of the dataset.
        if not 0 <= index < count:
            raise IndexError(
                f"[SelVA Dataset Browser] index {index} is out of range "
                f"(dataset has {count} entries, last index is {count - 1})"
            )

        entry = data[index]
        if not isinstance(entry, dict):
            raise TypeError(
                f"[SelVA Dataset Browser] entry {index} in {p} is not a JSON object"
            )
        if "path" not in entry:
            raise KeyError(
                f"[SelVA Dataset Browser] entry {index} in {p} has no \"path\" key"
            )
        base = entry["path"]
        if not isinstance(base, str):
            raise TypeError(
                f"[SelVA Dataset Browser] \"path\" of entry {index} in {p} is not a string"
            )
        label = entry.get("label", "")

        # Audio and pre-extracted features live in a "features" directory
        # next to the entry, under the entry's own name.
        p_base = Path(base)
        feat_base = str(p_base.parent / "features" / p_base.name)

        print(
            f"[SelVA Dataset Browser] {index + 1}/{count} label='{label}' base={base}",
            flush=True,
        )

        return (
            base + ".mp4",
            feat_base + ".wav",
            feat_base + ".flac",
            feat_base + ".npz",
            base,
            base + "_mask",
            label,
            count - 1,
        )