From 675644189d80257a028d2bf3b5de95dfa0ed6106 Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Tue, 7 Apr 2026 14:55:27 +0200
Subject: [PATCH] feat: add SelVA Dataset Browser node

Companion node for inspecting dataset.json entries by integer index.
Outputs video (.mp4), audio (.wav/.flac), features (.npz), frames dir,
mask dir, label, and max_index for constraining the index widget range.

Co-Authored-By: Claude Sonnet 4.6
---
 nodes/__init__.py              |   1 +
 nodes/selva_dataset_browser.py | 127 +++++++++++++++++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 nodes/selva_dataset_browser.py

diff --git a/nodes/__init__.py b/nodes/__init__.py
index 4914aa2..f965e29 100644
--- a/nodes/__init__.py
+++ b/nodes/__init__.py
@@ -8,6 +8,7 @@ _NODES = {
     "SelvaLoraLoader": (".selva_lora_loader", "SelvaLoraLoader", "SelVA LoRA Loader"),
     "SelvaLoraTrainer": (".selva_lora_trainer", "SelvaLoraTrainer", "SelVA LoRA Trainer"),
     "SelvaLoraScheduler": (".selva_lora_scheduler", "SelvaLoraScheduler", "SelVA LoRA Scheduler"),
+    "SelvaDatasetBrowser": (".selva_dataset_browser", "SelvaDatasetBrowser", "SelVA Dataset Browser"),
 }
 
 for key, (module_path, class_name, display_name) in _NODES.items():
diff --git a/nodes/selva_dataset_browser.py b/nodes/selva_dataset_browser.py
new file mode 100644
index 0000000..33c502c
--- /dev/null
+++ b/nodes/selva_dataset_browser.py
@@ -0,0 +1,127 @@
+import json
+from pathlib import Path
+
+import folder_paths
+
+from .utils import SELVA_CATEGORY
+
+
+class SelvaDatasetBrowser:
+    """Browse a dataset.json file entry by entry using an integer index.
+
+    The file must contain a non-empty JSON array. Each entry is an object with:
+      - "path"  : base path (no extension) — directory that holds frame images
+      - "label" : text description of the clip (optional, defaults to "")
+
+    Derived outputs:
+      - video_path    : path + ".mp4"
+      - audio_wav     : path + ".wav"
+      - audio_flac    : path + ".flac"
+      - features_path : path + ".npz"
+      - frames_dir    : path  (the directory itself, for image-sequence loaders)
+      - mask_dir      : path + "_mask"
+      - label         : entry["label"], or "" when the key is absent
+      - max_index     : number of entries in the file minus one
+    """
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        """Declare the node inputs: the dataset.json path and the entry index."""
+        return {
+            "required": {
+                "dataset_json": ("STRING", {
+                    "default": "",
+                    "tooltip": "Absolute or ComfyUI-relative path to a dataset.json file.",
+                }),
+                "index": ("INT", {
+                    "default": 0,
+                    "min": 0,
+                    "max": 9999,
+                    "step": 1,
+                    "tooltip": "Zero-based index of the entry to inspect.",
+                }),
+            },
+        }
+
+    RETURN_TYPES = ("STRING", "STRING", "STRING", "STRING", "STRING", "STRING", "STRING", "INT")
+    RETURN_NAMES = ("video_path", "audio_wav", "audio_flac", "features_path", "frames_dir", "mask_dir", "label", "max_index")
+    OUTPUT_TOOLTIPS = (
+        "path + '.mp4'",
+        "path + '.wav'",
+        "path + '.flac'",
+        "path + '.npz' (pre-extracted SelVA features)",
+        "path (image-sequence directory)",
+        "path + '_mask' (mask image-sequence directory)",
+        "Text label for this clip",
+        "count - 1 — wire to a primitive INT's max to constrain the index widget",
+    )
+    FUNCTION = "browse"
+    CATEGORY = SELVA_CATEGORY
+    DESCRIPTION = (
+        "Reads a dataset.json produced by the SelVA dataset preparation pipeline "
+        "and exposes one entry at a time via an integer index. "
+        "Outputs the video, audio (.wav/.flac), features, frames and mask paths, "
+        "the label, and the highest valid index."
+    )
+
+    # Re-read the file every call so edits are picked up without restarting ComfyUI.
+    IS_CHANGED = classmethod(lambda cls, **_: float("nan"))
+
+    def browse(self, dataset_json: str, index: int):
+        """Return the derived paths, label and max index for one dataset entry.
+
+        Args:
+            dataset_json: Absolute path to the dataset file, or a path relative
+                to ``folder_paths.base_path``.
+            index: Zero-based entry index; out-of-range values are clamped.
+
+        Returns:
+            An 8-tuple matching ``RETURN_NAMES``.
+
+        Raises:
+            FileNotFoundError: If the resolved path is not an existing file.
+            ValueError: If the JSON is not a non-empty array, or the selected
+                entry is not an object with a string "path".
+        """
+        p = Path(dataset_json.strip())
+        if not p.is_absolute():
+            p = Path(folder_paths.base_path) / p
+        # is_file() rather than exists(): a directory would fail later in open().
+        if not p.is_file():
+            raise FileNotFoundError(f"[SelVA Dataset Browser] File not found: {p}")
+
+        with p.open("r", encoding="utf-8") as f:
+            data = json.load(f)
+
+        if not isinstance(data, list) or len(data) == 0:
+            raise ValueError(f"[SelVA Dataset Browser] Expected a non-empty JSON array in {p}")
+
+        count = len(data)
+        index = max(0, min(index, count - 1))  # clamp silently
+        entry = data[index]
+
+        # Validate up front so a malformed entry yields a clear message instead
+        # of a bare KeyError/TypeError from the lookups below.
+        base = entry.get("path") if isinstance(entry, dict) else None
+        if not isinstance(base, str):
+            raise ValueError(
+                f"[SelVA Dataset Browser] Entry {index} in {p} must be an object "
+                "with a string 'path'"
+            )
+        label = entry.get("label", "")
+
+        print(
+            f"[SelVA Dataset Browser] {index + 1}/{count}  label='{label}'  base={base}",
+            flush=True,
+        )
+
+        return (
+            base + ".mp4",
+            base + ".wav",
+            base + ".flac",
+            base + ".npz",
+            base,
+            base + "_mask",
+            label,
+            count - 1,
+        )