From 6fa235f26cd84f65df437358b2096ac7388f7ec2 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Thu, 19 Feb 2026 22:14:03 +0100 Subject: [PATCH] Add VACE Source Prep node for trimming long source clips New node that handles frame selection/trimming before the mask generator, with input_left/input_right controls for per-mode windowing. Adds B > target_frames validation to the mask generator with a helpful error message. Includes JS extension for smart widget visibility per mode. Co-Authored-By: Claude Opus 4.6 --- README.md | 56 ++++++++- __init__.py | 4 +- nodes.py | 254 ++++++++++++++++++++++++++++++++++++++++- web/js/vace_widgets.js | 78 +++++++++++++ 4 files changed, 388 insertions(+), 4 deletions(-) create mode 100644 web/js/vace_widgets.js diff --git a/README.md b/README.md index 391c8dc..fd948b2 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,65 @@ git clone https://github.com/ethanfel/Comfyui-VACE-Tools.git Restart ComfyUI. Nodes appear under the **VACE Tools** and **WanVideoWrapper** categories. -## Node: VACE Mask Generator +## Node: VACE Source Prep + +Trims long source clips so they can be used with VACE Mask Generator. Place this node **before** the mask generator when your source clip has more frames than `target_frames`. It selects the relevant frames based on mode and outputs adjusted parameters to wire directly into the mask generator. + +Irrelevant widgets are automatically hidden based on the selected mode. ### Inputs | Input | Type | Default | Description | |---|---|---|---| -| `source_clip` | IMAGE | — | Source video frames (B, H, W, C tensor) | +| `source_clip` | IMAGE | — | Full source video frames (B, H, W, C tensor). | +| `mode` | ENUM | `End Extend` | Generation mode — must match the mask generator's mode. | +| `split_index` | INT | `0` | Split position in the full source video. Same meaning as the mask generator's split_index. | +| `input_left` | INT | `0` | Frames from the left side of the split point to keep (0 = all available). 
End: trailing context. Middle: frames before split. Edge/Join: start edge size (0 = use `edge_frames`). Bidirectional: trailing context. Replace: context before region. | +| `input_right` | INT | `0` | Frames from the right side of the split point to keep (0 = all available). Pre: leading reference. Middle: frames after split. Edge/Join: end edge size (0 = use `edge_frames`). Replace: context after region. | +| `edge_frames` | INT | `8` | Default edge size for Edge/Join modes (overridden by input_left/input_right if non-zero). Replace/Inpaint: number of frames to replace. | +| `inpaint_mask` | MASK | *(optional)* | Spatial inpaint mask — trimmed to the same frame window as the output clip; passed through unchanged in Video Inpaint mode, and replaced by a placeholder in Edge/Join/Keyframe modes. | +| `keyframe_positions` | STRING | *(optional)* | Keyframe positions pass-through for Keyframe mode. | + +### Outputs + +| Output | Type | Description | +|---|---|---| +| `source_clip` | IMAGE | Trimmed frames — wire to mask generator's source_clip. | +| `mode` | STRING | Selected mode — wire to mask generator's mode (convert widget to input). | +| `split_index` | INT | Adjusted for the trimmed clip — wire to mask generator. | +| `edge_frames` | INT | Adjusted/passed through — wire to mask generator. | +| `segment_1`–`segment_4` | IMAGE | Frame segments per mode (same meaning as mask generator segments). Unused segments are 1-frame black placeholders. | +| `inpaint_mask` | MASK | Trimmed to match output, or placeholder. | +| `keyframe_positions` | STRING | Pass-through. 
| + +### Per-Mode Trimming + +| Mode | input_left | input_right | Behavior | +|---|---|---|---| +| End Extend | Trailing context frames | — | Keeps last N frames | +| Pre Extend | — | Leading reference frames | Keeps first N frames | +| Middle Extend | Frames before split | Frames after split | Window around split_index | +| Edge Extend | Start edge size | End edge size | Overrides edge_frames; forced symmetric (min of both) | +| Join Extend | Edge from first half | Edge from second half | Edge context around midpoint; forced symmetric | +| Bidirectional | Trailing context frames | — | Keeps last N frames | +| Frame Interpolation | — | — | Pass-through (no trimming) | +| Replace/Inpaint | Context before region | Context after region | Window around replace region | +| Video Inpaint | — | — | Pass-through (no trimming) | +| Keyframe | — | — | Pass-through (no trimming) | + +--- + +## Node: VACE Mask Generator + +Builds mask and control_frames sequences for all VACE generation modes. Works standalone for short clips, or downstream of VACE Source Prep for long clips. + +**Note:** For modes that use `target_frames` (End, Pre, Middle, Edge, Join, Bidirectional, Keyframe), `source_clip` must not have more frames than `target_frames`. If your source is longer, use VACE Source Prep upstream to trim it first. + +### Inputs + +| Input | Type | Default | Description | +|---|---|---|---| +| `source_clip` | IMAGE | — | Source video frames (B, H, W, C tensor). Must not exceed target_frames for modes that use it. | | `mode` | ENUM | `End Extend` | Generation mode (see below). 10 modes available. | | `target_frames` | INT | `81` | Total output frame count for mask and control_frames (1–10000). Used by Keyframe to set output length. Unused by Frame Interpolation, Replace/Inpaint, and Video Inpaint. | | `split_index` | INT | `0` | Where to split the source. Meaning varies by mode. Unused by Edge/Join/Keyframe. Bidirectional: frames before clip (0 = even split). 
Frame Interpolation: new frames per gap. Replace/Inpaint: start index of replace region. | diff --git a/__init__.py b/__init__.py index 7eddfca..c4bb0eb 100644 --- a/__init__.py +++ b/__init__.py @@ -13,4 +13,6 @@ NODE_CLASS_MAPPINGS.update(LATENT_CLASS_MAPPINGS) NODE_DISPLAY_NAME_MAPPINGS.update(SAVE_DISPLAY_MAPPINGS) NODE_DISPLAY_NAME_MAPPINGS.update(LATENT_DISPLAY_MAPPINGS) -__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] +WEB_DIRECTORY = "./web/js" + +__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"] diff --git a/nodes.py b/nodes.py index cb312d4..1467a96 100644 --- a/nodes.py +++ b/nodes.py @@ -64,7 +64,10 @@ Parameter usage by mode: split_index : End, Pre, Middle, Bidirectional, Frame Interpolation, Replace/Inpaint edge_frames : Edge, Join, Replace/Inpaint inpaint_mask : Video Inpaint only - keyframe_positions : Keyframe only (optional)""" + keyframe_positions : Keyframe only (optional) + +Note: source_clip must not exceed target_frames for modes that use it. +If your source is longer, use VACE Source Prep upstream to trim it first.""" @classmethod def INPUT_TYPES(cls): @@ -139,6 +142,15 @@ Parameter usage by mode: def generate(self, source_clip, mode, target_frames, split_index, edge_frames, inpaint_mask=None, keyframe_positions=None): B, H, W, C = source_clip.shape dev = source_clip.device + + modes_using_target = {"End Extend", "Pre Extend", "Middle Extend", "Edge Extend", + "Join Extend", "Bidirectional Extend", "Keyframe"} + if mode in modes_using_target and B > target_frames: + raise ValueError( + f"{mode}: source_clip has {B} frames but target_frames is {target_frames}. " + "Use VACE Source Prep to trim long clips." 
+ ) + BLACK = 0.0 WHITE = 1.0 GREY = 0.498 @@ -315,10 +327,250 @@ Parameter usage by mode: raise ValueError(f"Unknown mode: {mode}") +class VACESourcePrep: + CATEGORY = "VACE Tools" + FUNCTION = "prepare" + RETURN_TYPES = ("IMAGE", "STRING", "INT", "INT", "IMAGE", "IMAGE", "IMAGE", "IMAGE", "MASK", "STRING") + RETURN_NAMES = ( + "source_clip", "mode", "split_index", "edge_frames", + "segment_1", "segment_2", "segment_3", "segment_4", + "inpaint_mask", "keyframe_positions", + ) + OUTPUT_TOOLTIPS = ( + "Trimmed source frames — wire to VACE Mask Generator's source_clip.", + "Selected mode — wire to VACE Mask Generator's mode (convert widget to input).", + "Adjusted split_index for the trimmed clip — wire to VACE Mask Generator.", + "Adjusted edge_frames — wire to VACE Mask Generator.", + "Segment 1: End/Pre/Bidirectional/Frame Interpolation/Video Inpaint/Keyframe: full output clip. Middle: part A. Edge: start edge. Join: part 1. Replace/Inpaint: before region.", + "Segment 2: Middle: part B. Edge: discarded middle. Join: part 2. Replace/Inpaint: replace region. Others: placeholder.", + "Segment 3: Edge: end edge. Join: part 3. Replace/Inpaint: after region. Others: placeholder.", + "Segment 4: Join: part 4. Others: placeholder.", + "Inpaint mask trimmed to match output — wire to VACE Mask Generator.", + "Keyframe positions pass-through — wire to VACE Mask Generator.", + ) + DESCRIPTION = """VACE Source Prep — trims long source clips for VACE Mask Generator. + +Use this node BEFORE VACE Mask Generator when your source clip is longer than target_frames. +It selects the relevant frames based on mode, input_left, and input_right, then outputs +adjusted parameters to wire directly into the mask generator. 
+ +input_left / input_right (0 = use all available): + End Extend: input_left = trailing context frames to keep + Pre Extend: input_right = leading reference frames to keep + Middle Extend: input_left/input_right = frames each side of split + Edge Extend: input_left/input_right = start/end edge size (overrides edge_frames) + Join Extend: input_left/input_right = edge context from each half + Bidirectional: input_left = trailing context frames to keep + Frame Interpolation: pass-through (no trimming) + Replace/Inpaint: input_left/input_right = context frames around replace region + Video Inpaint: pass-through (no trimming) + Keyframe: pass-through (no trimming)""" + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "source_clip": ("IMAGE", {"description": "Full source video frames (B,H,W,C tensor)."}), + "mode": ( + [ + "End Extend", + "Pre Extend", + "Middle Extend", + "Edge Extend", + "Join Extend", + "Bidirectional Extend", + "Frame Interpolation", + "Replace/Inpaint", + "Video Inpaint", + "Keyframe", + ], + { + "default": "End Extend", + "description": "Generation mode — must match VACE Mask Generator's mode.", + }, + ), + "split_index": ( + "INT", + { + "default": 0, + "min": -10000, + "max": 10000, + "description": "Split position in the full source video. Same meaning as mask generator's split_index.", + }, + ), + "input_left": ( + "INT", + { + "default": 0, + "min": 0, + "max": 10000, + "description": "Frames from the left side of the split point to keep (0 = all available). " + "End: trailing context. Middle: frames before split. Edge/Join: start edge size. " + "Bidirectional: trailing context. Replace: context before region.", + }, + ), + "input_right": ( + "INT", + { + "default": 0, + "min": 0, + "max": 10000, + "description": "Frames from the right side of the split point to keep (0 = all available). " + "Pre: leading reference. Middle: frames after split. Edge/Join: end edge size. 
" + "Replace: context after region.", + }, + ), + "edge_frames": ( + "INT", + { + "default": 8, + "min": 1, + "max": 10000, + "description": "Default edge size for Edge/Join modes (overridden by input_left/input_right if non-zero). " + "Replace/Inpaint: number of frames to replace.", + }, + ), + }, + "optional": { + "inpaint_mask": ( + "MASK", + { + "description": "Spatial inpaint mask — trimmed to match output frames for Video Inpaint mode.", + }, + ), + "keyframe_positions": ( + "STRING", + { + "default": "", + "description": "Keyframe positions pass-through for Keyframe mode.", + }, + ), + }, + } + + def prepare(self, source_clip, mode, split_index, input_left, input_right, edge_frames, inpaint_mask=None, keyframe_positions=None): + B, H, W, C = source_clip.shape + dev = source_clip.device + + def ph(): + return _placeholder(H, W, dev) + + def safe(t): + return _ensure_nonempty(t, H, W, dev) + + def mask_ph(): + return torch.zeros((1, H, W), dtype=torch.float32, device=dev) + + def trim_mask(start, end): + if inpaint_mask is None: + return mask_ph() + m = inpaint_mask.to(dev) + if m.shape[0] == 1: + return m + actual_end = min(end, m.shape[0]) + actual_start = min(start, actual_end) + trimmed = m[actual_start:actual_end] + if trimmed.shape[0] == 0: + return mask_ph() + return trimmed + + kp_out = keyframe_positions if keyframe_positions else "" + + if mode == "End Extend": + if input_left > 0: + start = max(0, B - input_left) + output = source_clip[start:] + else: + output = source_clip + start = 0 + return (output, mode, 0, edge_frames, safe(output), ph(), ph(), ph(), trim_mask(start, B), kp_out) + + elif mode == "Pre Extend": + if input_right > 0: + end = min(B, input_right) + output = source_clip[:end] + else: + output = source_clip + end = B + return (output, mode, output.shape[0], edge_frames, safe(output), ph(), ph(), ph(), trim_mask(0, end), kp_out) + + elif mode == "Middle Extend": + left_start = max(0, split_index - input_left) if input_left > 0 else 0 
+ right_end = min(B, split_index + input_right) if input_right > 0 else B + output = source_clip[left_start:right_end] + out_split = split_index - left_start + part_a = source_clip[left_start:split_index] + part_b = source_clip[split_index:right_end] + return (output, mode, out_split, edge_frames, safe(part_a), safe(part_b), ph(), ph(), trim_mask(left_start, right_end), kp_out) + + elif mode == "Edge Extend": + eff_left = min(input_left if input_left > 0 else edge_frames, B) + eff_right = min(input_right if input_right > 0 else edge_frames, B) + sym = min(eff_left, eff_right) + start_seg = source_clip[:sym] + end_seg = source_clip[-sym:] if sym > 0 else source_clip[:0] + mid_seg = source_clip[sym:B - sym] if 2 * sym < B else source_clip[:0] + output = torch.cat([start_seg, end_seg], dim=0) + return (output, mode, 0, sym, safe(start_seg), safe(mid_seg), safe(end_seg), ph(), mask_ph(), kp_out) + + elif mode == "Join Extend": + half = B // 2 + first_half = source_clip[:half] + second_half = source_clip[half:] + eff_left = input_left if input_left > 0 else edge_frames + eff_right = input_right if input_right > 0 else edge_frames + eff_left = min(eff_left, first_half.shape[0]) + eff_right = min(eff_right, second_half.shape[0]) + sym = min(eff_left, eff_right) + part_1 = first_half[:-sym] if sym < first_half.shape[0] else first_half[:0] + part_2 = first_half[-sym:] + part_3 = second_half[:sym] + part_4 = second_half[sym:] + output = torch.cat([part_2, part_3], dim=0) + return (output, mode, 0, sym, safe(part_1), safe(part_2), safe(part_3), safe(part_4), mask_ph(), kp_out) + + elif mode == "Bidirectional Extend": + if input_left > 0: + start = max(0, B - input_left) + output = source_clip[start:] + else: + output = source_clip + start = 0 + return (output, mode, split_index, edge_frames, safe(output), ph(), ph(), ph(), trim_mask(start, B), kp_out) + + elif mode == "Frame Interpolation": + return (source_clip, mode, split_index, edge_frames, safe(source_clip), ph(), ph(), 
ph(), trim_mask(0, B), kp_out) + + elif mode == "Replace/Inpaint": + start = max(0, min(split_index, B)) + end_idx = min(start + edge_frames, B) + length = end_idx - start + ctx_start = max(0, start - input_left) if input_left > 0 else 0 + ctx_end = min(B, end_idx + input_right) if input_right > 0 else B + before = source_clip[ctx_start:start] + replace_region = source_clip[start:end_idx] + after = source_clip[end_idx:ctx_end] + output = torch.cat([before, replace_region, after], dim=0) + out_split = before.shape[0] + out_edge = length + return (output, mode, out_split, out_edge, safe(before), safe(replace_region), safe(after), ph(), trim_mask(ctx_start, ctx_end), kp_out) + + elif mode == "Video Inpaint": + out_mask = inpaint_mask.to(dev) if inpaint_mask is not None else mask_ph() + return (source_clip, mode, split_index, edge_frames, safe(source_clip), ph(), ph(), ph(), out_mask, kp_out) + + elif mode == "Keyframe": + return (source_clip, mode, split_index, edge_frames, safe(source_clip), ph(), ph(), ph(), mask_ph(), kp_out) + + raise ValueError(f"Unknown mode: {mode}") + + NODE_CLASS_MAPPINGS = { "VACEMaskGenerator": VACEMaskGenerator, + "VACESourcePrep": VACESourcePrep, } NODE_DISPLAY_NAME_MAPPINGS = { "VACEMaskGenerator": "VACE Mask Generator", + "VACESourcePrep": "VACE Source Prep", } diff --git a/web/js/vace_widgets.js b/web/js/vace_widgets.js new file mode 100644 index 0000000..f924a0e --- /dev/null +++ b/web/js/vace_widgets.js @@ -0,0 +1,78 @@ +import { app } from "../../scripts/app.js"; + +app.registerExtension({ + name: "VACE.SourcePrep.SmartDisplay", + nodeCreated(node) { + if (node.comfyClass !== "VACESourcePrep") return; + + const modeWidget = node.widgets.find(w => w.name === "mode"); + if (!modeWidget) return; + + const VISIBILITY = { + "End Extend": { split_index: false, input_left: true, input_right: false, edge_frames: false }, + "Pre Extend": { split_index: false, input_left: false, input_right: true, edge_frames: false }, + "Middle Extend": { 
split_index: true, input_left: true, input_right: true, edge_frames: false }, + "Edge Extend": { split_index: false, input_left: true, input_right: true, edge_frames: true }, + "Join Extend": { split_index: false, input_left: true, input_right: true, edge_frames: true }, + "Bidirectional Extend": { split_index: true, input_left: true, input_right: false, edge_frames: false }, + "Frame Interpolation": { split_index: true, input_left: false, input_right: false, edge_frames: false }, + "Replace/Inpaint": { split_index: true, input_left: true, input_right: true, edge_frames: true }, + "Video Inpaint": { split_index: false, input_left: false, input_right: false, edge_frames: false }, + "Keyframe": { split_index: false, input_left: false, input_right: false, edge_frames: false }, + }; + + function toggleWidget(widget, show) { + if (!widget) return; + if (!widget._origType) widget._origType = widget.type; + widget.type = show ? widget._origType : "hidden"; + } + + function updateVisibility(mode) { + const vis = VISIBILITY[mode]; + if (!vis) return; + for (const [name, show] of Object.entries(vis)) { + toggleWidget(node.widgets.find(w => w.name === name), show); + } + node.setSize(node.computeSize()); + app.graph.setDirtyCanvas(true); + } + + // Hook mode widget value setter to catch both UI and programmatic changes + const descriptor = Object.getOwnPropertyDescriptor(modeWidget, "value") || + { configurable: true }; + const hasCustomAccessor = !!descriptor.get; + + if (!hasCustomAccessor) { + let _value = modeWidget.value; + Object.defineProperty(modeWidget, "value", { + get() { return _value; }, + set(v) { + _value = v; + updateVisibility(v); + }, + configurable: true, + }); + } else { + const origGet = descriptor.get; + const origSet = descriptor.set; + Object.defineProperty(modeWidget, "value", { + get() { return origGet.call(this); }, + set(v) { + origSet.call(this, v); + updateVisibility(v); + }, + configurable: true, + }); + } + + // Also hook callback for dropdown 
selection events + const origCallback = modeWidget.callback; + modeWidget.callback = function(value) { + updateVisibility(value); + if (origCallback) origCallback.call(this, value); + }; + + // Initial update + updateVisibility(modeWidget.value); + }, +});