diff --git a/README.md b/README.md index 88e876f..33fb37e 100644 --- a/README.md +++ b/README.md @@ -38,11 +38,9 @@ Irrelevant widgets are automatically hidden based on the selected mode. | `mode` | STRING | Selected mode — wire to mask generator's mode (convert widget to input). | | `split_index` | INT | Adjusted for the trimmed clip — wire to mask generator. | | `edge_frames` | INT | Adjusted/passed through — wire to mask generator. | -| `segment_1`–`segment_4` | IMAGE | Frame segments per mode (same meaning as mask generator segments). Unused segments are 1-frame black placeholders. | | `inpaint_mask` | MASK | Trimmed to match output, or placeholder. | | `keyframe_positions` | STRING | Pass-through. | -| `trim_start` | INT | Start index of the trimmed region in the original clip — wire to VACE Merge Back. | -| `trim_end` | INT | End index of the trimmed region in the original clip — wire to VACE Merge Back. | +| `vace_pipe` | VACE_PIPE | Pipe carrying mode, trim bounds, and context frame counts — wire to VACE Merge Back. | ### Per-Mode Trimming @@ -318,7 +316,7 @@ control_frames: [ k0][ GREY ][ k1][ GREY ][ k2][ GREY ][ k3] ## Node: VACE Merge Back -Splices VACE sampler output back into the original full-length video. Connect the original (untrimmed) clip, the VACE sampler output, the mask from VACE Mask Generator, and the `mode`/`trim_start`/`trim_end` from VACE Source Prep. +Splices VACE sampler output back into the original full-length video. Connect the original (untrimmed) clip, the VACE sampler output, and the `vace_pipe` from VACE Source Prep. The pipe carries mode, trim bounds, and context frame counts for automatic blending. Irrelevant widgets are automatically hidden based on the selected blend method. @@ -328,11 +326,7 @@ Irrelevant widgets are automatically hidden based on the selected blend method. |---|---|---|---| | `original_clip` | IMAGE | — | Full original video (before any trimming). | | `vace_output` | IMAGE | — | VACE sampler output. | -| `mask` | IMAGE | — | Mask from VACE Mask Generator — BLACK=context, WHITE=generated. | -| `mode` | STRING | *(wired)* | Mode from VACE Source Prep (must be wired, not typed). | -| `trim_start` | INT | *(wired)* | Start of trimmed region in original (from VACE Source Prep). | -| `trim_end` | INT | *(wired)* | End of trimmed region in original (from VACE Source Prep). | -| `blend_frames` | INT | `4` | Context frames to blend at each seam (0 = hard cut). | +| `vace_pipe` | VACE_PIPE | — | Pipe from VACE Source Prep carrying mode, trim bounds, and context counts. | | `blend_method` | ENUM | `optical_flow` | `none` (hard cut), `alpha` (linear crossfade), or `optical_flow` (motion-compensated). | | `of_preset` | ENUM | `balanced` | Optical flow quality: `fast`, `balanced`, `quality`, `max`. | @@ -346,23 +340,23 @@ Irrelevant widgets are automatically hidden based on the selected blend method. **Pass-through modes** (Edge Extend, Frame Interpolation, Keyframe, Video Inpaint): returns `vace_output` as-is — the VACE output IS the final result for these modes. -**Splice modes** (End, Pre, Middle, Join, Bidirectional, Replace): reconstructs `original[:trim_start] + vace_output + original[trim_end:]`, then blends at the seams where context frames meet original frames. +**Splice modes** (End, Pre, Middle, Join, Bidirectional, Replace): reconstructs `original[:trim_start] + vace_output + original[trim_end:]`, then blends across the full context zones at each seam. -The node detects context zones by counting consecutive black frames at the start and end of the mask. At each seam, `blend_frames` frames are blended with a smooth alpha ramp. Optical flow blending warps both frames along the motion field before blending, reducing ghosting on moving subjects. +Context frame counts (`left_ctx`, `right_ctx`) are carried in the `vace_pipe` and determined automatically by VACE Source Prep based on the mode and input_left/input_right settings. Blending uses a smooth alpha ramp across the entire context zone. Optical flow blending warps both frames along the motion field before blending, reducing ghosting on moving subjects. ### Example: Middle Extend ``` Original: 274 frames (0–273) Prep: split_index=137, input_left=16, input_right=16 - → trim_start=121, trim_end=153, trimmed=32 frames + → vace_pipe: trim_start=121, trim_end=153, left_ctx=16, right_ctx=16 Mask Gen: target_frames=81 → mask = [BLACK×16] [WHITE×49] [BLACK×16] VACE out: 81 frames (from sampler) Merge: result = original[0:121] + vace[0:81] + original[153:274] → 121 + 81 + 121 = 323 frames - Left blend: vace[0..3] ↔ original[121..124] - Right blend: vace[77..80] ↔ original[149..152] + Left blend: vace[0..15] ↔ original[121..136] (full 16-frame context zone) + Right blend: vace[65..80] ↔ original[137..152] (full 16-frame context zone) ``` ### Wiring Diagram @@ -370,14 +364,12 @@ Merge: result = original[0:121] + vace[0:81] + original[153:274] ``` [Load Video] │ - ├─ source_clip ──→ [VACESourcePrep] ─┬─ source_clip ──→ [MaskGen] ─→ mask ──┐ - │ ├─ mode ───────────────────────────────┤ - │ ├─ trim_start ─────────────────────────┤ - │ └─ trim_end ──────────────────────────┤ - │ │ - └─ original_clip ───────────────────────────────────────────────────────────→ [VACEMergeBack] - │ - [Sampler] ─→ vace_output ────────────────┘ + ├─ source_clip ──→ [VACESourcePrep] ─┬─ source_clip ──→ [MaskGen] ─→ [Sampler] + │ ├─ mode ──────────→ [MaskGen] │ + │ └─ vace_pipe ─────────────────┐ │ + │ │ │ + └─ original_clip ──────────────────────────────────────→ [VACEMergeBack] ←┘ + vace_output ``` --- diff --git a/merge_node.py b/merge_node.py index c81b86d..c851c2e 100644 --- a/merge_node.py +++ b/merge_node.py @@ -12,27 +12,6 @@ OPTICAL_FLOW_PRESETS = { PASS_THROUGH_MODES = {"Edge Extend", "Frame Interpolation", "Keyframe", "Video Inpaint"} -def _count_leading_black(mask): - """Count consecutive black (context) frames at the start of mask.""" - count = 0 - for i in range(mask.shape[0]): - if mask[i].max().item() < 0.01: - count += 1 - else: - break - return count - - -def _count_trailing_black(mask): - """Count consecutive black (context) frames at the end of mask.""" - count = 0 - for i in range(mask.shape[0] - 1, -1, -1): - if mask[i].max().item() < 0.01: - count += 1 - else: - break - return count - def _alpha_blend(frame_a, frame_b, alpha): """Simple linear crossfade between two frames (H,W,3 tensors).""" @@ -102,16 +81,15 @@ class VACEMergeBack: ) DESCRIPTION = """VACE Merge Back — splices VACE sampler output back into the original full-length video. -Connect the original (untrimmed) clip, the VACE sampler output, the mask from VACE Mask Generator, -and the mode/trim_start/trim_end from VACE Source Prep. The node detects context zones from the mask -and blends at the seams where context meets generated frames. +Connect the original (untrimmed) clip, the VACE sampler output, and the vace_pipe from VACE Source Prep. +The pipe carries mode, trim bounds, and context frame counts for automatic blending. Pass-through modes (Edge Extend, Frame Interpolation, Keyframe, Video Inpaint): Returns vace_output as-is — the VACE output IS the final result. Splice modes (End, Pre, Middle, Join, Bidirectional, Replace): Reconstructs original[:trim_start] + vace_output + original[trim_end:] - with optional blending at the seams. + with automatic blending across the full context zones. Blend methods: none — Hard cut at seams (fastest) @@ -124,17 +102,19 @@ Blend methods: "required": { "original_clip": ("IMAGE", {"description": "Full original video (before any trimming)."}), "vace_output": ("IMAGE", {"description": "VACE sampler output."}), - "mask": ("IMAGE", {"description": "Mask from VACE Mask Generator — BLACK=context, WHITE=generated."}), - "mode": ("STRING", {"forceInput": True, "description": "Mode from VACE Source Prep."}), - "trim_start": ("INT", {"forceInput": True, "default": 0, "description": "Start of trimmed region in original."}), - "trim_end": ("INT", {"forceInput": True, "default": 0, "description": "End of trimmed region in original."}), - "blend_frames": ("INT", {"default": 4, "min": 0, "max": 100, "description": "Context frames to blend at each seam (0 = hard cut)."}), + "vace_pipe": ("VACE_PIPE", {"description": "Pipe from VACE Source Prep carrying mode, trim bounds, and context counts."}), "blend_method": (["optical_flow", "alpha", "none"], {"default": "optical_flow", "description": "Blending method at seams."}), "of_preset": (["fast", "balanced", "quality", "max"], {"default": "balanced", "description": "Optical flow quality preset."}), }, } - def merge(self, original_clip, vace_output, mask, mode, trim_start, trim_end, blend_frames, blend_method, of_preset): + def merge(self, original_clip, vace_output, vace_pipe, blend_method, of_preset): + mode = vace_pipe["mode"] + trim_start = vace_pipe["trim_start"] + trim_end = vace_pipe["trim_end"] + left_ctx = vace_pipe["left_ctx"] + right_ctx = vace_pipe["right_ctx"] + # Pass-through modes: VACE output IS the final result if mode in PASS_THROUGH_MODES: return (vace_output,) @@ -145,34 +125,24 @@ Blend methods: tail = original_clip[trim_end:] result = torch.cat([head, vace_output, tail], dim=0) - if blend_method == "none" or blend_frames <= 0: + if blend_method == "none" or (left_ctx == 0 and right_ctx == 0): return (result,) - # Detect context zones from mask - left_ctx_len = _count_leading_black(mask) - right_ctx_len = _count_trailing_black(mask) - def blend_frame(orig, vace, alpha): if blend_method == "optical_flow": return _optical_flow_blend(orig, vace, alpha, of_preset) return _alpha_blend(orig, vace, alpha) - # Blend at LEFT seam (context → generated transition) - bf_left = min(blend_frames, left_ctx_len) - for j in range(bf_left): - alpha = (j + 1) / (bf_left + 1) - orig_frame = original_clip[trim_start + j] - vace_frame = vace_output[j] - result[trim_start + j] = blend_frame(orig_frame, vace_frame, alpha) + # Blend across full left context zone + for j in range(left_ctx): + alpha = (j + 1) / (left_ctx + 1) + result[trim_start + j] = blend_frame(original_clip[trim_start + j], vace_output[j], alpha) - # Blend at RIGHT seam (generated → context transition) - bf_right = min(blend_frames, right_ctx_len) - for j in range(bf_right): - alpha = 1.0 - (j + 1) / (bf_right + 1) - frame_idx = V - bf_right + j - orig_frame = original_clip[trim_end - bf_right + j] - vace_frame = vace_output[frame_idx] - result[trim_start + frame_idx] = blend_frame(orig_frame, vace_frame, alpha) + # Blend across full right context zone + for j in range(right_ctx): + alpha = 1.0 - (j + 1) / (right_ctx + 1) + frame_idx = V - right_ctx + j + result[trim_start + frame_idx] = blend_frame(original_clip[trim_end - right_ctx + j], vace_output[frame_idx], alpha) return (result,) diff --git a/nodes.py b/nodes.py index 018a535..fb36a4e 100644 --- a/nodes.py +++ b/nodes.py @@ -330,25 +330,19 @@ If your source is longer, use VACE Source Prep upstream to trim it first.""" class VACESourcePrep: CATEGORY = "VACE Tools" FUNCTION = "prepare" - RETURN_TYPES = ("IMAGE", "STRING", "INT", "INT", "IMAGE", "IMAGE", "IMAGE", "IMAGE", "MASK", "STRING", "INT", "INT") + RETURN_TYPES = ("IMAGE", "STRING", "INT", "INT", "MASK", "STRING", "VACE_PIPE") RETURN_NAMES = ( "source_clip", "mode", "split_index", "edge_frames", - "segment_1", "segment_2", "segment_3", "segment_4", - "inpaint_mask", "keyframe_positions", "trim_start", "trim_end", + "inpaint_mask", "keyframe_positions", "vace_pipe", ) OUTPUT_TOOLTIPS = ( "Trimmed source frames — wire to VACE Mask Generator's source_clip.", "Selected mode — wire to VACE Mask Generator's mode (convert widget to input).", "Adjusted split_index for the trimmed clip — wire to VACE Mask Generator.", "Adjusted edge_frames — wire to VACE Mask Generator.", - "Segment 1: End/Pre/Bidirectional/Frame Interpolation/Video Inpaint/Keyframe: full output clip. Middle: part A. Edge: start edge. Join: part 1. Replace/Inpaint: before region.", - "Segment 2: Middle: part B. Edge: discarded middle. Join: part 2. Replace/Inpaint: replace region. Others: placeholder.", - "Segment 3: Edge: end edge. Join: part 3. Replace/Inpaint: after region. Others: placeholder.", - "Segment 4: Join: part 4. Others: placeholder.", "Inpaint mask trimmed to match output — wire to VACE Mask Generator.", "Keyframe positions pass-through — wire to VACE Mask Generator.", - "Start index of the trimmed region in the original clip — wire to VACE Merge Back.", - "End index of the trimmed region in the original clip — wire to VACE Merge Back.", + "Pipe carrying mode, trim bounds, and context counts — wire to VACE Merge Back.", ) DESCRIPTION = """VACE Source Prep — trims long source clips for VACE Mask Generator. @@ -454,12 +448,6 @@ input_left / input_right (0 = use all available): B, H, W, C = source_clip.shape dev = source_clip.device - def ph(): - return _placeholder(H, W, dev) - - def safe(t): - return _ensure_nonempty(t, H, W, dev) - def mask_ph(): return torch.zeros((1, H, W), dtype=torch.float32, device=dev) @@ -485,7 +473,8 @@ input_left / input_right (0 = use all available): else: output = source_clip start = 0 - return (output, mode, 0, edge_frames, safe(output), ph(), ph(), ph(), trim_mask(start, B), kp_out, start, B) + pipe = {"mode": mode, "trim_start": start, "trim_end": B, "left_ctx": output.shape[0], "right_ctx": 0} + return (output, mode, 0, edge_frames, trim_mask(start, B), kp_out, pipe) elif mode == "Pre Extend": if input_right > 0: @@ -494,7 +483,8 @@ input_left / input_right (0 = use all available): else: output = source_clip end = B - return (output, mode, output.shape[0], edge_frames, safe(output), ph(), ph(), ph(), trim_mask(0, end), kp_out, 0, end) + pipe = {"mode": mode, "trim_start": 0, "trim_end": end, "left_ctx": 0, "right_ctx": output.shape[0]} + return (output, mode, output.shape[0], edge_frames, trim_mask(0, end), kp_out, pipe) elif mode == "Middle Extend": left_start = max(0, split_index - input_left) if input_left > 0 else 0 @@ -503,7 +493,8 @@ input_left / input_right (0 = use all available): out_split = split_index - left_start part_a = source_clip[left_start:split_index] part_b = source_clip[split_index:right_end] - return (output, mode, out_split, edge_frames, safe(part_a), safe(part_b), ph(), ph(), trim_mask(left_start, right_end), kp_out, left_start, right_end) + pipe = {"mode": mode, "trim_start": left_start, "trim_end": right_end, "left_ctx": out_split, "right_ctx": part_b.shape[0]} + return (output, mode, out_split, edge_frames, trim_mask(left_start, right_end), kp_out, pipe) elif mode == "Edge Extend": eff_left = min(input_left if input_left > 0 else edge_frames, B) @@ -513,7 +504,8 @@ input_left / input_right (0 = use all available): end_seg = source_clip[-sym:] if sym > 0 else source_clip[:0] mid_seg = source_clip[sym:B - sym] if 2 * sym < B else source_clip[:0] output = torch.cat([start_seg, end_seg], dim=0) - return (output, mode, 0, sym, safe(start_seg), safe(mid_seg), safe(end_seg), ph(), mask_ph(), kp_out, 0, B) + pipe = {"mode": mode, "trim_start": 0, "trim_end": B, "left_ctx": 0, "right_ctx": 0} + return (output, mode, 0, sym, mask_ph(), kp_out, pipe) elif mode == "Join Extend": half = B // 2 @@ -529,7 +521,8 @@ input_left / input_right (0 = use all available): part_3 = second_half[:sym] part_4 = second_half[sym:] output = torch.cat([part_2, part_3], dim=0) - return (output, mode, 0, sym, safe(part_1), safe(part_2), safe(part_3), safe(part_4), mask_ph(), kp_out, half - sym, half + sym) + pipe = {"mode": mode, "trim_start": half - sym, "trim_end": half + sym, "left_ctx": sym, "right_ctx": sym} + return (output, mode, 0, sym, mask_ph(), kp_out, pipe) elif mode == "Bidirectional Extend": if input_left > 0: @@ -538,10 +531,12 @@ input_left / input_right (0 = use all available): else: output = source_clip start = 0 - return (output, mode, split_index, edge_frames, safe(output), ph(), ph(), ph(), trim_mask(start, B), kp_out, start, B) + pipe = {"mode": mode, "trim_start": start, "trim_end": B, "left_ctx": 0, "right_ctx": 0} + return (output, mode, split_index, edge_frames, trim_mask(start, B), kp_out, pipe) elif mode == "Frame Interpolation": - return (source_clip, mode, split_index, edge_frames, safe(source_clip), ph(), ph(), ph(), trim_mask(0, B), kp_out, 0, B) + pipe = {"mode": mode, "trim_start": 0, "trim_end": B, "left_ctx": 0, "right_ctx": 0} + return (source_clip, mode, split_index, edge_frames, trim_mask(0, B), kp_out, pipe) elif mode == "Replace/Inpaint": start = max(0, min(split_index, B)) @@ -555,14 +550,17 @@ input_left / input_right (0 = use all available): output = torch.cat([before, replace_region, after], dim=0) out_split = before.shape[0] out_edge = length - return (output, mode, out_split, out_edge, safe(before), safe(replace_region), safe(after), ph(), trim_mask(ctx_start, ctx_end), kp_out, ctx_start, ctx_end) + pipe = {"mode": mode, "trim_start": ctx_start, "trim_end": ctx_end, "left_ctx": before.shape[0], "right_ctx": after.shape[0]} + return (output, mode, out_split, out_edge, trim_mask(ctx_start, ctx_end), kp_out, pipe) elif mode == "Video Inpaint": out_mask = inpaint_mask.to(dev) if inpaint_mask is not None else mask_ph() - return (source_clip, mode, split_index, edge_frames, safe(source_clip), ph(), ph(), ph(), out_mask, kp_out, 0, B) + pipe = {"mode": mode, "trim_start": 0, "trim_end": B, "left_ctx": 0, "right_ctx": 0} + return (source_clip, mode, split_index, edge_frames, out_mask, kp_out, pipe) elif mode == "Keyframe": - return (source_clip, mode, split_index, edge_frames, safe(source_clip), ph(), ph(), ph(), mask_ph(), kp_out, 0, B) + pipe = {"mode": mode, "trim_start": 0, "trim_end": B, "left_ctx": 0, "right_ctx": 0} + return (source_clip, mode, split_index, edge_frames, mask_ph(), kp_out, pipe) raise ValueError(f"Unknown mode: {mode}") diff --git a/web/js/vace_widgets.js b/web/js/vace_widgets.js index 9710df2..2295c1e 100644 --- a/web/js/vace_widgets.js +++ b/web/js/vace_widgets.js @@ -92,9 +92,7 @@ app.registerExtension({ } function updateVisibility(method) { - const showBlend = method !== "none"; const showOf = method === "optical_flow"; - toggleWidget(node.widgets.find(w => w.name === "blend_frames"), showBlend); toggleWidget(node.widgets.find(w => w.name === "of_preset"), showOf); node.setSize(node.computeSize()); app.graph.setDirtyCanvas(true);