diff --git a/README.md b/README.md index 1af867e..0240f8b 100644 --- a/README.md +++ b/README.md @@ -18,11 +18,12 @@ Restart ComfyUI. The node appears under the **VACE Tools** category. | Input | Type | Default | Description | |---|---|---|---| | `source_clip` | IMAGE | — | Source video frames (B, H, W, C tensor) | -| `mode` | ENUM | `End Extend` | Generation mode (see below). 9 modes available. | -| `target_frames` | INT | `81` | Total output frame count for mask and control_frames (1–10000). Unused by Frame Interpolation, Replace/Inpaint, and Video Inpaint. | -| `split_index` | INT | `0` | Where to split the source. Meaning varies by mode. Unused by Edge/Join. Bidirectional: frames before clip (0 = even split). Frame Interpolation: new frames per gap. Replace/Inpaint: start index of replace region. | -| `edge_frames` | INT | `8` | Number of edge frames for Edge and Join modes. Replace/Inpaint: number of frames to replace. Unused by End/Pre/Middle/Bidirectional/Frame Interpolation/Video Inpaint. | +| `mode` | ENUM | `End Extend` | Generation mode (see below). 10 modes available. | +| `target_frames` | INT | `81` | Total output frame count for mask and control_frames (1–10000). Used by Keyframe to set output length. Unused by Frame Interpolation, Replace/Inpaint, and Video Inpaint. | +| `split_index` | INT | `0` | Where to split the source. Meaning varies by mode. Unused by Edge/Join/Keyframe. Bidirectional: frames before clip (0 = even split). Frame Interpolation: new frames per gap. Replace/Inpaint: start index of replace region. | +| `edge_frames` | INT | `8` | Number of edge frames for Edge and Join modes. Replace/Inpaint: number of frames to replace. Unused by End/Pre/Middle/Bidirectional/Frame Interpolation/Video Inpaint/Keyframe. | | `inpaint_mask` | MASK | *(optional)* | Spatial inpaint mask for Video Inpaint mode (B, H, W). White (1.0) = regenerate, Black (0.0) = keep. Single frame broadcasts to all source frames. 
| +| `keyframe_positions` | STRING | *(optional)* | Comma-separated frame indices for Keyframe mode (e.g. `0,20,50,80`). One position per source frame, sorted ascending, no duplicates, within [0, target_frames-1]. Leave empty for even auto-spread. | ### Outputs @@ -234,6 +235,31 @@ control_frames: [ source pixels where mask=0, grey where mask=1 ] | `segment_1` | Full source clip | | `segment_2`–`4` | Placeholder | +--- + +### Keyframe + +Place keyframe images at specific positions within a `target_frames`-length output, and generate everything between them. + +- **`source_clip`** — a small batch of keyframe images (e.g. 4 frames). +- **`target_frames`** — total output frame count. +- **`keyframe_positions`** *(optional)* — comma-separated frame indices (e.g. `0,20,50,80`). Must have one value per source frame, sorted ascending, no duplicates, all within [0, target_frames-1]. Leave empty for **auto-spread** (first keyframe at frame 0, last at `target_frames-1`, others evenly distributed). +- **`split_index`**, **`edge_frames`** — unused. +- **`frames_to_generate`** = `target_frames − source_frames` +- **Total output** = `target_frames` + +``` +Example: 4 keyframes, target_frames=81, positions auto-spread to 0,27,53,80 + +mask: [ B ][ W×26 ][ B ][ W×25 ][ B ][ W×26 ][ B ] +control_frames: [ k0][ GREY ][ k1][ GREY ][ k2][ GREY ][ k3] +``` + +| Segment | Content | +|---|---| +| `segment_1` | Full source clip (keyframe images) | +| `segment_2`–`4` | Placeholder | + ## Dependencies None beyond PyTorch, which is bundled with ComfyUI. diff --git a/nodes.py b/nodes.py index c4a15f9..cb312d4 100644 --- a/nodes.py +++ b/nodes.py @@ -36,7 +36,7 @@ class VACEMaskGenerator: OUTPUT_TOOLTIPS = ( "Mask sequence — black (0) = keep original, white (1) = generate. Per-frame for most modes; per-pixel for Video Inpaint.", "Visual reference for VACE — source pixels where mask is black, grey (#7f7f7f) fill where mask is white.", - "Segment 1: source/context frames. 
End/Pre/Bidirectional/Frame Interpolation/Video Inpaint: full clip. Middle: part A. Edge: start edge. Join: part 1. Replace/Inpaint: frames before replaced region.", + "Segment 1: source/context frames. End/Pre/Bidirectional/Frame Interpolation/Video Inpaint/Keyframe: full clip. Middle: part A. Edge: start edge. Join: part 1. Replace/Inpaint: frames before replaced region.", "Segment 2: secondary context. Middle: part B. Edge: middle remainder. Join: part 2. Replace/Inpaint: original replaced frames. Others: placeholder.", "Segment 3: Edge: end edge. Join: part 3. Replace/Inpaint: frames after replaced region. Others: placeholder.", "Segment 4: Join: part 4. Others: placeholder.", @@ -54,15 +54,17 @@ Modes: Frame Interpolation — insert new frames between each source pair Replace/Inpaint — regenerate a range of frames in-place Video Inpaint — regenerate masked spatial regions (requires inpaint_mask) + Keyframe — place keyframe images at positions, generate between them Mask colors: Black = keep original, White = generate new. Control frames: original pixels where kept, grey (#7f7f7f) where generating. Parameter usage by mode: - target_frames : End, Pre, Middle, Edge, Join, Bidirectional - split_index : End, Pre, Middle, Bidirectional, Frame Interpolation, Replace/Inpaint - edge_frames : Edge, Join, Replace/Inpaint - inpaint_mask : Video Inpaint only""" + target_frames : End, Pre, Middle, Edge, Join, Bidirectional, Keyframe + split_index : End, Pre, Middle, Bidirectional, Frame Interpolation, Replace/Inpaint + edge_frames : Edge, Join, Replace/Inpaint + inpaint_mask : Video Inpaint only + keyframe_positions : Keyframe only (optional)""" @classmethod def INPUT_TYPES(cls): @@ -80,10 +82,11 @@ Parameter usage by mode: "Frame Interpolation", "Replace/Inpaint", "Video Inpaint", + "Keyframe", ], { "default": "End Extend", - "description": "End: generate after clip. Pre: generate before clip. Middle: generate at split point. Edge: generate between reversed edges (looping). 
Join: generate to heal two halves. Bidirectional: generate before AND after clip. Frame Interpolation: insert generated frames between each source pair. Replace/Inpaint: regenerate a range of frames in-place. Video Inpaint: regenerate masked spatial regions across all frames (requires inpaint_mask).", + "description": "End: generate after clip. Pre: generate before clip. Middle: generate at split point. Edge: generate between reversed edges (looping). Join: generate to heal two halves. Bidirectional: generate before AND after clip. Frame Interpolation: insert generated frames between each source pair. Replace/Inpaint: regenerate a range of frames in-place. Video Inpaint: regenerate masked spatial regions across all frames (requires inpaint_mask). Keyframe: place keyframe images at positions within target_frames, generate between them (optional keyframe_positions for manual placement).", }, ), "target_frames": ( @@ -92,7 +95,7 @@ Parameter usage by mode: "default": 81, "min": 1, "max": 10000, - "description": "Total output frame count for mask and control_frames. Unused by Frame Interpolation, Replace/Inpaint, and Video Inpaint.", + "description": "Total output frame count for mask and control_frames. Used by Keyframe to set output length. Unused by Frame Interpolation, Replace/Inpaint, and Video Inpaint.", }, ), "split_index": ( @@ -101,7 +104,7 @@ Parameter usage by mode: "default": 0, "min": -10000, "max": 10000, - "description": "Where to split the source. End: trim from end (e.g. -16). Pre: reference frames from start (e.g. 24). Middle: split frame index. Unused by Edge/Join. Bidirectional: frames before clip (0 = even split). Frame Interpolation: new frames per gap. Replace/Inpaint: start index of replace region. Unused by Video Inpaint.", + "description": "Where to split the source. End: trim from end (e.g. -16). Pre: reference frames from start (e.g. 24). Middle: split frame index. Unused by Edge/Join. Bidirectional: frames before clip (0 = even split). 
Frame Interpolation: new frames per gap. Replace/Inpaint: start index of replace region. Unused by Video Inpaint and Keyframe.", }, ), "edge_frames": ( @@ -110,7 +113,7 @@ Parameter usage by mode: "default": 8, "min": 1, "max": 10000, - "description": "Number of edge frames to use for Edge and Join modes. Unused by End/Pre/Middle/Bidirectional/Frame Interpolation/Video Inpaint. Replace/Inpaint: number of frames to replace.", + "description": "Number of edge frames to use for Edge and Join modes. Unused by End/Pre/Middle/Bidirectional/Frame Interpolation/Video Inpaint/Keyframe. Replace/Inpaint: number of frames to replace.", }, ), }, @@ -121,10 +124,19 @@ Parameter usage by mode: "description": "Spatial inpaint mask for Video Inpaint mode. White (1.0) = regenerate, Black (0.0) = keep. Single frame broadcasts to all source frames.", }, ), + "keyframe_positions": ( + "STRING", + { + "default": "", + "description": "Comma-separated frame indices for Keyframe mode (e.g. '0,20,50,80'). " + "One position per source_clip frame, sorted ascending, within [0, target_frames-1]. " + "Leave empty or disconnected for even auto-spread.", + }, + ), }, } - def generate(self, source_clip, mode, target_frames, split_index, edge_frames, inpaint_mask=None): + def generate(self, source_clip, mode, target_frames, split_index, edge_frames, inpaint_mask=None, keyframe_positions=None): B, H, W, C = source_clip.shape dev = source_clip.device BLACK = 0.0 @@ -253,6 +265,53 @@ Parameter usage by mode: frames_to_generate = B return (mask, control_frames, source_clip, ph(), ph(), ph(), frames_to_generate) + elif mode == "Keyframe": + if B > target_frames: + raise ValueError( + f"Keyframe: source_clip has {B} frames but target_frames is only {target_frames}. " + "Need at least as many target frames as keyframes." 
+ ) + if keyframe_positions and keyframe_positions.strip(): + positions = [int(x.strip()) for x in keyframe_positions.split(",")] + if len(positions) != B: + raise ValueError( + f"Keyframe: expected {B} positions (one per source frame), got {len(positions)}." + ) + if positions != sorted(positions): + raise ValueError("Keyframe: positions must be sorted in ascending order.") + if len(set(positions)) != len(positions): + raise ValueError("Keyframe: positions must not contain duplicates.") + if positions[0] < 0 or positions[-1] >= target_frames: + raise ValueError( + f"Keyframe: all positions must be in [0, {target_frames - 1}]." + ) + else: + if B == 1: + positions = [0] + else: + positions = [round(i * (target_frames - 1) / (B - 1)) for i in range(B)] + + mask_parts, ctrl_parts = [], [] + prev_end = 0 + for i, pos in enumerate(positions): + gap = pos - prev_end + if gap > 0: + mask_parts.append(solid(gap, WHITE)) + ctrl_parts.append(solid(gap, GREY)) + mask_parts.append(solid(1, BLACK)) + ctrl_parts.append(source_clip[i:i+1]) + prev_end = pos + 1 + + trailing = target_frames - prev_end + if trailing > 0: + mask_parts.append(solid(trailing, WHITE)) + ctrl_parts.append(solid(trailing, GREY)) + + mask = torch.cat(mask_parts, dim=0) + control_frames = torch.cat(ctrl_parts, dim=0) + frames_to_generate = target_frames - B + return (mask, control_frames, source_clip, ph(), ph(), ph(), frames_to_generate) + raise ValueError(f"Unknown mode: {mode}")