perf: replace PIL+PNG frame files with direct ffmpeg stdin pipe

Stream raw RGB bytes from the tensor directly to ffmpeg's stdin. This eliminates all intermediate PNG file I/O, which is much faster for large frame counts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 21:20:00 +01:00
parent b3ac9ab22f
commit 934a401633
1 changed files with 18 additions and 27 deletions
@@ -79,37 +79,28 @@ def _hash_inputs(video_tensor, cot_text):
 def _save_video_tensor_to_mp4(video_tensor, output_path, fps=30):
-    """Save ComfyUI IMAGE tensor [T,H,W,C] to MP4 via PIL + ffmpeg.
+    """Save ComfyUI IMAGE tensor [T,H,W,C] to MP4 by piping raw RGB to ffmpeg.
-    torchvision.io.write_video requires the optional 'av' (PyAV) package
+    Avoids intermediate PNG files — frames are streamed directly to ffmpeg stdin.
    which is not installed in most ComfyUI environments. ffmpeg is always
    available in ComfyUI Docker images.
    """
    from PIL import Image
    import shutil
    frames_np = (video_tensor.cpu().numpy() * 255).astype("uint8")
-
+    T, H, W, C = frames_np.shape
    frame_dir = output_path + "_frames"
    os.makedirs(frame_dir, exist_ok=True)
    try:
        for i, frame in enumerate(frames_np):
            Image.fromarray(frame).save(os.path.join(frame_dir, f"{i:06d}.png"))
    result = subprocess.run(
        [
            "ffmpeg", "-y",
-                "-framerate", str(fps),
+            "-f", "rawvideo", "-vcodec", "rawvideo",
-                "-i", os.path.join(frame_dir, "%06d.png"),
+            "-s", f"{W}x{H}", "-pix_fmt", "rgb24",
            "-r", str(fps),
            "-i", "pipe:0",
            "-c:v", "libx264", "-pix_fmt", "yuv420p",
            output_path,
        ],
-            capture_output=True, text=True,
+        input=frames_np.tobytes(),
        capture_output=True,
    )
    if result.returncode != 0:
-            raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{result.stderr}")
+        raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{result.stderr.decode()}")
    finally:
        shutil.rmtree(frame_dir, ignore_errors=True)
 class PrismAudioFeatureExtractor: