From 934a40163315aae4f75294770790463723923157 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Fri, 27 Mar 2026 21:20:00 +0100
Subject: [PATCH] perf: replace PIL+PNG frame files with direct ffmpeg stdin
 pipe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

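Pipe the tensor's raw RGB24 bytes to ffmpeg's stdin in a single
subprocess.run() call, instead of saving one PNG per frame with PIL and
having ffmpeg read the numbered files back from a temporary directory.
Eliminates all intermediate PNG file I/O — much faster for large frame counts.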

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 nodes/feature_extractor.py | 45 +++++++++++++++-----------------------
 1 file changed, 18 insertions(+), 27 deletions(-)

diff --git a/nodes/feature_extractor.py b/nodes/feature_extractor.py
index 2c199fb..4b0117e 100644
--- a/nodes/feature_extractor.py
+++ b/nodes/feature_extractor.py
@@ -79,37 +79,28 @@ def _hash_inputs(video_tensor, cot_text):
 
 
 def _save_video_tensor_to_mp4(video_tensor, output_path, fps=30):
-    """Save ComfyUI IMAGE tensor [T,H,W,C] to MP4 via PIL + ffmpeg.
+    """Save ComfyUI IMAGE tensor [T,H,W,C] to MP4 by piping raw RGB to ffmpeg.
 
-    torchvision.io.write_video requires the optional 'av' (PyAV) package
-    which is not installed in most ComfyUI environments. ffmpeg is always
-    available in ComfyUI Docker images.
+    Avoids intermediate PNG files — frames are streamed directly to ffmpeg stdin.
     """
-    from PIL import Image
-    import shutil
-
     frames_np = (video_tensor.cpu().numpy() * 255).astype("uint8")
+    T, H, W, C = frames_np.shape
 
-    frame_dir = output_path + "_frames"
-    os.makedirs(frame_dir, exist_ok=True)
-    try:
-        for i, frame in enumerate(frames_np):
-            Image.fromarray(frame).save(os.path.join(frame_dir, f"{i:06d}.png"))
-
-        result = subprocess.run(
-            [
-                "ffmpeg", "-y",
-                "-framerate", str(fps),
-                "-i", os.path.join(frame_dir, "%06d.png"),
-                "-c:v", "libx264", "-pix_fmt", "yuv420p",
-                output_path,
-            ],
-            capture_output=True, text=True,
-        )
-        if result.returncode != 0:
-            raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{result.stderr}")
-    finally:
-        shutil.rmtree(frame_dir, ignore_errors=True)
+    result = subprocess.run(
+        [
+            "ffmpeg", "-y",
+            "-f", "rawvideo", "-vcodec", "rawvideo",
+            "-s", f"{W}x{H}", "-pix_fmt", "rgb24",
+            "-r", str(fps),
+            "-i", "pipe:0",
+            "-c:v", "libx264", "-pix_fmt", "yuv420p",
+            output_path,
+        ],
+        input=frames_np.tobytes(),
+        capture_output=True,
+    )
+    if result.returncode != 0:
+        raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{result.stderr.decode()}")
 
 
 class PrismAudioFeatureExtractor: