From 934a40163315aae4f75294770790463723923157 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Fri, 27 Mar 2026 21:20:00 +0100 Subject: [PATCH] perf: replace PIL+PNG frame files with direct ffmpeg stdin pipe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stream raw RGB bytes from tensor directly to ffmpeg stdin. Eliminates all intermediate PNG file I/O — much faster for large frame counts. Co-Authored-By: Claude Sonnet 4.6 --- nodes/feature_extractor.py | 45 +++++++++++++++----------------------- 1 file changed, 18 insertions(+), 27 deletions(-) diff --git a/nodes/feature_extractor.py b/nodes/feature_extractor.py index 2c199fb..4b0117e 100644 --- a/nodes/feature_extractor.py +++ b/nodes/feature_extractor.py @@ -79,37 +79,28 @@ def _hash_inputs(video_tensor, cot_text): def _save_video_tensor_to_mp4(video_tensor, output_path, fps=30): - """Save ComfyUI IMAGE tensor [T,H,W,C] to MP4 via PIL + ffmpeg. + """Save ComfyUI IMAGE tensor [T,H,W,C] to MP4 by piping raw RGB to ffmpeg. - torchvision.io.write_video requires the optional 'av' (PyAV) package - which is not installed in most ComfyUI environments. ffmpeg is always - available in ComfyUI Docker images. + Avoids intermediate PNG files — frames are streamed directly to ffmpeg stdin. """ - from PIL import Image - import shutil - frames_np = (video_tensor.cpu().numpy() * 255).astype("uint8") + T, H, W, C = frames_np.shape - frame_dir = output_path + "_frames" - os.makedirs(frame_dir, exist_ok=True) - try: - for i, frame in enumerate(frames_np): - Image.fromarray(frame).save(os.path.join(frame_dir, f"{i:06d}.png")) - - result = subprocess.run( - [ - "ffmpeg", "-y", - "-framerate", str(fps), - "-i", os.path.join(frame_dir, "%06d.png"), - "-c:v", "libx264", "-pix_fmt", "yuv420p", - output_path, - ], - capture_output=True, text=True, - ) - if result.returncode != 0: - raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{result.stderr}") - finally: - shutil.rmtree(frame_dir, ignore_errors=True) + result = subprocess.run( + [ + "ffmpeg", "-y", + "-f", "rawvideo", "-vcodec", "rawvideo", + "-s", f"{W}x{H}", "-pix_fmt", "rgb24", + "-r", str(fps), + "-i", "pipe:0", + "-c:v", "libx264", "-pix_fmt", "yuv420p", + output_path, + ], + input=frames_np.tobytes(), + capture_output=True, + ) + if result.returncode != 0: + raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{result.stderr.decode()}") class PrismAudioFeatureExtractor: