perf: replace PIL+PNG frame files with direct ffmpeg stdin pipe

Stream raw RGB bytes from tensor directly to ffmpeg stdin.
Eliminates all intermediate PNG file I/O — much faster for large frame counts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-27 21:20:00 +01:00
parent b3ac9ab22f
commit 934a401633
+10 -19
View File
@@ -79,37 +79,28 @@ def _hash_inputs(video_tensor, cot_text):
def _save_video_tensor_to_mp4(video_tensor, output_path, fps=30):
    """Save a ComfyUI IMAGE tensor [T,H,W,C] to MP4 by piping raw frames to ffmpeg.

    The frames are converted to 8-bit, serialised into one byte buffer and
    handed to ffmpeg on stdin, so no intermediate image files are written.

    Args:
        video_tensor: Tensor of shape [T, H, W, C]. Values are scaled by 255,
            so they are assumed to be floats in [0, 1]; anything outside that
            range is clipped. C may be 1 (gray), 3 (RGB) or 4 (RGBA).
        output_path: Destination file; overwritten if it exists (``-y``).
        fps: Frame rate of the encoded video.

    Raises:
        ValueError: If the tensor is not 4-D or has an unsupported channel
            count.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    # detach()/float() so tensors that require grad or use half/bfloat16
    # precision can still be converted to NumPy.
    frames = video_tensor.detach().cpu().float().numpy()
    if frames.ndim != 4:
        raise ValueError(
            "[PrismAudio] expected a [T,H,W,C] tensor, "
            f"got shape {tuple(frames.shape)}"
        )
    _, height, width, channels = frames.shape

    # The raw input pixel format must match the channel layout of the buffer,
    # otherwise ffmpeg mis-slices the byte stream into frames.
    pix_fmt = {1: "gray", 3: "rgb24", 4: "rgba"}.get(channels)
    if pix_fmt is None:
        raise ValueError(
            f"[PrismAudio] unsupported channel count {channels}; expected 1, 3 or 4"
        )

    # Clip before the cast: out-of-range floats would otherwise wrap around
    # when converted to uint8 (e.g. 1.004 * 255 -> 256 -> 0).
    frames_u8 = (frames * 255).clip(0, 255).astype("uint8")

    command = [
        "ffmpeg", "-y",
        "-f", "rawvideo", "-vcodec", "rawvideo",
        "-s", f"{width}x{height}", "-pix_fmt", pix_fmt,
        "-framerate", str(fps),
        "-i", "pipe:0",
        # yuv420p requires even dimensions; pad odd sizes up by one pixel.
        "-vf", "pad=ceil(iw/2)*2:ceil(ih/2)*2",
        "-c:v", "libx264", "-pix_fmt", "yuv420p",
        output_path,
    ]
    # tobytes() always emits C-order bytes, whatever the array's strides are.
    result = subprocess.run(command, input=frames_u8.tobytes(), capture_output=True)
    if result.returncode != 0:
        # errors="replace" keeps a stray non-UTF-8 byte in ffmpeg's output
        # from hiding the real failure behind a UnicodeDecodeError.
        stderr_text = result.stderr.decode(errors="replace")
        raise RuntimeError(f"[PrismAudio] ffmpeg failed:\n{stderr_text}")
class PrismAudioFeatureExtractor: class PrismAudioFeatureExtractor: