Add optional audio input to FastAbsoluteSaver to mux audio into video

Adds an optional AUDIO input that is muxed into the encoded video via
ffmpeg. Writes the waveform to a temp WAV (stdlib wave, no new deps),
adds it as a third ffmpeg input, and maps streams explicitly with a
per-format audio codec (aac/libopus/flac/pcm). Uses -shortest to match
VideoHelperSuite behavior. GIF ignores audio.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 20:07:39 +02:00
parent 7f1fc92c54
commit aae0bdf746
+73 -14
View File
@@ -16,6 +16,7 @@ import tempfile
import urllib.request
import zipfile
import tarfile
import wave
_NODE_DIR = os.path.dirname(os.path.abspath(__file__))
_FFMPEG_DIR = os.path.join(_NODE_DIR, "ffmpeg_bin")
@@ -110,32 +111,33 @@ _COLOR_MGMT = [
VIDEO_FORMATS = {
"mp4": {"ext": ".mp4", "codec": ["-c:v", "libx264"],
"quality": "crf", "color_mgmt": True,
"quality": "crf", "color_mgmt": True, "acodec": "aac",
"extra": ["-movflags", "+faststart"]},
"h265-mp4": {"ext": ".mp4", "codec": ["-c:v", "libx265", "-vtag", "hvc1",
"-preset", "medium", "-x265-params", "log-level=quiet"],
"quality": "crf", "color_mgmt": True,
"quality": "crf", "color_mgmt": True, "acodec": "aac",
"extra": ["-movflags", "+faststart"]},
"av1-mp4": {"ext": ".mp4", "codec": ["-c:v", "libsvtav1"],
"quality": "crf", "color_mgmt": True,
"quality": "crf", "color_mgmt": True, "acodec": "aac",
"extra": ["-movflags", "+faststart"]},
"webm": {"ext": ".webm", "codec": ["-c:v", "libvpx-vp9"],
"quality": "crf", "zero_bitrate": True, "color_mgmt": True},
"quality": "crf", "zero_bitrate": True, "color_mgmt": True,
"acodec": "libopus"},
"gif": {"ext": ".gif", "special": "gif"},
"ffv1-mkv": {"ext": ".mkv", "codec": ["-c:v", "ffv1", "-level", "3",
"-coder", "1", "-context", "1", "-g", "1",
"-slices", "16", "-slicecrc", "1"],
"quality": "lossless"},
"quality": "lossless", "acodec": "flac"},
"prores-mov": {"ext": ".mov", "codec": ["-c:v", "prores_ks"],
"quality": "profile", "color_mgmt": True},
"quality": "profile", "color_mgmt": True, "acodec": "pcm_s16le"},
"nvenc_h264-mp4":{"ext": ".mp4", "codec": ["-c:v", "h264_nvenc"],
"quality": "bitrate", "color_mgmt": True,
"quality": "bitrate", "color_mgmt": True, "acodec": "aac",
"extra": ["-movflags", "+faststart"]},
"nvenc_hevc-mp4":{"ext": ".mp4", "codec": ["-c:v", "hevc_nvenc", "-vtag", "hvc1"],
"quality": "bitrate", "color_mgmt": True,
"quality": "bitrate", "color_mgmt": True, "acodec": "aac",
"extra": ["-movflags", "+faststart"]},
"nvenc_av1-mp4": {"ext": ".mp4", "codec": ["-c:v", "av1_nvenc"],
"quality": "bitrate", "color_mgmt": True,
"quality": "bitrate", "color_mgmt": True, "acodec": "aac",
"extra": ["-movflags", "+faststart"]},
}
@@ -182,6 +184,7 @@ class FastAbsoluteSaver:
},
"optional": {
"scores_info": ("STRING", {"forceInput": True}),
"audio": ("AUDIO", ),
},
# Hidden inputs used to capture the workflow graph
"hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
@@ -277,10 +280,47 @@ class FastAbsoluteSaver:
print(f"xx- Error saving {full_path}: {e}")
return False
def _write_temp_wav(self, audio):
    """Write a ComfyUI AUDIO dict ({waveform, sample_rate}) to a temp WAV file.

    Only the first item of the batch is written, as 16-bit PCM with the
    channels interleaved. ``waveform`` is expected to be a tensor/array of
    shape [batch, channels, samples] holding floats in [-1, 1]; values
    outside that range are clipped and NaN samples are written as silence.

    Returns the path of the temp file (the caller is responsible for
    deleting it), or None if no usable audio is present or the file could
    not be written. This is best-effort: failures are reported on stdout
    and never raised, and a partially written temp file is removed.
    """
    tmp_path = None
    try:
        waveform = audio.get("waveform")
        sample_rate = int(audio.get("sample_rate", 0))
        if waveform is None or sample_rate <= 0:
            return None

        # Take the first item in the batch -> [channels, samples]
        wf = waveform[0]
        if hasattr(wf, "cpu"):
            # Tensor input: detach from autograd and upcast to float32 so
            # grad-tracking or half/bfloat16 tensors convert to numpy
            # instead of raising and silently dropping the audio.
            wf = wf.detach().cpu().float().numpy()
        wf = np.asarray(wf, dtype=np.float32)
        if wf.ndim == 1:
            wf = wf[np.newaxis, :]
        if wf.ndim != 2:
            # Anything other than [channels, samples] cannot be interleaved
            # meaningfully; treat it as "no usable audio".
            return None
        channels = wf.shape[0]
        if channels == 0 or wf.shape[1] == 0:
            return None

        # NaN survives np.clip and has no defined int16 value, so map it
        # to silence before clipping.
        wf = np.nan_to_num(wf, nan=0.0)
        # Interleave channels: [channels, samples] -> [samples, channels]
        interleaved = np.clip(wf.T, -1.0, 1.0)
        pcm = (interleaved * 32767.0).astype(np.int16)

        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        tmp_path = tmp.name
        # Close our handle first so wave can reopen the path by name.
        tmp.close()
        with wave.open(tmp_path, "wb") as w:
            w.setnchannels(channels)
            w.setsampwidth(2)
            w.setframerate(sample_rate)
            w.writeframes(pcm.tobytes())
        return tmp_path
    except Exception as e:
        # delete=False means nobody else will clean this up on failure.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
        print(f"xx- FastSaver: Could not prepare audio, skipping: {e}")
        return None
def save_video(self, frames_np, output_path, filename_prefix, use_timestamp, fps, crf, pixel_format, video_format,
auto_increment=False, counter_digits=4,
scores_list=None, metadata_key="sharpness_score", save_workflow=False, prompt_data=None, extra_data=None,
bitrate=10, prores_profile="hq", gif_dither="sierra2_4a"):
bitrate=10, prores_profile="hq", gif_dither="sierra2_4a", audio=None):
"""Save image batch as a video file using ffmpeg. frames_np is a list/array of uint8 numpy arrays."""
ffmpeg_path = _get_ffmpeg()
fmt = VIDEO_FORMATS[video_format]
@@ -301,6 +341,8 @@ class FastAbsoluteSaver:
# --- GIF SPECIAL CASE ---
if fmt.get("special") == "gif":
if audio is not None:
print("xx- FastSaver: GIF format cannot carry audio, ignoring audio input.")
filter_str = (
"[0:v] split [a][b]; [a] palettegen=reserve_transparent=on"
":transparency_color=ffffff [p]; [b][p] paletteuse=dither=" + gif_dither
@@ -352,11 +394,18 @@ class FastAbsoluteSaver:
self._meta_tmpfile.close()
meta_file = self._meta_tmpfile.name
# --- AUDIO (optional) ---
audio_file = self._write_temp_wav(audio) if audio is not None else None
# --- BUILD FFMPEG COMMAND ---
# Input order: 0=rawvideo (stdin), 1=metadata, 2=audio (if present)
cmd = [ffmpeg_path, "-y",
"-f", "rawvideo", "-pix_fmt", "rgb24",
"-s", f"{w}x{h}", "-r", str(fps), "-i", "-",
"-i", meta_file, "-map_metadata", "1"]
"-i", meta_file]
if audio_file:
cmd.extend(["-i", audio_file])
cmd.extend(["-map_metadata", "1"])
# Codec args
cmd.extend(fmt["codec"])
@@ -398,6 +447,11 @@ class FastAbsoluteSaver:
if "extra" in fmt:
cmd.extend(fmt["extra"])
# Audio: map the video stream explicitly and mux in the audio track
if audio_file:
acodec = fmt.get("acodec", "aac")
cmd.extend(["-map", "0:v:0", "-map", "2:a:0", "-c:a", acodec, "-shortest"])
cmd.append(out_file)
codec_label = fmt["codec"][fmt["codec"].index("-c:v") + 1] if "-c:v" in fmt["codec"] else video_format
@@ -413,11 +467,16 @@ class FastAbsoluteSaver:
stderr = proc.stderr.read()
proc.wait()
# Clean up metadata temp file
# Clean up temp files
try:
os.remove(meta_file)
except OSError:
pass
if audio_file:
try:
os.remove(audio_file)
except OSError:
pass
if proc.returncode != 0:
raise RuntimeError(f"ffmpeg failed: {stderr.decode()}")
@@ -430,7 +489,7 @@ class FastAbsoluteSaver:
webp_lossless, webp_quality, webp_method,
video_fps, video_crf, video_pixel_format,
video_bitrate, prores_profile, gif_dither,
scores_info=None, prompt=None, extra_pnginfo=None):
scores_info=None, audio=None, prompt=None, extra_pnginfo=None):
output_path = output_path.strip('"')
if not os.path.isabs(output_path):
@@ -459,7 +518,7 @@ class FastAbsoluteSaver:
save_workflow=save_workflow_metadata, prompt_data=prompt,
extra_data=extra_pnginfo,
bitrate=video_bitrate, prores_profile=prores_profile,
gif_dither=gif_dither)
gif_dither=gif_dither, audio=audio)
# Save metadata sidecar PNG next to the video file
if save_metadata_png:
png_path = os.path.splitext(out_file)[0] + ".png"