Add optional audio input to FastAbsoluteSaver to mux audio into video
Adds an optional AUDIO input to FastAbsoluteSaver that is muxed into the encoded video via ffmpeg. The waveform is written to a temporary WAV file (stdlib `wave`, no new dependencies), passed to ffmpeg as a third input, and the streams are mapped explicitly with a per-format audio codec (aac, libopus, flac, or pcm_s16le). `-shortest` is used to match VideoHelperSuite behavior. The GIF format cannot carry audio, so the audio input is ignored for GIF output. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+73
-14
@@ -16,6 +16,7 @@ import tempfile
|
|||||||
import urllib.request
|
import urllib.request
|
||||||
import zipfile
|
import zipfile
|
||||||
import tarfile
|
import tarfile
|
||||||
|
import wave
|
||||||
|
|
||||||
_NODE_DIR = os.path.dirname(os.path.abspath(__file__))
|
_NODE_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
_FFMPEG_DIR = os.path.join(_NODE_DIR, "ffmpeg_bin")
|
_FFMPEG_DIR = os.path.join(_NODE_DIR, "ffmpeg_bin")
|
||||||
@@ -110,32 +111,33 @@ _COLOR_MGMT = [
|
|||||||
|
|
||||||
VIDEO_FORMATS = {
|
VIDEO_FORMATS = {
|
||||||
"mp4": {"ext": ".mp4", "codec": ["-c:v", "libx264"],
|
"mp4": {"ext": ".mp4", "codec": ["-c:v", "libx264"],
|
||||||
"quality": "crf", "color_mgmt": True,
|
"quality": "crf", "color_mgmt": True, "acodec": "aac",
|
||||||
"extra": ["-movflags", "+faststart"]},
|
"extra": ["-movflags", "+faststart"]},
|
||||||
"h265-mp4": {"ext": ".mp4", "codec": ["-c:v", "libx265", "-vtag", "hvc1",
|
"h265-mp4": {"ext": ".mp4", "codec": ["-c:v", "libx265", "-vtag", "hvc1",
|
||||||
"-preset", "medium", "-x265-params", "log-level=quiet"],
|
"-preset", "medium", "-x265-params", "log-level=quiet"],
|
||||||
"quality": "crf", "color_mgmt": True,
|
"quality": "crf", "color_mgmt": True, "acodec": "aac",
|
||||||
"extra": ["-movflags", "+faststart"]},
|
"extra": ["-movflags", "+faststart"]},
|
||||||
"av1-mp4": {"ext": ".mp4", "codec": ["-c:v", "libsvtav1"],
|
"av1-mp4": {"ext": ".mp4", "codec": ["-c:v", "libsvtav1"],
|
||||||
"quality": "crf", "color_mgmt": True,
|
"quality": "crf", "color_mgmt": True, "acodec": "aac",
|
||||||
"extra": ["-movflags", "+faststart"]},
|
"extra": ["-movflags", "+faststart"]},
|
||||||
"webm": {"ext": ".webm", "codec": ["-c:v", "libvpx-vp9"],
|
"webm": {"ext": ".webm", "codec": ["-c:v", "libvpx-vp9"],
|
||||||
"quality": "crf", "zero_bitrate": True, "color_mgmt": True},
|
"quality": "crf", "zero_bitrate": True, "color_mgmt": True,
|
||||||
|
"acodec": "libopus"},
|
||||||
"gif": {"ext": ".gif", "special": "gif"},
|
"gif": {"ext": ".gif", "special": "gif"},
|
||||||
"ffv1-mkv": {"ext": ".mkv", "codec": ["-c:v", "ffv1", "-level", "3",
|
"ffv1-mkv": {"ext": ".mkv", "codec": ["-c:v", "ffv1", "-level", "3",
|
||||||
"-coder", "1", "-context", "1", "-g", "1",
|
"-coder", "1", "-context", "1", "-g", "1",
|
||||||
"-slices", "16", "-slicecrc", "1"],
|
"-slices", "16", "-slicecrc", "1"],
|
||||||
"quality": "lossless"},
|
"quality": "lossless", "acodec": "flac"},
|
||||||
"prores-mov": {"ext": ".mov", "codec": ["-c:v", "prores_ks"],
|
"prores-mov": {"ext": ".mov", "codec": ["-c:v", "prores_ks"],
|
||||||
"quality": "profile", "color_mgmt": True},
|
"quality": "profile", "color_mgmt": True, "acodec": "pcm_s16le"},
|
||||||
"nvenc_h264-mp4":{"ext": ".mp4", "codec": ["-c:v", "h264_nvenc"],
|
"nvenc_h264-mp4":{"ext": ".mp4", "codec": ["-c:v", "h264_nvenc"],
|
||||||
"quality": "bitrate", "color_mgmt": True,
|
"quality": "bitrate", "color_mgmt": True, "acodec": "aac",
|
||||||
"extra": ["-movflags", "+faststart"]},
|
"extra": ["-movflags", "+faststart"]},
|
||||||
"nvenc_hevc-mp4":{"ext": ".mp4", "codec": ["-c:v", "hevc_nvenc", "-vtag", "hvc1"],
|
"nvenc_hevc-mp4":{"ext": ".mp4", "codec": ["-c:v", "hevc_nvenc", "-vtag", "hvc1"],
|
||||||
"quality": "bitrate", "color_mgmt": True,
|
"quality": "bitrate", "color_mgmt": True, "acodec": "aac",
|
||||||
"extra": ["-movflags", "+faststart"]},
|
"extra": ["-movflags", "+faststart"]},
|
||||||
"nvenc_av1-mp4": {"ext": ".mp4", "codec": ["-c:v", "av1_nvenc"],
|
"nvenc_av1-mp4": {"ext": ".mp4", "codec": ["-c:v", "av1_nvenc"],
|
||||||
"quality": "bitrate", "color_mgmt": True,
|
"quality": "bitrate", "color_mgmt": True, "acodec": "aac",
|
||||||
"extra": ["-movflags", "+faststart"]},
|
"extra": ["-movflags", "+faststart"]},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -182,6 +184,7 @@ class FastAbsoluteSaver:
|
|||||||
},
|
},
|
||||||
"optional": {
|
"optional": {
|
||||||
"scores_info": ("STRING", {"forceInput": True}),
|
"scores_info": ("STRING", {"forceInput": True}),
|
||||||
|
"audio": ("AUDIO", ),
|
||||||
},
|
},
|
||||||
# Hidden inputs used to capture the workflow graph
|
# Hidden inputs used to capture the workflow graph
|
||||||
"hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
|
"hidden": {"prompt": "PROMPT", "extra_pnginfo": "EXTRA_PNGINFO"},
|
||||||
@@ -277,10 +280,47 @@ class FastAbsoluteSaver:
|
|||||||
print(f"xx- Error saving {full_path}: {e}")
|
print(f"xx- Error saving {full_path}: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _write_temp_wav(self, audio):
|
||||||
|
"""Write a ComfyUI AUDIO dict ({waveform, sample_rate}) to a temp WAV file.
|
||||||
|
waveform is a tensor of shape [batch, channels, samples] in float [-1, 1].
|
||||||
|
Returns the file path, or None if no usable audio is present."""
|
||||||
|
try:
|
||||||
|
waveform = audio.get("waveform")
|
||||||
|
sample_rate = int(audio.get("sample_rate", 0))
|
||||||
|
if waveform is None or sample_rate <= 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Take the first item in the batch -> [channels, samples]
|
||||||
|
wf = waveform[0]
|
||||||
|
if hasattr(wf, "cpu"):
|
||||||
|
wf = wf.cpu().numpy()
|
||||||
|
wf = np.asarray(wf, dtype=np.float32)
|
||||||
|
if wf.ndim == 1:
|
||||||
|
wf = wf[np.newaxis, :]
|
||||||
|
channels = wf.shape[0]
|
||||||
|
if channels == 0 or wf.shape[1] == 0:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Interleave channels: [channels, samples] -> [samples, channels]
|
||||||
|
interleaved = np.clip(wf.T, -1.0, 1.0)
|
||||||
|
pcm = (interleaved * 32767.0).astype(np.int16)
|
||||||
|
|
||||||
|
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
||||||
|
tmp.close()
|
||||||
|
with wave.open(tmp.name, "wb") as w:
|
||||||
|
w.setnchannels(channels)
|
||||||
|
w.setsampwidth(2)
|
||||||
|
w.setframerate(sample_rate)
|
||||||
|
w.writeframes(pcm.tobytes())
|
||||||
|
return tmp.name
|
||||||
|
except Exception as e:
|
||||||
|
print(f"xx- FastSaver: Could not prepare audio, skipping: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
def save_video(self, frames_np, output_path, filename_prefix, use_timestamp, fps, crf, pixel_format, video_format,
|
def save_video(self, frames_np, output_path, filename_prefix, use_timestamp, fps, crf, pixel_format, video_format,
|
||||||
auto_increment=False, counter_digits=4,
|
auto_increment=False, counter_digits=4,
|
||||||
scores_list=None, metadata_key="sharpness_score", save_workflow=False, prompt_data=None, extra_data=None,
|
scores_list=None, metadata_key="sharpness_score", save_workflow=False, prompt_data=None, extra_data=None,
|
||||||
bitrate=10, prores_profile="hq", gif_dither="sierra2_4a"):
|
bitrate=10, prores_profile="hq", gif_dither="sierra2_4a", audio=None):
|
||||||
"""Save image batch as a video file using ffmpeg. frames_np is a list/array of uint8 numpy arrays."""
|
"""Save image batch as a video file using ffmpeg. frames_np is a list/array of uint8 numpy arrays."""
|
||||||
ffmpeg_path = _get_ffmpeg()
|
ffmpeg_path = _get_ffmpeg()
|
||||||
fmt = VIDEO_FORMATS[video_format]
|
fmt = VIDEO_FORMATS[video_format]
|
||||||
@@ -301,6 +341,8 @@ class FastAbsoluteSaver:
|
|||||||
|
|
||||||
# --- GIF SPECIAL CASE ---
|
# --- GIF SPECIAL CASE ---
|
||||||
if fmt.get("special") == "gif":
|
if fmt.get("special") == "gif":
|
||||||
|
if audio is not None:
|
||||||
|
print("xx- FastSaver: GIF format cannot carry audio, ignoring audio input.")
|
||||||
filter_str = (
|
filter_str = (
|
||||||
"[0:v] split [a][b]; [a] palettegen=reserve_transparent=on"
|
"[0:v] split [a][b]; [a] palettegen=reserve_transparent=on"
|
||||||
":transparency_color=ffffff [p]; [b][p] paletteuse=dither=" + gif_dither
|
":transparency_color=ffffff [p]; [b][p] paletteuse=dither=" + gif_dither
|
||||||
@@ -352,11 +394,18 @@ class FastAbsoluteSaver:
|
|||||||
self._meta_tmpfile.close()
|
self._meta_tmpfile.close()
|
||||||
meta_file = self._meta_tmpfile.name
|
meta_file = self._meta_tmpfile.name
|
||||||
|
|
||||||
|
# --- AUDIO (optional) ---
|
||||||
|
audio_file = self._write_temp_wav(audio) if audio is not None else None
|
||||||
|
|
||||||
# --- BUILD FFMPEG COMMAND ---
|
# --- BUILD FFMPEG COMMAND ---
|
||||||
|
# Input order: 0=rawvideo (stdin), 1=metadata, 2=audio (if present)
|
||||||
cmd = [ffmpeg_path, "-y",
|
cmd = [ffmpeg_path, "-y",
|
||||||
"-f", "rawvideo", "-pix_fmt", "rgb24",
|
"-f", "rawvideo", "-pix_fmt", "rgb24",
|
||||||
"-s", f"{w}x{h}", "-r", str(fps), "-i", "-",
|
"-s", f"{w}x{h}", "-r", str(fps), "-i", "-",
|
||||||
"-i", meta_file, "-map_metadata", "1"]
|
"-i", meta_file]
|
||||||
|
if audio_file:
|
||||||
|
cmd.extend(["-i", audio_file])
|
||||||
|
cmd.extend(["-map_metadata", "1"])
|
||||||
|
|
||||||
# Codec args
|
# Codec args
|
||||||
cmd.extend(fmt["codec"])
|
cmd.extend(fmt["codec"])
|
||||||
@@ -398,6 +447,11 @@ class FastAbsoluteSaver:
|
|||||||
if "extra" in fmt:
|
if "extra" in fmt:
|
||||||
cmd.extend(fmt["extra"])
|
cmd.extend(fmt["extra"])
|
||||||
|
|
||||||
|
# Audio: map the video stream explicitly and mux in the audio track
|
||||||
|
if audio_file:
|
||||||
|
acodec = fmt.get("acodec", "aac")
|
||||||
|
cmd.extend(["-map", "0:v:0", "-map", "2:a:0", "-c:a", acodec, "-shortest"])
|
||||||
|
|
||||||
cmd.append(out_file)
|
cmd.append(out_file)
|
||||||
|
|
||||||
codec_label = fmt["codec"][fmt["codec"].index("-c:v") + 1] if "-c:v" in fmt["codec"] else video_format
|
codec_label = fmt["codec"][fmt["codec"].index("-c:v") + 1] if "-c:v" in fmt["codec"] else video_format
|
||||||
@@ -413,11 +467,16 @@ class FastAbsoluteSaver:
|
|||||||
stderr = proc.stderr.read()
|
stderr = proc.stderr.read()
|
||||||
proc.wait()
|
proc.wait()
|
||||||
|
|
||||||
# Clean up metadata temp file
|
# Clean up temp files
|
||||||
try:
|
try:
|
||||||
os.remove(meta_file)
|
os.remove(meta_file)
|
||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
|
if audio_file:
|
||||||
|
try:
|
||||||
|
os.remove(audio_file)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
|
||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
raise RuntimeError(f"ffmpeg failed: {stderr.decode()}")
|
raise RuntimeError(f"ffmpeg failed: {stderr.decode()}")
|
||||||
@@ -430,7 +489,7 @@ class FastAbsoluteSaver:
|
|||||||
webp_lossless, webp_quality, webp_method,
|
webp_lossless, webp_quality, webp_method,
|
||||||
video_fps, video_crf, video_pixel_format,
|
video_fps, video_crf, video_pixel_format,
|
||||||
video_bitrate, prores_profile, gif_dither,
|
video_bitrate, prores_profile, gif_dither,
|
||||||
scores_info=None, prompt=None, extra_pnginfo=None):
|
scores_info=None, audio=None, prompt=None, extra_pnginfo=None):
|
||||||
|
|
||||||
output_path = output_path.strip('"')
|
output_path = output_path.strip('"')
|
||||||
if not os.path.isabs(output_path):
|
if not os.path.isabs(output_path):
|
||||||
@@ -459,7 +518,7 @@ class FastAbsoluteSaver:
|
|||||||
save_workflow=save_workflow_metadata, prompt_data=prompt,
|
save_workflow=save_workflow_metadata, prompt_data=prompt,
|
||||||
extra_data=extra_pnginfo,
|
extra_data=extra_pnginfo,
|
||||||
bitrate=video_bitrate, prores_profile=prores_profile,
|
bitrate=video_bitrate, prores_profile=prores_profile,
|
||||||
gif_dither=gif_dither)
|
gif_dither=gif_dither, audio=audio)
|
||||||
# Save metadata sidecar PNG next to the video file
|
# Save metadata sidecar PNG next to the video file
|
||||||
if save_metadata_png:
|
if save_metadata_png:
|
||||||
png_path = os.path.splitext(out_file)[0] + ".png"
|
png_path = os.path.splitext(out_file)[0] + ".png"
|
||||||
|
|||||||
Reference in New Issue
Block a user