diff --git a/README.md b/README.md
index 3c3de82..25fde3c 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@ muffled or band‑limited audio gets believable "air" and detail back.
 - [Nodes](#nodes)
   - [UniverSR Model Loader](#universr-model-loader)
   - [UniverSR Super-Resolution](#universr-super-resolution)
+  - [UniverSR Load Video Audio](#universr-load-video-audio)
+  - [UniverSR Video Combiner](#universr-video-combiner)
 - [Choosing `input_sr`](#choosing-input_sr-the-one-setting-that-matters-most)
 - [Recommended settings](#recommended-settings)
 - [Long audio & chunking](#long-audio--chunking)
@@ -47,6 +49,8 @@ muffled or band‑limited audio gets believable "air" and detail back.
 - 🎛️ **Wet/dry blend** — full SR, or dial it back to gently brighten already-48 kHz audio (BWE).
 - 🎲 **Seed control** with **global-RNG isolation** (won't perturb other nodes' randomness).
 - 📊 Optional **before/after spectrogram** image output.
+- 🎬 **Video in / out** — extract a video's audio, super-resolve it, and remux it back onto the
+  original video (no video re-encode), all with `ffmpeg`.
 - 📦 **Self-contained** — the UniverSR inference code is vendored; the only extra dependency beyond
   ComfyUI's stack is `torchdiffeq`.
 
@@ -74,6 +78,10 @@ that typically needs installing.) The `universr` package itself is **vendored**
 `pip`-installed copy is found it is preferred, otherwise the bundled one is used, so no `git+`
 install is required.
 
+The **video** nodes additionally need **`ffmpeg`** on your `PATH` (`apt install ffmpeg` /
+`brew install ffmpeg` / `conda install -c conda-forge ffmpeg`) and `soundfile` (in `requirements.txt`).
+The audio SR nodes work without either.
+
 > **GPU recommended.** Inference runs on CUDA if available and falls back to CPU (much slower).
 
 ---
@@ -142,6 +150,46 @@ Runs the super-resolution. Outputs: **`AUDIO`** (48 kHz) and **`IMAGE`** (spectr
 | `unload_model` | bool | `false` | — | Free the model from VRAM after this run. |
 | `show_spectrogram` | bool | `true` | — | Also output a before/after spectrogram comparison image. |
 
+### UniverSR Load Video Audio
+
+Extracts a video's audio track (native rate/channels, via `ffmpeg`) and keeps a reference to the
+source video for remuxing. Outputs **`AUDIO`** and **`UNIVERSR_VIDEO`**, and previews the video inline.
+
+| Input | Type | Default | Description |
+|---|---|---|---|
+| `video_path` | string | `""` | Absolute path to a video. Takes priority over `video`. |
+| `video` *(opt.)* | choice | — | Pick a file from ComfyUI's `input/` folder (used when `video_path` is empty). |
+| `start_time` *(opt.)* | float | `0.0` | Trim start, seconds. |
+| `duration` *(opt.)* | float | `0.0` | Trim length, seconds (`0` = to end). |
+
+### UniverSR Video Combiner
+
+Muxes an `AUDIO` track onto the source video **without re-encoding the video** (`-c:v copy`) and saves
+the result. If the loader trimmed the clip, the same trim is applied to the video so A/V stay aligned.
+
+| Input | Type | Default | Description |
+|---|---|---|---|
+| `video` | UNIVERSR_VIDEO | — | From **UniverSR Load Video Audio**. |
+| `audio` | AUDIO | — | The enhanced 48 kHz audio. |
+| `filename_prefix` | string | `UniverSR` | Output name prefix (auto-incremented). |
+| `audio_codec` *(opt.)* | choice | `aac` | `aac` / `flac` / `pcm_s16le` / `libopus` / `libmp3lame`. |
+| `save_output` *(opt.)* | bool | `true` | Save to `output/` (else `temp/`). |
+
+Output: `output_path` (string) and an inline video preview.
+
+#### Video workflow
+
+```
+UniverSR Load Video Audio ──┬─ audio ─► UniverSR Super-Resolution ─ audio ─┐
+                            │                                              ▼
+                            └────────────── video ──────────────► UniverSR Video Combiner ─► .mp4
+   UniverSR Model Loader ─► (Super-Resolution)
+```
+
+Load the video → super-resolve its audio (set `input_sr` to the content bandwidth) → feed the enhanced
+audio **and** the `video` reference into the combiner. Ready-made graph:
+[`example_workflows/universr_video.json`](example_workflows/universr_video.json).
+
 ---
 
 ## Choosing `input_sr` (the one setting that matters most)
diff --git a/__init__.py b/__init__.py
index 067ddb3..3ef44aa 100644
--- a/__init__.py
+++ b/__init__.py
@@ -1,10 +1,20 @@
 """ComfyUI-UniverSR — vocoder-free audio super-resolution (8/12/16/24 kHz -> 48 kHz)."""
 
+NODE_CLASS_MAPPINGS = {}
+NODE_DISPLAY_NAME_MAPPINGS = {}
+
 try:
-    from .nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
-except Exception as e:  # surface import errors in the ComfyUI log without crashing startup
-    print(f"[ComfyUI-UniverSR] Failed to load nodes: {e}")
-    NODE_CLASS_MAPPINGS = {}
-    NODE_DISPLAY_NAME_MAPPINGS = {}
+    from .nodes import NODE_CLASS_MAPPINGS as _sr_nodes, NODE_DISPLAY_NAME_MAPPINGS as _sr_display
+    NODE_CLASS_MAPPINGS.update(_sr_nodes)
+    NODE_DISPLAY_NAME_MAPPINGS.update(_sr_display)
+except Exception as e:  # surface errors in the ComfyUI log without crashing startup
+    print(f"[ComfyUI-UniverSR] Failed to load SR nodes: {e}")
+
+try:
+    from .nodes_video import NODE_CLASS_MAPPINGS as _vid_nodes, NODE_DISPLAY_NAME_MAPPINGS as _vid_display
+    NODE_CLASS_MAPPINGS.update(_vid_nodes)
+    NODE_DISPLAY_NAME_MAPPINGS.update(_vid_display)
+except Exception as e:  # video nodes are optional (need ffmpeg/soundfile)
+    print(f"[ComfyUI-UniverSR] Failed to load video nodes: {e}")
 
 __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
diff --git a/example_workflows/universr_video.json b/example_workflows/universr_video.json
new file mode 100644
index 0000000..e111db1
--- /dev/null
+++ b/example_workflows/universr_video.json
@@ -0,0 +1,84 @@
+{
+  "last_node_id": 4,
+  "last_link_id": 5,
+  "nodes": [
+    {
+      "id": 1,
+      "type": "UniverSRLoadVideoAudio",
+      "pos": [100, 200],
+      "size": [360, 150],
+      "flags": {},
+      "order": 0,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {"name": "audio", "type": "AUDIO", "links": [1], "slot_index": 0},
+        {"name": "video", "type": "UNIVERSR_VIDEO", "links": [2], "slot_index": 1}
+      ],
+      "properties": {"Node name for S&R": "UniverSRLoadVideoAudio"},
+      "widgets_values": ["/path/to/video.mp4", "(none)", 0.0, 0.0]
+    },
+    {
+      "id": 2,
+      "type": "UniverSRModelLoader",
+      "pos": [100, 400],
+      "size": [360, 130],
+      "flags": {},
+      "order": 1,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [
+        {"name": "model", "type": "UNIVERSR_MODEL", "links": [3], "slot_index": 0}
+      ],
+      "properties": {"Node name for S&R": "UniverSRModelLoader"},
+      "widgets_values": ["universr-audio", "auto", "", ""]
+    },
+    {
+      "id": 3,
+      "type": "UniverSRSampler",
+      "pos": [520, 200],
+      "size": [340, 320],
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [
+        {"name": "audio", "type": "AUDIO", "link": 1},
+        {"name": "model", "type": "UNIVERSR_MODEL", "link": 3}
+      ],
+      "outputs": [
+        {"name": "audio", "type": "AUDIO", "links": [4], "slot_index": 0},
+        {"name": "spectrogram", "type": "IMAGE", "links": [], "slot_index": 1}
+      ],
+      "properties": {"Node name for S&R": "UniverSRSampler"},
+      "widgets_values": [16000, "midpoint", 4, 2.0, 0, "randomize", 10.0, 0.5, 1.0, false, true]
+    },
+    {
+      "id": 4,
+      "type": "UniverSRVideoCombiner",
+      "pos": [920, 200],
+      "size": [340, 170],
+      "flags": {},
+      "order": 3,
+      "mode": 0,
+      "inputs": [
+        {"name": "video", "type": "UNIVERSR_VIDEO", "link": 2},
+        {"name": "audio", "type": "AUDIO", "link": 4}
+      ],
+      "outputs": [
+        {"name": "output_path", "type": "STRING", "links": [], "slot_index": 0}
+      ],
+      "properties": {"Node name for S&R": "UniverSRVideoCombiner"},
+      "widgets_values": ["UniverSR", "aac", true]
+    }
+  ],
+  "links": [
+    [1, 1, 0, 3, 0, "AUDIO"],
+    [2, 1, 1, 4, 0, "UNIVERSR_VIDEO"],
+    [3, 2, 0, 3, 1, "UNIVERSR_MODEL"],
+    [4, 3, 0, 4, 1, "AUDIO"]
+  ],
+  "groups": [],
+  "config": {},
+  "extra": {},
+  "version": 0.4
+}
diff --git a/nodes_video.py b/nodes_video.py
new file mode 100644
index 0000000..902235b
--- /dev/null
+++ b/nodes_video.py
@@ -0,0 +1,308 @@
+"""Video helper nodes for ComfyUI-UniverSR.
+
+Adapted from the HunyuanVideo-FoleyTune video loader/combiner, but trimmed to
+what audio super-resolution needs: pull the audio track out of a video, run it
+through the UniverSR sampler, then mux the enhanced track back onto the video.
+
+    UniverSR Load Video Audio  -> AUDIO + UNIVERSR_VIDEO   (ffmpeg audio extract + preview)
+    UniverSR Video Combiner    -> STRING (output path)     (ffmpeg mux, no video re-encode)
+
+ffmpeg must be on PATH. Audio is read through a WAV pipe with soundfile, avoiding
+torchaudio's fragile torchcodec backend (same reasoning as the SR node).
+"""
+
+import io
+import os
+import re
+import shutil
+import subprocess
+import tempfile
+
+import torch
+
+try:
+    import folder_paths
+    HAS_FOLDER_PATHS = True
+except Exception:  # pragma: no cover
+    HAS_FOLDER_PATHS = False
+
+VIDEO_EXTENSIONS = {"webm", "mp4", "mkv", "gif", "mov", "avi", "flv", "wmv", "m4v", "mpg", "mpeg", "ts"}
+
+
+# --------------------------------------------------------------------------- #
+# ffmpeg helpers
+# --------------------------------------------------------------------------- #
+def _ffmpeg() -> str:
+    """Return the path of the ffmpeg executable found on PATH, or raise RuntimeError."""
+    exe = shutil.which("ffmpeg")
+    if not exe:
+        raise RuntimeError(
+            "ffmpeg was not found on PATH. Install it (e.g. `apt install ffmpeg`, "
+            "`brew install ffmpeg`, or a conda/static build) to use the UniverSR video nodes."
+        )
+    return exe
+
+
+def _trim_args(start_time: float, duration: float) -> list:
+    """Build ffmpeg ``-ss``/``-t`` arguments; zero, negative or missing values add nothing."""
+    args = []
+    if start_time and start_time > 0:
+        args += ["-ss", f"{float(start_time):.6f}"]
+    if duration and duration > 0:
+        args += ["-t", f"{float(duration):.6f}"]
+    return args
+
+
+def _extract_audio(path: str, start_time: float = 0.0, duration: float = 0.0):
+    """Extract a video's audio track -> (waveform [1, C, L] float32, sample_rate).
+
+    Native sample rate / channel count, no resampling. Accurate (post-input) seek.
+    """
+    import soundfile as sf
+    cmd = [_ffmpeg(), "-hide_banner", "-loglevel", "error",
+           "-i", str(path), *_trim_args(start_time, duration),
+           "-vn", "-f", "wav", "pipe:1"]
+    result = subprocess.run(cmd, capture_output=True, timeout=600)
+    if result.returncode != 0:
+        raise RuntimeError(f"ffmpeg audio extraction failed:\n{result.stderr.decode('utf-8', 'replace').strip()}")
+    if not result.stdout:
+        raise RuntimeError(f"No audio stream found in: {path}")
+    wav_np, sr = sf.read(io.BytesIO(result.stdout), dtype="float32", always_2d=True)  # [L, C]
+    wav = torch.from_numpy(wav_np).T.unsqueeze(0).contiguous()  # [1, C, L]
+    return wav, int(sr)
+
+
+def _write_temp_wav(audio: dict) -> str:
+    """Write a ComfyUI AUDIO dict to a temp WAV, return the path."""
+    import soundfile as sf
+    wav = audio["waveform"]
+    if wav.dim() == 3:
+        wav = wav[0]  # [C, L]
+    elif wav.dim() == 1:
+        wav = wav.unsqueeze(0)  # [1, L]
+    wav_np = wav.detach().cpu().float().numpy().T  # [L, C]
+    fd, tmp = tempfile.mkstemp(suffix=".wav")
+    os.close(fd)
+    try:
+        sf.write(tmp, wav_np, int(audio["sample_rate"]))
+    except Exception:
+        os.unlink(tmp)  # mkstemp already created the file; don't orphan it on a failed write
+        raise
+    return tmp
+
+
+# --------------------------------------------------------------------------- #
+# Load Video Audio
+# --------------------------------------------------------------------------- #
+class UniverSRLoadVideoAudio:
+    """Extract a video's audio track and keep a reference to the source video.
+
+    Outputs AUDIO (feed it to UniverSR Super-Resolution) and UNIVERSR_VIDEO
+    (feed it, with the enhanced audio, to UniverSR Video Combiner).
+    """
+
+    DESCRIPTION = "Extract audio from a video for super-resolution, keeping a handle to remux later."
+    CATEGORY = "audio/UniverSR"
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        files = []
+        if HAS_FOLDER_PATHS:
+            try:
+                in_dir = folder_paths.get_input_directory()
+                files = sorted(
+                    f for f in os.listdir(in_dir)
+                    if os.path.isfile(os.path.join(in_dir, f))
+                    and os.path.splitext(f)[1].lstrip(".").lower() in VIDEO_EXTENSIONS
+                )
+            except Exception:
+                files = []
+        return {
+            "required": {
+                "video_path": ("STRING", {
+                    "default": "",
+                    "placeholder": "/path/to/video.mp4 (or pick from 'video' below)",
+                    "tooltip": "Absolute path to a video file. Takes priority over the 'video' dropdown.",
+                }),
+            },
+            "optional": {
+                "video": (files or ["(none)"], {
+                    "video_upload": True,
+                    "tooltip": "Pick a video from the ComfyUI input/ folder (used when video_path is empty).",
+                }),
+                "start_time": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 360000.0, "step": 0.1,
+                                         "tooltip": "Trim start in seconds."}),
+                "duration": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 360000.0, "step": 0.1,
+                                       "tooltip": "Trim length in seconds (0 = to end)."}),
+            },
+        }
+
+    RETURN_TYPES = ("AUDIO", "UNIVERSR_VIDEO")
+    RETURN_NAMES = ("audio", "video")
+    FUNCTION = "load"
+    OUTPUT_NODE = True
+
+    def _resolve_path(self, video_path, video):
+        """Return the absolute source path: video_path if set, else the 'video' dropdown choice."""
+        video_path = (video_path or "").strip()
+        if video_path:
+            if not os.path.isfile(video_path):
+                raise FileNotFoundError(f"Video not found: {video_path}")
+            return os.path.abspath(video_path)
+        if video and video != "(none)" and HAS_FOLDER_PATHS:
+            return os.path.abspath(folder_paths.get_annotated_filepath(video))
+        raise ValueError("No video given — set video_path or pick a file in 'video'.")
+
+    def load(self, video_path, video="(none)", start_time=0.0, duration=0.0):
+        """Extract the audio track and return it with a handle describing the source video."""
+        path = self._resolve_path(video_path, video)
+        waveform, sr = _extract_audio(path, start_time, duration)
+        dur = waveform.shape[-1] / max(sr, 1)
+        print(f"[UniverSR] Loaded audio from {os.path.basename(path)}: "
+              f"{waveform.shape[1]}ch @ {sr} Hz ({dur:.2f}s)")
+
+        audio = {"waveform": waveform, "sample_rate": sr}
+        info = {"video_path": path, "start_time": float(start_time), "duration": float(duration),
+                "source_sr": sr, "source_channels": int(waveform.shape[1])}
+
+        ui = self._preview(path)
+        return {"ui": ui, "result": (audio, info)}
+
+    def _preview(self, path):
+        """Symlink (or copy) the source video into temp/ for an inline preview."""
+        if not HAS_FOLDER_PATHS:
+            return {}
+        try:
+            import hashlib
+            temp_dir = folder_paths.get_temp_directory()
+            os.makedirs(temp_dir, exist_ok=True)
+            ext = os.path.splitext(path)[1] or ".mp4"
+            name = f"universr_preview_{hashlib.md5(path.encode()).hexdigest()[:8]}{ext}"
+            dst = os.path.join(temp_dir, name)
+            if os.path.islink(dst) or os.path.exists(dst):
+                os.unlink(dst)
+            try:
+                os.symlink(os.path.abspath(path), dst)
+            except OSError:
+                shutil.copy(path, dst)  # filesystems without symlink support
+            return {"gifs": [{"filename": name, "subfolder": "", "type": "temp",
+                              "format": f"video/{ext.lstrip('.')}"}]}
+        except Exception as e:
+            print(f"[UniverSR] Video preview skipped: {e}")
+            return {}
+
+    @classmethod
+    def IS_CHANGED(cls, video_path, video="(none)", start_time=0.0, duration=0.0):
+        """Return a cache key built from the inputs and the resolved file's mtime."""
+        try:
+            p = (video_path or "").strip()
+            if not p and video and video != "(none)" and HAS_FOLDER_PATHS:
+                p = folder_paths.get_annotated_filepath(video)
+            mtime = os.path.getmtime(p) if p and os.path.isfile(p) else 0
+        except Exception:
+            mtime = 0
+        return f"{video_path}:{video}:{start_time}:{duration}:{mtime}"
+
+
+# --------------------------------------------------------------------------- #
+# Video Combiner
+# --------------------------------------------------------------------------- #
+class UniverSRVideoCombiner:
+    """Mux audio onto the source video (no video re-encode) and save the result."""
+
+    DESCRIPTION = "Remux the enhanced audio onto the original video with ffmpeg (video stream copied)."
+    CATEGORY = "audio/UniverSR"
+
+    @classmethod
+    def INPUT_TYPES(cls):
+        return {
+            "required": {
+                "video": ("UNIVERSR_VIDEO",),
+                "audio": ("AUDIO",),
+                "filename_prefix": ("STRING", {"default": "UniverSR"}),
+            },
+            "optional": {
+                "audio_codec": (["aac", "flac", "pcm_s16le", "libopus", "libmp3lame"], {
+                    "default": "aac",
+                    "tooltip": "Codec for the muxed audio track. aac is the safe default for MP4.",
+                }),
+                "save_output": ("BOOLEAN", {
+                    "default": True,
+                    "tooltip": "Save to the ComfyUI output/ folder (else temp/).",
+                }),
+            },
+        }
+
+    RETURN_TYPES = ("STRING",)
+    RETURN_NAMES = ("output_path",)
+    FUNCTION = "combine"
+    OUTPUT_NODE = True
+
+    def combine(self, video, audio, filename_prefix="UniverSR", audio_codec="aac", save_output=True):
+        """Mux `audio` onto the source video (video stream copied) and return the output path."""
+        source_video = video["video_path"]
+        if not os.path.isfile(source_video):
+            raise FileNotFoundError(f"Source video not found: {source_video}")
+        src_ext = os.path.splitext(source_video)[1] or ".mp4"
+
+        if HAS_FOLDER_PATHS:
+            out_dir = folder_paths.get_output_directory() if save_output else folder_paths.get_temp_directory()
+            out_type = "output" if save_output else "temp"
+            full_folder, filename, _, _, _ = folder_paths.get_save_image_path(filename_prefix, out_dir)
+        else:  # standalone fallback
+            out_dir = os.path.abspath("universr_output")
+            out_type = "output"
+            full_folder, filename = out_dir, filename_prefix
+        os.makedirs(full_folder, exist_ok=True)
+
+        # Auto-increment counter (VHS-style), scoped to this prefix.
+        max_counter = 0
+        matcher = re.compile(rf"{re.escape(filename)}_(\d+)\..+", re.IGNORECASE)
+        for f in os.listdir(full_folder):
+            m = matcher.fullmatch(f)
+            if m:
+                max_counter = max(max_counter, int(m.group(1)))
+        out_name = f"{filename}_{max_counter + 1:05}{src_ext}"
+        out_path = os.path.join(full_folder, out_name)
+
+        # Align the video to the same trim window the audio was extracted with.
+        # NOTE(review): -ss is an *input* option here and the video is stream-copied, so ffmpeg
+        # can only cut at a keyframe at/before start_time; confirm A/V sync on trimmed clips.
+        start_time = float(video.get("start_time", 0.0) or 0.0)
+        duration = float(video.get("duration", 0.0) or 0.0)
+
+        tmp_wav = _write_temp_wav(audio)
+        try:
+            cmd = [_ffmpeg(), "-hide_banner", "-loglevel", "error", "-y",
+                   *_trim_args(start_time, duration), "-i", str(source_video),
+                   "-i", tmp_wav,
+                   "-c:v", "copy", "-c:a", audio_codec,
+                   "-map", "0:v:0", "-map", "1:a:0",
+                   "-shortest", str(out_path)]
+            result = subprocess.run(cmd, capture_output=True, timeout=600)
+            if result.returncode != 0:
+                raise RuntimeError(f"ffmpeg mux failed:\n{result.stderr.decode('utf-8', 'replace').strip()}")
+        finally:
+            if os.path.exists(tmp_wav):
+                os.unlink(tmp_wav)
+
+        print(f"[UniverSR] Muxed enhanced audio -> {out_path}")
+
+        ui = {}
+        if HAS_FOLDER_PATHS:
+            subfolder = os.path.relpath(full_folder, out_dir)
+            if subfolder == ".":
+                subfolder = ""
+            ui = {"gifs": [{"filename": out_name, "subfolder": subfolder, "type": out_type,
+                            "format": f"video/{src_ext.lstrip('.')}"}]}
+        return {"ui": ui, "result": (str(out_path),)}
+
+
+NODE_CLASS_MAPPINGS = {
+    "UniverSRLoadVideoAudio": UniverSRLoadVideoAudio,
+    "UniverSRVideoCombiner": UniverSRVideoCombiner,
+}
+NODE_DISPLAY_NAME_MAPPINGS = {
+    "UniverSRLoadVideoAudio": "UniverSR Load Video Audio",
+    "UniverSRVideoCombiner": "UniverSR Video Combiner",
+}
diff --git a/requirements.txt b/requirements.txt
index 2cf1644..cae437f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,6 @@ einops>=0.7
 timm>=0.9
 huggingface_hub>=0.20
 pyyaml>=6.0
+# Video loader/combiner nodes only — read/write WAV for ffmpeg muxing.
+# (ffmpeg itself must be installed separately and on PATH.)
+soundfile>=0.12