From 30f46fc3eff5a9abb3aa1389b2600edb26d91b04 Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Sun, 5 Apr 2026 17:35:54 +0200
Subject: [PATCH] Revert transformers cap; catch torchcodec ASR failure with clear message

install.py: restore transformers>=5.0.0 (capping it would break other
nodes).

generator.py: catch the torchcodec RuntimeError that fires when ref_text
is blank and transformers 5.x auto-transcription requires missing FFmpeg
libs. Raises a human-readable error telling the user to fill in ref_text
manually. Also updates the ref_text tooltip to recommend providing it
explicitly.

Co-Authored-By: Claude Sonnet 4.6
---
 nodes/generator.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/nodes/generator.py b/nodes/generator.py
index a93132a..508b4a7 100644
--- a/nodes/generator.py
+++ b/nodes/generator.py
@@ -60,7 +60,7 @@ class OmniVoiceGenerate:
                 }),
                 "ref_text": ("STRING", {
                     "default": "",
-                    "tooltip": "Transcription of ref_audio. Leave blank to auto-transcribe with Whisper.",
+                    "tooltip": "Transcription of ref_audio. Strongly recommended: type it manually. Auto-transcription requires FFmpeg shared libraries; if absent (e.g. some Docker images), generation will fail with a clear error message.",
                 }),
                 "instruct": ("STRING", {
                     "default": "",
@@ -115,7 +115,16 @@ class OmniVoiceGenerate:
                 kwargs["ref_audio"] = tmp_path
                 if ref_text:
                     kwargs["ref_text"] = ref_text
-                audio_tensors = model.generate(**kwargs)
+                try:
+                    audio_tensors = model.generate(**kwargs)
+                except RuntimeError as e:
+                    if "torchcodec" in str(e).lower() or "libtorchcodec" in str(e).lower():
+                        raise RuntimeError(
+                            "Auto-transcription of the reference audio failed because FFmpeg is not "
+                            "available in this environment (required by transformers 5.x for Whisper ASR). "
+                            "Fix: type the transcript of your reference audio into the ref_text field."
+                        ) from None
+                    raise
             finally:
                 try:
                     os.unlink(tmp_path)