diff --git a/nodes/generator.py b/nodes/generator.py index 508b4a7..82de292 100644 --- a/nodes/generator.py +++ b/nodes/generator.py @@ -60,7 +60,7 @@ class OmniVoiceGenerate: }), "ref_text": ("STRING", { "default": "", - "tooltip": "Transcription of ref_audio. Strongly recommended: type it manually. Auto-transcription requires FFmpeg shared libraries; if absent (e.g. some Docker images), generation will fail with a clear error message.", + "tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.", }), "instruct": ("STRING", { "default": "", @@ -115,16 +115,7 @@ class OmniVoiceGenerate: kwargs["ref_audio"] = tmp_path if ref_text: kwargs["ref_text"] = ref_text - try: - audio_tensors = model.generate(**kwargs) - except RuntimeError as e: - if "torchcodec" in str(e).lower() or "libtorchcodec" in str(e).lower(): - raise RuntimeError( - "Auto-transcription of the reference audio failed because FFmpeg is not " - "available in this environment (required by transformers 5.x for Whisper ASR). " - "Fix: type the transcript of your reference audio into the ref_text field." - ) from None - raise + audio_tensors = model.generate(**kwargs) finally: try: os.unlink(tmp_path)