Remove torchcodec workaround; recommend Whisper node for ref_text

Users should connect a ComfyUI Whisper node to ref_text instead of relying on omnivoice's internal ASR. Removes the error-catch workaround and updates the tooltip accordingly.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 17:49:25 +02:00
parent a3fb88e559
commit 8d77dd6cd5
1 changed file with 2 additions and 11 deletions
@@ -60,7 +60,7 @@ class OmniVoiceGenerate:
                }),
                "ref_text": ("STRING", {
                    "default": "",
-                    "tooltip": "Transcription of ref_audio. Strongly recommended: type it manually. Auto-transcription requires FFmpeg shared libraries; if absent (e.g. some Docker images), generation will fail with a clear error message.",
+                    "tooltip": "Transcription of ref_audio. Connect a Whisper (or other STT) node for best results.",
                }),
                "instruct": ("STRING", {
                    "default": "",
@@ -115,16 +115,7 @@ class OmniVoiceGenerate:
                kwargs["ref_audio"] = tmp_path
                if ref_text:
                    kwargs["ref_text"] = ref_text
-                try:
+                audio_tensors = model.generate(**kwargs)
-                    audio_tensors = model.generate(**kwargs)
-                except RuntimeError as e:
-                    if "torchcodec" in str(e).lower() or "libtorchcodec" in str(e).lower():
-                        raise RuntimeError(
-                            "Auto-transcription of the reference audio failed because FFmpeg is not "
-                            "available in this environment (required by transformers 5.x for Whisper ASR). "
-                            "Fix: type the transcript of your reference audio into the ref_text field."
-                        ) from None
-                    raise
            finally:
                try:
                    os.unlink(tmp_path)