feat: add torch.compile option to Model Loader

Compiles the model graph on first generation (~30-60s warmup) then speeds up all subsequent generations in the session. Recommended for audiobook pipelines. Default off. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 19:16:39 +02:00
parent 2e1cba61fc
commit 46591553f9
2 changed files with 17 additions and 3 deletions
@@ -33,6 +33,18 @@ class OmniVoiceModelLoader:
                    ["float16", "bfloat16", "float32"],
                    {"default": "float16"},
                ),
+                "compile": (
+                    "BOOLEAN",
+                    {
+                        "default": False,
+                        "tooltip": (
+                            "Run torch.compile() on the model after loading. "
+                            "First generation will be slow (~30-60s warmup) while the graph is compiled, "
+                            "then every subsequent generation in the session will be faster. "
+                            "Recommended for audiobook pipelines. Requires PyTorch 2.0+."
+                        ),
+                    },
+                ),
            },
        }

@@ -41,7 +53,7 @@ class OmniVoiceModelLoader:
    FUNCTION = "load_model"
    CATEGORY = "OmniVoice"

-    def load_model(self, device, dtype):
+    def load_model(self, device, dtype, compile=False):
        if OmniVoice is None:
            raise ImportError(
                "omnivoice is not installed. Run: pip install omnivoice --no-deps"
@@ -53,4 +65,6 @@ class OmniVoiceModelLoader:
            dtype=DTYPE_MAP[dtype],
            cache_dir=CACHE_DIR,
        )
+        if compile:
+            model = torch.compile(model)
        return (model,)