From 056a7b973dd90e8a54a7bede349c1043a97de7f6 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Thu, 9 Apr 2026 18:15:27 +0200
Subject: [PATCH] =?UTF-8?q?fix:=20enable=20VAE=20encoder=20in=20model=20lo?=
 =?UTF-8?q?ader=20=E2=80=94=20required=20for=20DITTO=20reference=20encodin?=
 =?UTF-8?q?g?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Passing need_vae_encoder=False caused the VAE encoder to be deleted to save a
small amount of VRAM. DITTO now needs the encoder to encode reference clips to
latent space for style loss. The spectrogram VAE encoder is small enough that
the overhead of keeping it is negligible.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 nodes/selva_model_loader.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nodes/selva_model_loader.py b/nodes/selva_model_loader.py
index 3fc8497..b1c6af6 100644
--- a/nodes/selva_model_loader.py
+++ b/nodes/selva_model_loader.py
@@ -149,7 +149,7 @@ class SelvaModelLoader:
             enable_conditions=True,
             mode=mode,
             bigvgan_vocoder_ckpt=bigvgan_path,
-            need_vae_encoder=False,
+            need_vae_encoder=True,
         ).to(device, dtype).eval()
 
         if strategy == "offload_to_cpu":