From 614a2e02aa676e81ea29d61ca5e43a07d6a7be93 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Sat, 4 Apr 2026 16:38:31 +0200
Subject: [PATCH] fix: weights_only=False for SelVA checkpoints (PyTorch 2.6
 compat)

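PyTorch 2.6 changed the default of torch.load's weights_only argument to
True. SelVA checkpoints contain non-tensor types (numpy scalars, etc.)
that fail this strict unpickling, so load them with weights_only=False.
Note that weights_only=False permits arbitrary code execution during
unpickling; this is acceptable here only because all weights come from
a trusted source (the jnwnlee/selva HF repo).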

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 nodes/selva_model_loader.py               | 4 ++--
 selva_core/ext/autoencoder/autoencoder.py | 2 +-
 selva_core/ext/bigvgan/bigvgan.py         | 2 +-
 selva_core/model/utils/features_utils.py  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/nodes/selva_model_loader.py b/nodes/selva_model_loader.py
index af40fd5..b0b6054 100644
--- a/nodes/selva_model_loader.py
+++ b/nodes/selva_model_loader.py
@@ -74,14 +74,14 @@ class SelvaModelLoader:
         print(f"[SelVA] Loading TextSynch from {video_enc_path}", flush=True)
         net_video_enc = get_my_textsynch("depth1").to(device, dtype).eval()
         net_video_enc.load_weights(
-            torch.load(video_enc_path, map_location="cpu", weights_only=True)
+            torch.load(video_enc_path, map_location="cpu", weights_only=False)
         )
 
         print(f"[SelVA] Loading MMAudio ({variant}) from {gen_path}", flush=True)
         seq_cfg = CONFIG_16K if mode == "16k" else CONFIG_44K
         net_generator = get_my_mmaudio(variant).to(device, dtype).eval()
         net_generator.load_weights(
-            torch.load(gen_path, map_location="cpu", weights_only=True)
+            torch.load(gen_path, map_location="cpu", weights_only=False)
         )
 
         print("[SelVA] Loading FeaturesUtils (CLIP + T5 + Synchformer + VAE)...", flush=True)
diff --git a/selva_core/ext/autoencoder/autoencoder.py b/selva_core/ext/autoencoder/autoencoder.py
index 5d6e718..bceb340 100644
--- a/selva_core/ext/autoencoder/autoencoder.py
+++ b/selva_core/ext/autoencoder/autoencoder.py
@@ -19,7 +19,7 @@ class AutoEncoderModule(nn.Module):
                  need_vae_encoder: bool = True):
         super().__init__()
         self.vae: VAE = get_my_vae(mode).eval()
-        vae_state_dict = torch.load(vae_ckpt_path, weights_only=True, map_location='cpu')
+        vae_state_dict = torch.load(vae_ckpt_path, weights_only=False, map_location='cpu')
         self.vae.load_state_dict(vae_state_dict)
         self.vae.remove_weight_norm()
 
diff --git a/selva_core/ext/bigvgan/bigvgan.py b/selva_core/ext/bigvgan/bigvgan.py
index 36954e0..39f5b04 100644
--- a/selva_core/ext/bigvgan/bigvgan.py
+++ b/selva_core/ext/bigvgan/bigvgan.py
@@ -15,7 +15,7 @@ class BigVGAN(nn.Module):
         super().__init__()
         vocoder_cfg = OmegaConf.load(config_path)
         self.vocoder = BigVGANVocoder(vocoder_cfg).eval()
-        vocoder_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)['generator']
+        vocoder_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)['generator']
         self.vocoder.load_state_dict(vocoder_ckpt)
 
         self.weight_norm_removed = False
diff --git a/selva_core/model/utils/features_utils.py b/selva_core/model/utils/features_utils.py
index 5bcd280..d654525 100644
--- a/selva_core/model/utils/features_utils.py
+++ b/selva_core/model/utils/features_utils.py
@@ -57,7 +57,7 @@ class FeaturesUtils(nn.Module):
 
             self.synchformer = Synchformer(video=True, audio=False)
             self.synchformer.load_state_dict(
-                torch.load(synchformer_ckpt, weights_only=True, map_location='cpu'))
+                torch.load(synchformer_ckpt, weights_only=False, map_location='cpu'))
             
             self.text_encoder_t5 = T5EncoderModel.from_pretrained('google/flan-t5-base')
             self.tokenizer_t5 = T5TokenizerFast.from_pretrained('google/flan-t5-base')