From 614a2e02aa676e81ea29d61ca5e43a07d6a7be93 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 4 Apr 2026 16:38:31 +0200 Subject: [PATCH] fix: weights_only=False for SelVA checkpoints (PyTorch 2.6 compat) PyTorch 2.6 changed the torch.load default to weights_only=True. SelVA checkpoints contain non-tensor types (NumPy scalars, etc.) that fail strict unpickling, so these loads now pass weights_only=False. That re-enables full pickle deserialization, which can execute arbitrary code from a malicious file; it is acceptable here only because all weights come from trusted sources (the jnwnlee/selva Hugging Face repo). Co-Authored-By: Claude Sonnet 4.6 --- nodes/selva_model_loader.py | 4 ++-- selva_core/ext/autoencoder/autoencoder.py | 2 +- selva_core/ext/bigvgan/bigvgan.py | 2 +- selva_core/model/utils/features_utils.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/nodes/selva_model_loader.py b/nodes/selva_model_loader.py index af40fd5..b0b6054 100644 --- a/nodes/selva_model_loader.py +++ b/nodes/selva_model_loader.py @@ -74,14 +74,14 @@ class SelvaModelLoader: print(f"[SelVA] Loading TextSynch from {video_enc_path}", flush=True) net_video_enc = get_my_textsynch("depth1").to(device, dtype).eval() net_video_enc.load_weights( - torch.load(video_enc_path, map_location="cpu", weights_only=True) + torch.load(video_enc_path, map_location="cpu", weights_only=False) ) print(f"[SelVA] Loading MMAudio ({variant}) from {gen_path}", flush=True) seq_cfg = CONFIG_16K if mode == "16k" else CONFIG_44K net_generator = get_my_mmaudio(variant).to(device, dtype).eval() net_generator.load_weights( - torch.load(gen_path, map_location="cpu", weights_only=True) + torch.load(gen_path, map_location="cpu", weights_only=False) ) print("[SelVA] Loading FeaturesUtils (CLIP + T5 + Synchformer + VAE)...", flush=True) diff --git a/selva_core/ext/autoencoder/autoencoder.py b/selva_core/ext/autoencoder/autoencoder.py index 5d6e718..bceb340 100644 --- a/selva_core/ext/autoencoder/autoencoder.py +++ b/selva_core/ext/autoencoder/autoencoder.py @@ -19,7 +19,7 @@ class AutoEncoderModule(nn.Module): need_vae_encoder: bool = True): super().__init__() self.vae: VAE = 
get_my_vae(mode).eval() - vae_state_dict = torch.load(vae_ckpt_path, weights_only=True, map_location='cpu') + vae_state_dict = torch.load(vae_ckpt_path, weights_only=False, map_location='cpu') self.vae.load_state_dict(vae_state_dict) self.vae.remove_weight_norm() diff --git a/selva_core/ext/bigvgan/bigvgan.py b/selva_core/ext/bigvgan/bigvgan.py index 36954e0..39f5b04 100644 --- a/selva_core/ext/bigvgan/bigvgan.py +++ b/selva_core/ext/bigvgan/bigvgan.py @@ -15,7 +15,7 @@ class BigVGAN(nn.Module): super().__init__() vocoder_cfg = OmegaConf.load(config_path) self.vocoder = BigVGANVocoder(vocoder_cfg).eval() - vocoder_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=True)['generator'] + vocoder_ckpt = torch.load(ckpt_path, map_location='cpu', weights_only=False)['generator'] self.vocoder.load_state_dict(vocoder_ckpt) self.weight_norm_removed = False diff --git a/selva_core/model/utils/features_utils.py b/selva_core/model/utils/features_utils.py index 5bcd280..d654525 100644 --- a/selva_core/model/utils/features_utils.py +++ b/selva_core/model/utils/features_utils.py @@ -57,7 +57,7 @@ class FeaturesUtils(nn.Module): self.synchformer = Synchformer(video=True, audio=False) self.synchformer.load_state_dict( - torch.load(synchformer_ckpt, weights_only=True, map_location='cpu')) + torch.load(synchformer_ckpt, weights_only=False, map_location='cpu')) self.text_encoder_t5 = T5EncoderModel.from_pretrained('google/flan-t5-base') self.tokenizer_t5 = T5TokenizerFast.from_pretrained('google/flan-t5-base')