From 37a27160aa01dcb78fdad716291621d0a04e187a Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Fri, 10 Apr 2026 00:45:31 +0200
Subject: [PATCH] fix: match mel dtype to vocoder in baseline sample generation

ref_mel is float32 (from mel_converter) but vocoder weights are bfloat16
before inference flag stripping. Cast mel to vocoder's dtype to prevent
input/bias type mismatch during baseline sample save.

Co-Authored-By: Claude Opus 4.6
---
 nodes/selva_bigvgan_trainer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nodes/selva_bigvgan_trainer.py b/nodes/selva_bigvgan_trainer.py
index bc6074a..f66d2cf 100644
--- a/nodes/selva_bigvgan_trainer.py
+++ b/nodes/selva_bigvgan_trainer.py
@@ -915,8 +915,8 @@ def _do_train(vocoder, mel_converter, clips,
 
     def _save_sample(label):
         try:
-            voc_device = next(vocoder.parameters()).device
-            mel = ref_mel.to(voc_device)
+            voc_p = next(vocoder.parameters())
+            mel = ref_mel.to(voc_p.device, voc_p.dtype)
             with torch.no_grad():
                 wav = vocoder(mel)
                 if wav.dim() == 2: