From 82e449681ca1cb2e89c644bd8ba838539cb5f113 Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Thu, 9 Apr 2026 15:59:55 +0200
Subject: [PATCH] fix: cast mel_converter and wav to float32 before cuFFT in DITTO

cuFFT does not support bfloat16. mel_converter was being moved to device
without an explicit dtype, inheriting bfloat16 from the model context.
Force float32 for both mel_converter.to() and wav.to() so the STFT
inside the mel converter runs in a supported dtype.

Co-Authored-By: Claude Sonnet 4.6
---
 nodes/selva_ditto_optimizer.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/nodes/selva_ditto_optimizer.py b/nodes/selva_ditto_optimizer.py
index 4036c27..bdba871 100644
--- a/nodes/selva_ditto_optimizer.py
+++ b/nodes/selva_ditto_optimizer.py
@@ -191,7 +191,7 @@ class SelvaDittoOptimizer:
             raise FileNotFoundError(f"[DITTO] No audio files in reference_dir: {ref_dir}")
 
         print(f"[DITTO] Loading {len(ref_files)} reference clips...", flush=True)
-        mel_converter.to(device)
+        mel_converter.to(device, torch.float32)  # cuFFT requires float32
 
         ref_mels = []
         with torch.no_grad():
@@ -202,7 +202,7 @@ class SelvaDittoOptimizer:
                     wav = wav.mean(0, keepdim=True)
                 if sr != sample_rate:
                     wav = torchaudio.functional.resample(wav, sr, sample_rate)
-                wav = wav.squeeze(0).to(device, dtype)
+                wav = wav.squeeze(0).to(device, torch.float32)  # cuFFT requires float32
                 mel = mel_converter(wav.unsqueeze(0))  # [1, n_mels, T]
                 ref_mels.append(mel)
             except Exception as e: