fix: move output waveform to CPU and cast sample_rate to int

This commit is contained in:
2026-04-05 10:34:53 +02:00
parent 49b1ee5c16
commit a2c542a2bc
+2 -2
View File
@@ -44,7 +44,7 @@ class OmniVoiceGenerate:
 tmp.close()
 try:
     waveform = ref_audio["waveform"].squeeze(0).cpu() # (channels, samples)
-    torchaudio.save(tmp_path, waveform, ref_audio["sample_rate"])
+    torchaudio.save(tmp_path, waveform, int(ref_audio["sample_rate"]))
     kwargs["ref_audio"] = tmp_path
     if ref_text:
         kwargs["ref_text"] = ref_text
@@ -60,7 +60,7 @@ class OmniVoiceGenerate:
 audio_tensors = model.generate(**kwargs)
 # Concatenate chunks: each tensor is (1, T) → concat along T → (1, T_total)
-combined = torch.cat(audio_tensors, dim=1) # (1, T_total)
+combined = torch.cat(audio_tensors, dim=1).cpu() # (1, T_total) on CPU
 # ComfyUI AUDIO format: (batch, channels, samples)
 waveform = combined.unsqueeze(0) # (1, 1, T_total)