fix: force torch.cuda.empty_cache() after pre-generation and CLIP encoding

PyTorch's caching allocator keeps the GPU memory it reserved during pre-generation (~90 GiB for generator + tod) and does not return it to CUDA or the OS. soft_empty_cache may not call torch.cuda.empty_cache(), so force a full cache release after CLIP encoding and after LoRA mel pre-generation.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-10 01:42:45 +02:00
parent 89d6fccd28
commit 9af4bbdd91
1 changed files with 6 additions and 0 deletions
@@ -836,6 +836,8 @@ class SelvaBigvganTrainer:
                    if clip_model is not None:
                        clip_model.to("cpu")
                    soft_empty_cache()
+                    if device.type == "cuda":
+                        torch.cuda.empty_cache()
                print(f"[BigVGAN] Pre-encoded {len(text_clip_cache)} text CLIP embeddings", flush=True)

        pbar = comfy.utils.ProgressBar(steps)
@@ -875,6 +877,10 @@ class SelvaBigvganTrainer:
                            "could be generated. Check that data_dir contains .npz "
                            "files with matching audio files."
                        )
+                    # Force-release the CUDA memory pool from pre-generation.
+                    # soft_empty_cache may not call torch.cuda.empty_cache().
+                    if device.type == "cuda":
+                        torch.cuda.empty_cache()

                _result[0] = _do_train(
                    vocoder, mel_converter, clips,