From 4297715a08391fc4fe0fd130a09e48c8d180b437 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Fri, 10 Apr 2026 01:48:04 +0200 Subject: [PATCH] debug: add driver-level VRAM reporting + offload video_enc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit torch.cuda.memory_allocated only tracks memory managed by the PyTorch allocator, so add torch.cuda.mem_get_info to report actual CUDA driver-level memory usage. Also offload video_enc (TextSynch), which was missed in the original offload and stays on the GPU when strategy != offload_to_cpu. Co-Authored-By: Claude Opus 4.6 --- nodes/selva_bigvgan_trainer.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/nodes/selva_bigvgan_trainer.py b/nodes/selva_bigvgan_trainer.py index b54923b..39954c6 100644 --- a/nodes/selva_bigvgan_trainer.py +++ b/nodes/selva_bigvgan_trainer.py @@ -776,8 +776,10 @@ class SelvaBigvganTrainer: if device.type == "cuda": alloc = torch.cuda.memory_allocated(device) / (1024**3) resrv = torch.cuda.memory_reserved(device) / (1024**3) - print(f"[BigVGAN VRAM] {label}: {alloc:.2f} GiB allocated, " - f"{resrv:.2f} GiB reserved", flush=True) + free_cuda, total_cuda = torch.cuda.mem_get_info(device) + used_driver = (total_cuda - free_cuda) / (1024**3) + print(f"[BigVGAN VRAM] {label}: alloc={alloc:.2f} reserved={resrv:.2f} " + f"driver_used={used_driver:.2f} GiB", flush=True) _vram_log("before unload") comfy.model_management.unload_all_models() @@ -791,6 +793,9 @@ class SelvaBigvganTrainer: if "generator" in model: model["generator"].to("cpu") _vram_log("after generator.to(cpu)") + if "video_enc" in model: + model["video_enc"].to("cpu") + _vram_log("after video_enc.to(cpu)") soft_empty_cache() _vram_log("after soft_empty_cache") @@ -1093,8 +1098,12 @@ def _do_train(vocoder, mel_converter, clips, if device.type == "cuda": alloc = torch.cuda.memory_allocated(device) / (1024**3) resrv = torch.cuda.memory_reserved(device) / (1024**3) - print(f"[BigVGAN VRAM] before training: 
{alloc:.2f} GiB allocated, " - f"{resrv:.2f} GiB reserved", flush=True) + free_cuda, total_cuda = torch.cuda.mem_get_info(device) + used_driver = (total_cuda - free_cuda) / (1024**3) + print(f"[BigVGAN VRAM] before training: " + f"pytorch_alloc={alloc:.2f} GiB, pytorch_reserved={resrv:.2f} GiB, " + f"driver_used={used_driver:.2f} GiB, driver_total={total_cuda/(1024**3):.2f} GiB", + flush=True) optimizer = torch.optim.AdamW(trainable_params, lr=lr, betas=(0.8, 0.99)) vocoder.train()