diff --git a/nodes/selva_bigvgan_trainer.py b/nodes/selva_bigvgan_trainer.py index 0ec9ee7..bc6074a 100644 --- a/nodes/selva_bigvgan_trainer.py +++ b/nodes/selva_bigvgan_trainer.py @@ -487,10 +487,16 @@ def _pregenerate_lora_mels(model, data_dir, lora_adapter_path, device, dtype, fm = FlowMatching(min_sigma=0, inference_mode="euler", num_steps=num_steps) rng = torch.Generator(device=device).manual_seed(seed) - # Move VAE+vocoder to device for decode + # Move only the components we need to GPU for generation: + # - tod (VAE+vocoder) for decode + # - clip_model for encode_text_clip tod = feature_utils.tod tod_orig_dev = next(tod.parameters()).device tod.to(device) + clip_model = feature_utils.clip_model + if clip_model is not None: + clip_orig_dev = next(clip_model.parameters()).device + clip_model.to(device) pairs = [] try: @@ -565,6 +571,8 @@ def _pregenerate_lora_mels(model, data_dir, lora_adapter_path, device, dtype, finally: tod.to(tod_orig_dev) + if clip_model is not None: + clip_model.to(clip_orig_dev) del generator soft_empty_cache() @@ -777,10 +785,10 @@ class SelvaBigvganTrainer: comfy.model_management.unload_all_models() soft_empty_cache() - if strategy == "offload_to_cpu": - feature_utils.to(device) - soft_empty_cache() - + # Only move mel_converter to GPU — it's tiny and needed for training. + # The rest of feature_utils (CLIP, synchformer, T5, VAE) stays on CPU; + # _pregenerate_lora_mels handles its own device management for the parts + # it needs temporarily. mel_converter.to(device) pbar = comfy.utils.ProgressBar(steps)