Load text encoder and VAE from ComfyUI model folders
Download OpenCLIP ViT-H-14 to models/text_encoders/ and SVD temporal VAE to models/vae/svd-temporal-vae/ instead of hidden library caches, so they're visible, reusable, and shared with other nodes. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
46
inference.py
46
inference.py
@@ -275,7 +275,23 @@ def load_model(model_path: str, precision: str, offload: str, device: torch.devi
|
|||||||
from video_to_video.utils.config import cfg
|
from video_to_video.utils.config import cfg
|
||||||
|
|
||||||
print("[STAR] Loading text encoder (OpenCLIP ViT-H-14)...")
|
print("[STAR] Loading text encoder (OpenCLIP ViT-H-14)...")
|
||||||
text_encoder = FrozenOpenCLIPEmbedder(device=device, pretrained="laion2b_s32b_b79k")
|
_models_dir = (SCRIPT_DIR / ".." / ".." / "models").resolve()
|
||||||
|
_te_filename = "open_clip_vit_h_14_laion2b.bin"
|
||||||
|
_te_dir = _models_dir / "text_encoders"
|
||||||
|
_te_path = _te_dir / _te_filename
|
||||||
|
if not _te_path.is_file():
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
_te_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
print(f"[STAR] Downloading OpenCLIP ViT-H-14 text encoder to {_te_dir}...")
|
||||||
|
_dl = hf_hub_download(
|
||||||
|
repo_id="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
|
||||||
|
filename="open_clip_pytorch_model.bin",
|
||||||
|
local_dir=str(_te_dir),
|
||||||
|
)
|
||||||
|
shutil.move(str(_dl), str(_te_path))
|
||||||
|
print(f"[STAR] Text encoder saved to {_te_path}")
|
||||||
|
|
||||||
|
text_encoder = FrozenOpenCLIPEmbedder(device=device, pretrained=str(_te_path))
|
||||||
text_encoder.model.to(device)
|
text_encoder.model.to(device)
|
||||||
negative_y = text_encoder(cfg.negative_prompt).detach()
|
negative_y = text_encoder(cfg.negative_prompt).detach()
|
||||||
text_encoder.model.to(keep_on)
|
text_encoder.model.to(keep_on)
|
||||||
@@ -304,9 +320,33 @@ def load_model(model_path: str, precision: str, offload: str, device: torch.devi
|
|||||||
from diffusers import AutoencoderKLTemporalDecoder
|
from diffusers import AutoencoderKLTemporalDecoder
|
||||||
|
|
||||||
print("[STAR] Loading temporal VAE...")
|
print("[STAR] Loading temporal VAE...")
|
||||||
|
_vae_dir_name = "svd-temporal-vae"
|
||||||
|
_vae_path = _models_dir / "vae" / _vae_dir_name
|
||||||
|
if not (_vae_path / "config.json").is_file():
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
_vae_path.mkdir(parents=True, exist_ok=True)
|
||||||
|
print(f"[STAR] Downloading SVD temporal VAE to {_vae_path}...")
|
||||||
|
for _f in ["config.json", "diffusion_pytorch_model.fp16.safetensors"]:
|
||||||
|
_dl = hf_hub_download(
|
||||||
|
repo_id="stabilityai/stable-video-diffusion-img2vid",
|
||||||
|
subfolder="vae",
|
||||||
|
filename=_f,
|
||||||
|
local_dir=str(_vae_path),
|
||||||
|
)
|
||||||
|
_dest = _vae_path / _f
|
||||||
|
if Path(_dl) != _dest and Path(_dl).is_file():
|
||||||
|
shutil.move(str(_dl), str(_dest))
|
||||||
|
# Clean up empty vae/ subdirectory created by hf_hub_download
|
||||||
|
_vae_sub = _vae_path / "vae"
|
||||||
|
if _vae_sub.is_dir():
|
||||||
|
try:
|
||||||
|
_vae_sub.rmdir()
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
print(f"[STAR] Temporal VAE saved to {_vae_path}")
|
||||||
|
|
||||||
vae = AutoencoderKLTemporalDecoder.from_pretrained(
|
vae = AutoencoderKLTemporalDecoder.from_pretrained(
|
||||||
"stabilityai/stable-video-diffusion-img2vid",
|
str(_vae_path), variant="fp16",
|
||||||
subfolder="vae", variant="fp16",
|
|
||||||
)
|
)
|
||||||
vae.eval()
|
vae.eval()
|
||||||
vae.requires_grad_(False)
|
vae.requires_grad_(False)
|
||||||
|
|||||||
57
nodes.py
57
nodes.py
@@ -205,8 +205,25 @@ class STARModelLoader:
|
|||||||
# ---- Text encoder (OpenCLIP ViT-H-14) ----
|
# ---- Text encoder (OpenCLIP ViT-H-14) ----
|
||||||
from video_to_video.modules.embedder import FrozenOpenCLIPEmbedder
|
from video_to_video.modules.embedder import FrozenOpenCLIPEmbedder
|
||||||
|
|
||||||
|
_te_filename = "open_clip_vit_h_14_laion2b.bin"
|
||||||
|
_te_path = folder_paths.get_full_path("text_encoders", _te_filename)
|
||||||
|
if _te_path is None:
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
import shutil
|
||||||
|
_te_dir = folder_paths.get_folder_paths("text_encoders")[0]
|
||||||
|
os.makedirs(_te_dir, exist_ok=True)
|
||||||
|
print(f"[STAR] Downloading OpenCLIP ViT-H-14 text encoder to {_te_dir}...")
|
||||||
|
_dl = hf_hub_download(
|
||||||
|
repo_id="laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
|
||||||
|
filename="open_clip_pytorch_model.bin",
|
||||||
|
local_dir=_te_dir,
|
||||||
|
)
|
||||||
|
_te_path = os.path.join(_te_dir, _te_filename)
|
||||||
|
shutil.move(_dl, _te_path)
|
||||||
|
print(f"[STAR] Text encoder saved to {_te_path}")
|
||||||
|
|
||||||
text_encoder = FrozenOpenCLIPEmbedder(
|
text_encoder = FrozenOpenCLIPEmbedder(
|
||||||
device=device, pretrained="laion2b_s32b_b79k"
|
device=device, pretrained=_te_path
|
||||||
)
|
)
|
||||||
text_encoder.model.to(device)
|
text_encoder.model.to(device)
|
||||||
|
|
||||||
@@ -246,10 +263,42 @@ class STARModelLoader:
|
|||||||
# ---- Temporal VAE (from HuggingFace diffusers) ----
|
# ---- Temporal VAE (from HuggingFace diffusers) ----
|
||||||
from diffusers import AutoencoderKLTemporalDecoder
|
from diffusers import AutoencoderKLTemporalDecoder
|
||||||
|
|
||||||
vae = AutoencoderKLTemporalDecoder.from_pretrained(
|
_vae_dir_name = "svd-temporal-vae"
|
||||||
"stabilityai/stable-video-diffusion-img2vid",
|
_vae_path = None
|
||||||
|
for _d in folder_paths.get_folder_paths("vae"):
|
||||||
|
_candidate = os.path.join(_d, _vae_dir_name)
|
||||||
|
if os.path.isfile(os.path.join(_candidate, "config.json")):
|
||||||
|
_vae_path = _candidate
|
||||||
|
break
|
||||||
|
|
||||||
|
if _vae_path is None:
|
||||||
|
from huggingface_hub import hf_hub_download
|
||||||
|
import shutil
|
||||||
|
_vae_base = folder_paths.get_folder_paths("vae")[0]
|
||||||
|
_vae_path = os.path.join(_vae_base, _vae_dir_name)
|
||||||
|
os.makedirs(_vae_path, exist_ok=True)
|
||||||
|
print(f"[STAR] Downloading SVD temporal VAE to {_vae_path}...")
|
||||||
|
for _f in ["config.json", "diffusion_pytorch_model.fp16.safetensors"]:
|
||||||
|
_dl = hf_hub_download(
|
||||||
|
repo_id="stabilityai/stable-video-diffusion-img2vid",
|
||||||
subfolder="vae",
|
subfolder="vae",
|
||||||
variant="fp16",
|
filename=_f,
|
||||||
|
local_dir=_vae_path,
|
||||||
|
)
|
||||||
|
_dest = os.path.join(_vae_path, _f)
|
||||||
|
if _dl != _dest and os.path.isfile(_dl):
|
||||||
|
shutil.move(_dl, _dest)
|
||||||
|
# Clean up empty vae/ subdirectory created by hf_hub_download
|
||||||
|
_vae_sub = os.path.join(_vae_path, "vae")
|
||||||
|
if os.path.isdir(_vae_sub):
|
||||||
|
try:
|
||||||
|
os.rmdir(_vae_sub)
|
||||||
|
except OSError:
|
||||||
|
pass
|
||||||
|
print(f"[STAR] Temporal VAE saved to {_vae_path}")
|
||||||
|
|
||||||
|
vae = AutoencoderKLTemporalDecoder.from_pretrained(
|
||||||
|
_vae_path, variant="fp16",
|
||||||
)
|
)
|
||||||
vae.eval()
|
vae.eval()
|
||||||
vae.requires_grad_(False)
|
vae.requires_grad_(False)
|
||||||
|
|||||||
Reference in New Issue
Block a user