Add model dropdown (presets w/ VRAM) + manual override; list HauhauCS GGUF models

New model_select dropdown with suggested VRAM in each label: huihui Qwen3-VL
4B(local)/8B/30B-A3B (transformers, auto-download) and HauhauCS Qwen3.5-9B /
Qwen3.6-35B-A3B Uncensored Aggressive (GGUF). model_path is now the manual
override (empty = use dropdown). agent_bridge gains --model-select/--model-path.
The HauhauCS models are GGUF-only (no safetensors), so the transformers backend
can't load them yet — selecting one returns a clear "GGUF backend is not enabled
yet" message (score 0) until the backend is added.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-27 09:24:28 +02:00
parent 887dfc0bbb
commit 34adb946a4
2 changed files with 56 additions and 8 deletions
+47 -6
View File
@@ -41,6 +41,26 @@ RECOMMENDED_MODELS = {
"4b": "huihui-ai/Huihui-Qwen3-VL-4B-Instruct-abliterated",
}
# Curated model dropdown: maps the label shown in the node to how that model is
# loaded. Each label carries the suggested VRAM. Every preset has three keys:
#   "repo"      - the model source handed to _resolve_model_source().
#   "backend"   - "transformers" = safetensors via this node (auto-downloaded with
#                 snapshot_download); "gguf" = GGUF + mmproj (needs the GGUF
#                 backend; judge() currently returns a "not enabled yet" message
#                 for these instead of loading them).
#   "precision" - used in place of the node's `precision` input whenever the
#                 preset (rather than a manual model_path) is in effect.
# `model_path` (manual) overrides the preset entirely when non-empty.
# Key order matters: the first key is used as the dropdown default.

# Dropdown entry listed ahead of the presets. It is deliberately not a key of
# MODEL_PRESETS, so choosing it with an empty model_path yields the
# "pick a model" message rather than loading anything.
MANUAL_CHOICE = "(manual — use model_path below)"
MODEL_PRESETS = {
# Local 4B checkpoints. DEFAULT_MODEL_PATH / DEFAULT_MODEL_PATH_FP8 are defined
# elsewhere in this module (not visible here) — presumably local directories.
"Qwen3-VL-4B abliterated (huihui) · local bf16 ~9GB": {
"repo": DEFAULT_MODEL_PATH, "backend": "transformers", "precision": "bf16"},
"Qwen3-VL-4B abliterated (huihui) · local fp8 ~5GB": {
"repo": DEFAULT_MODEL_PATH_FP8, "backend": "transformers", "precision": "fp8"},
# Larger huihui models, referenced by Hugging Face repo id.
"Qwen3-VL-8B abliterated (huihui) · bf16 ~17GB": {
"repo": "huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated", "backend": "transformers", "precision": "bf16"},
"Qwen3-VL-30B-A3B abliterated (huihui) · nf4 ~18GB (slow)": {
"repo": "huihui-ai/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated", "backend": "transformers", "precision": "nf4"},
# GGUF-only models: listed for discoverability, but not loadable until a GGUF
# backend exists. Here "precision" holds the GGUF quantization name.
"Qwen3.5-9B Uncensored Aggressive (HauhauCS) · GGUF Q4 ~8GB": {
"repo": "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive", "backend": "gguf", "precision": "Q4_K_M"},
"Qwen3.6-35B-A3B Uncensored Aggressive (HauhauCS) · GGUF Q4 ~22GB": {
"repo": "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive", "backend": "gguf", "precision": "Q4_K_M"},
}
# Difference axes + a one-line definition each. Definitions are injected into the
# prompt so the model fills the right axis (e.g. gender_mix = a count, not a position)
# and the action/pose cluster is captured in detail. Fully configurable on the node;
@@ -665,7 +685,10 @@ class QwenVLImageJudge:
# Analysis profile: act-specialized axis set (distance-aware where it
# matters). `axes` below overrides it when non-empty.
"profile": (list(PROFILES.keys()), {"default": "general"}),
"model_path": ("STRING", {"default": DEFAULT_MODEL_PATH}),
# Curated model dropdown (label shows VRAM). model_path below overrides it.
"model_select": ([MANUAL_CHOICE] + list(MODEL_PRESETS.keys()),
{"default": list(MODEL_PRESETS.keys())[0]}),
"model_path": ("STRING", {"default": ""}), # manual override (local dir / HF repo / alias)
"precision": (["bf16", "fp16", "fp8", "nf4"], {"default": "bf16"}),
"axes": ("STRING", {"default": "", "multiline": True}),
"max_new_tokens": ("INT", {"default": 1024, "min": 64, "max": 4096}),
@@ -687,7 +710,8 @@ class QwenVLImageJudge:
}
def judge(self, reference_image, mode, model_path, precision, axes,
max_new_tokens, temperature, swap_eval, profile="general", generated_image=None,
max_new_tokens, temperature, swap_eval, profile="general",
model_select=MANUAL_CHOICE, generated_image=None,
keep_loaded=True, auto_download=True,
report_dir="", run_tag="", prompt_used="", reference_description=""):
# `axes` overrides the profile when provided; otherwise use the profile's axis set.
@@ -695,19 +719,36 @@ class QwenVLImageJudge:
if not axis_list:
axis_list = list(PROFILES.get(profile, PROFILES["general"]))
# Resolve the model: manual model_path overrides the dropdown preset.
if model_path.strip():
eff_repo, eff_precision = model_path.strip(), precision
eff_backend = "gguf" if eff_repo.lower().endswith(".gguf") else "transformers"
else:
preset = MODEL_PRESETS.get(model_select)
if not preset:
msg = "[QwenVLImageJudge] pick a model in model_select, or fill model_path."
print(msg); return (0.0, "{}", msg, msg, "")
eff_repo, eff_backend, eff_precision = preset["repo"], preset["backend"], preset["precision"]
if eff_backend == "gguf":
msg = (f"[QwenVLImageJudge] '{eff_repo}' is a GGUF model — the GGUF backend "
f"is not enabled yet (pending Ollama vs llama-cpp-python choice). "
f"Use a transformers (safetensors) model for now.")
print(msg); return (0.0, "{}", msg, msg, "")
try:
resolved_path = _resolve_model_source(model_path, auto_download)
resolved_path = _resolve_model_source(eff_repo, auto_download)
except Exception as e: # missing model / download failure -> surface as score 0
msg = str(e)
print(msg)
return (0.0, "{}", msg, msg, "")
ref_pil = _tensor_to_pil(reference_image)
model, processor = _load_model(resolved_path, precision)
model, processor = _load_model(resolved_path, eff_precision)
if mode == "describe":
return self._describe(model, processor, ref_pil, axis_list, max_new_tokens,
temperature, resolved_path, precision, keep_loaded,
temperature, resolved_path, eff_precision, keep_loaded,
report_dir, run_tag)
if generated_image is None:
@@ -735,7 +776,7 @@ class QwenVLImageJudge:
raw_all = raw1 + "\n--- SWAPPED ---\n" + raw2
if not keep_loaded:
_MODEL_CACHE.pop((resolved_path, precision), None)
_MODEL_CACHE.pop((resolved_path, eff_precision), None)
del model
torch.cuda.empty_cache()