Add model dropdown (presets w/ VRAM) + manual override; list HauhauCS GGUF models

Add a new model_select dropdown whose labels carry the suggested VRAM: huihui Qwen3-VL 4B (local) / 8B / 30B-A3B (transformers, auto-download) and HauhauCS Qwen3.5-9B / Qwen3.6-35B-A3B Uncensored Aggressive (GGUF). model_path is now the manual override (empty = use the dropdown). agent_bridge gains --model-select and --model-path. The HauhauCS models are GGUF-only (no safetensors), so the transformers backend cannot load them yet — selecting one returns a clear 'GGUF backend pending' message until that backend is added.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 09:24:28 +02:00
parent 887dfc0bbb
commit 34adb946a4
2 changed files with 56 additions and 8 deletions
@@ -48,7 +48,8 @@ def _http_json(url: str, payload: dict | None = None, timeout: int = 30):
 def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mode: str,
-            reference_description: str = "", profile: str = ""):
+            reference_description: str = "", profile: str = "", model_select: str = "",
            model_path: str = ""):
    """Set the receptor's prompt/seed and the judge's mode/run_tag in-place.
    compare mode needs a receptor (to inject the prompt). describe mode is the first
@@ -71,6 +72,10 @@ def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mo
                inputs["reference_description"] = reference_description
            if profile:
                inputs["profile"] = profile
            if model_select:
                inputs["model_select"] = model_select
            if model_path:
                inputs["model_path"] = model_path
    if mode == "compare" and not found_receptor:
        raise SystemExit(
            f"[agent_bridge] no '{RECEPTOR_CLASS}' node in the workflow — add the "
@@ -119,6 +124,8 @@ def main(argv=None):
    ap.add_argument("--run-tag", default="")
    ap.add_argument("--profile", default="",
                    help="analysis profile on the judge (general/oral/penetration/handjob/solo)")
    ap.add_argument("--model-select", default="", help="judge model dropdown label (overrides workflow)")
    ap.add_argument("--model-path", default="", help="manual judge model path/repo (overrides dropdown)")
    ap.add_argument("--ref-desc", default="",
                    help="canonical reference text to anchor compare on (from the describe pass)")
    ap.add_argument("--ref-desc-file", default="",
@@ -143,7 +150,7 @@ def main(argv=None):
        graph = json.load(f)
    _inject(graph, args.prompt, args.negative, args.seed, args.run_tag, args.mode, ref_desc,
-            args.profile)
+            args.profile, args.model_select, args.model_path)
    client_id = uuid.uuid4().hex
    try:
@@ -41,6 +41,26 @@ RECOMMENDED_MODELS = {
    "4b": "huihui-ai/Huihui-Qwen3-VL-4B-Instruct-abliterated",
 }
 # Curated model dropdown (label shown in the node -> how to load it). The label
 # carries the suggested VRAM. `backend`: "transformers" = safetensors via this node
 # (auto-downloaded with snapshot_download); "gguf" = GGUF + mmproj (needs the GGUF
 # backend). `model_path` (manual) overrides this when non-empty.
 MANUAL_CHOICE = "(manual — use model_path below)"
 MODEL_PRESETS = {
    "Qwen3-VL-4B abliterated (huihui) · local bf16 ~9GB": {
        "repo": DEFAULT_MODEL_PATH, "backend": "transformers", "precision": "bf16"},
    "Qwen3-VL-4B abliterated (huihui) · local fp8 ~5GB": {
        "repo": DEFAULT_MODEL_PATH_FP8, "backend": "transformers", "precision": "fp8"},
    "Qwen3-VL-8B abliterated (huihui) · bf16 ~17GB": {
        "repo": "huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated", "backend": "transformers", "precision": "bf16"},
    "Qwen3-VL-30B-A3B abliterated (huihui) · nf4 ~18GB (slow)": {
        "repo": "huihui-ai/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated", "backend": "transformers", "precision": "nf4"},
    "Qwen3.5-9B Uncensored Aggressive (HauhauCS) · GGUF Q4 ~8GB": {
        "repo": "HauhauCS/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive", "backend": "gguf", "precision": "Q4_K_M"},
    "Qwen3.6-35B-A3B Uncensored Aggressive (HauhauCS) · GGUF Q4 ~22GB": {
        "repo": "HauhauCS/Qwen3.6-35B-A3B-Uncensored-HauhauCS-Aggressive", "backend": "gguf", "precision": "Q4_K_M"},
 }
 # Difference axes + a one-line definition each. Definitions are injected into the
 # prompt so the model fills the right axis (e.g. gender_mix = a count, not a position)
 # and the action/pose cluster is captured in detail. Fully configurable on the node;
@@ -665,7 +685,10 @@ class QwenVLImageJudge:
                # Analysis profile: act-specialized axis set (distance-aware where it
                # matters). `axes` below overrides it when non-empty.
                "profile": (list(PROFILES.keys()), {"default": "general"}),
-                "model_path": ("STRING", {"default": DEFAULT_MODEL_PATH}),
+                # Curated model dropdown (label shows VRAM). model_path below overrides it.
                "model_select": ([MANUAL_CHOICE] + list(MODEL_PRESETS.keys()),
                                 {"default": list(MODEL_PRESETS.keys())[0]}),
                "model_path": ("STRING", {"default": ""}),   # manual override (local dir / HF repo / alias)
                "precision": (["bf16", "fp16", "fp8", "nf4"], {"default": "bf16"}),
                "axes": ("STRING", {"default": "", "multiline": True}),
                "max_new_tokens": ("INT", {"default": 1024, "min": 64, "max": 4096}),
@@ -687,7 +710,8 @@ class QwenVLImageJudge:
        }
    def judge(self, reference_image, mode, model_path, precision, axes,
-              max_new_tokens, temperature, swap_eval, profile="general", generated_image=None,
+              max_new_tokens, temperature, swap_eval, profile="general",
              model_select=MANUAL_CHOICE, generated_image=None,
              keep_loaded=True, auto_download=True,
              report_dir="", run_tag="", prompt_used="", reference_description=""):
        # `axes` overrides the profile when provided; otherwise use the profile's axis set.
@@ -695,19 +719,36 @@ class QwenVLImageJudge:
        if not axis_list:
            axis_list = list(PROFILES.get(profile, PROFILES["general"]))
        # Resolve the model: manual model_path overrides the dropdown preset.
        if model_path.strip():
            eff_repo, eff_precision = model_path.strip(), precision
            eff_backend = "gguf" if eff_repo.lower().endswith(".gguf") else "transformers"
        else:
            preset = MODEL_PRESETS.get(model_select)
            if not preset:
                msg = "[QwenVLImageJudge] pick a model in model_select, or fill model_path."
                print(msg); return (0.0, "{}", msg, msg, "")
            eff_repo, eff_backend, eff_precision = preset["repo"], preset["backend"], preset["precision"]
        if eff_backend == "gguf":
            msg = (f"[QwenVLImageJudge] '{eff_repo}' is a GGUF model — the GGUF backend "
                   f"is not enabled yet (pending Ollama vs llama-cpp-python choice). "
                   f"Use a transformers (safetensors) model for now.")
            print(msg); return (0.0, "{}", msg, msg, "")
        try:
-            resolved_path = _resolve_model_source(model_path, auto_download)
+            resolved_path = _resolve_model_source(eff_repo, auto_download)
        except Exception as e:  # missing model / download failure -> surface as score 0
            msg = str(e)
            print(msg)
            return (0.0, "{}", msg, msg, "")
        ref_pil = _tensor_to_pil(reference_image)
-        model, processor = _load_model(resolved_path, precision)
+        model, processor = _load_model(resolved_path, eff_precision)
        if mode == "describe":
            return self._describe(model, processor, ref_pil, axis_list, max_new_tokens,
-                                  temperature, resolved_path, precision, keep_loaded,
+                                  temperature, resolved_path, eff_precision, keep_loaded,
                                  report_dir, run_tag)
        if generated_image is None:
@@ -735,7 +776,7 @@ class QwenVLImageJudge:
                raw_all = raw1 + "\n--- SWAPPED ---\n" + raw2
        if not keep_loaded:
-            _MODEL_CACHE.pop((resolved_path, precision), None)
+            _MODEL_CACHE.pop((resolved_path, eff_precision), None)
            del model
            torch.cuda.empty_cache()