Add Qwen3.5/3.6 abliterated (safetensors) + split model/quant selectors

No GGUF needed: huihui ships Qwen3.5-9B, Qwen3.6-27B and Qwen3.6-35B-A3B as abliterated multimodal SAFETENSORS, loadable via transformers' AutoModelForMultimodalLM. Added them to the model dropdown. _resolve_vl_classes now tries AutoModelForMultimodalLM (Qwen3.5/3.6) and AutoModelForImageTextToText (Qwen3-VL) in name-based order, with a load fallback across the candidates. model_select is now the model NAME only; precision is the separate quant dropdown applied to it (repo_by_precision routes a precision to a different checkpoint, e.g. the local fp8 dir). Added aliases 3.5-9b/3.6-27b/3.6-35b. Added a VRAM-by-quant table to the README. Qwen3.5/3.6 need a recent transformers.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-27 09:50:13 +02:00
parent e29df0b319
commit 5cff883914
3 changed files with 88 additions and 50 deletions
@@ -39,23 +39,36 @@ RECOMMENDED_MODELS = {
    "8b": "huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated",
    # Lightweight, already local.
    "4b": "huihui-ai/Huihui-Qwen3-VL-4B-Instruct-abliterated",
+    # Newer natively-multimodal Qwen3.5/3.6 abliterated (need a recent transformers).
+    "3.5-9b": "huihui-ai/Huihui-Qwen3.5-9B-abliterated",      # dense 10B, fast, newer
+    "3.6-27b": "huihui-ai/Huihui-Qwen3.6-27B-abliterated",    # dense 28B, strong (nf4)
+    "3.6-35b": "huihui-ai/Huihui-Qwen3.6-35B-A3B-abliterated",  # MoE, top (nf4)
 }

 # Curated model dropdown (label shown in the node -> how to load it). The label
-# carries the suggested VRAM. All entries are safetensors loaded via transformers
-# (auto-downloaded with snapshot_download). `model_path` (manual) overrides this.
-# GGUF-only models (e.g. HauhauCS Qwen3.5/3.6 Uncensored) are NOT listed — run those
-# in a dedicated GGUF node (1038lab/ComfyUI-QwenVL, KLL535 Simple-Qwen3-VL-gguf).
+# carries the param count (VRAM varies by quant). ALL entries are multimodal safetensors loaded via
+# transformers (auto-downloaded). The Qwen3.5/3.6 entries are natively-multimodal and
+# need a recent transformers (AutoModelForMultimodalLM). `model_path` overrides this.
+# GGUF-only models still need a dedicated GGUF node — not run here (transformers only).
+# model_select picks the MODEL (name only); `precision` is the separate quant dropdown.
+# VRAM (GB) ≈ params (billions) × bytes/param: bf16 ≈ 2, fp8 ≈ 1, nf4 ≈ 0.6. So on 32 GB:
+# 8-10B fits bf16; 27-35B need nf4 (or fp8 with an fp8 checkpoint, but 35B ≈ 35 GB won't fit).
+# `repo_by_precision` routes precisions to different checkpoints (local 4B: bf16/fp8 dirs).
 MANUAL_CHOICE = "(manual — use model_path below)"
 MODEL_PRESETS = {
-    "Qwen3-VL-4B abliterated (huihui) · local bf16 ~9GB": {
-        "repo": DEFAULT_MODEL_PATH, "backend": "transformers", "precision": "bf16"},
-    "Qwen3-VL-4B abliterated (huihui) · local fp8 ~5GB": {
-        "repo": DEFAULT_MODEL_PATH_FP8, "backend": "transformers", "precision": "fp8"},
-    "Qwen3-VL-8B abliterated (huihui) · bf16 ~17GB": {
-        "repo": "huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated", "backend": "transformers", "precision": "bf16"},
-    "Qwen3-VL-30B-A3B abliterated (huihui) · nf4 ~18GB (slow)": {
-        "repo": "huihui-ai/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated", "backend": "transformers", "precision": "nf4"},
+    "Qwen3-VL-4B abliterated (huihui, local) · 4B": {
+        "repo": DEFAULT_MODEL_PATH,
+        "repo_by_precision": {"fp8": DEFAULT_MODEL_PATH_FP8}},
+    "Qwen3-VL-8B abliterated (huihui) · 8B": {
+        "repo": "huihui-ai/Huihui-Qwen3-VL-8B-Instruct-abliterated"},
+    "Qwen3.5-9B abliterated (huihui) · 10B dense · newer": {
+        "repo": "huihui-ai/Huihui-Qwen3.5-9B-abliterated"},
+    "Qwen3-VL-30B-A3B abliterated (huihui) · 30B MoE": {
+        "repo": "huihui-ai/Huihui-Qwen3-VL-30B-A3B-Instruct-abliterated"},
+    "Qwen3.6-27B abliterated (huihui) · 28B dense": {
+        "repo": "huihui-ai/Huihui-Qwen3.6-27B-abliterated"},
+    "Qwen3.6-35B-A3B abliterated (huihui) · 35B MoE · top": {
+        "repo": "huihui-ai/Huihui-Qwen3.6-35B-A3B-abliterated"},
 }

 # Difference axes + a one-line definition each. Definitions are injected into the
@@ -231,22 +244,24 @@ def _tensor_to_pil(image: "torch.Tensor") -> Image.Image:
    return Image.fromarray(arr, mode="RGB")


-def _resolve_vl_class(model_path: str):
-    """Pick the right transformers class. AutoModelForImageTextToText reads the
-    checkpoint's `architectures` and instantiates the correct dense
-    (Qwen3VLForConditionalGeneration) or MoE (Qwen3VLMoeForConditionalGeneration)
-    class automatically — so 4B/8B *and* 30B-A3B all work without branching."""
-    try:
-        from transformers import AutoModelForImageTextToText as _Auto
-        return _Auto
-    except ImportError:  # pragma: no cover - older transformers
-        name = model_path.lower()
-        is_moe = any(t in name for t in ("a3b", "moe", "30b", "235b"))
-        if is_moe:
-            from transformers import Qwen3VLMoeForConditionalGeneration as _C
-        else:
-            from transformers import Qwen3VLForConditionalGeneration as _C
-        return _C
+def _resolve_vl_classes(model_path: str):
+    """Ordered list of candidate transformers auto classes to try. Qwen3-VL
+    (4B/8B/30B) loads via AutoModelForImageTextToText; the newer natively-multimodal
+    Qwen3.5/3.6 load via AutoModelForMultimodalLM. The two autos have separate
+    registries, so we try the one most likely for this model first (by name) and
+    fall back to the other, then to explicit Qwen3-VL classes on old transformers."""
+    import transformers
+    name = model_path.lower()
+    new_mm = any(t in name for t in ("3.5", "3.6", "qwen3_5", "qwen3_6", "qwen3.5", "qwen3.6"))
+    order = (["AutoModelForMultimodalLM", "AutoModelForImageTextToText"] if new_mm
+             else ["AutoModelForImageTextToText", "AutoModelForMultimodalLM"])
+    classes = [getattr(transformers, n) for n in order if getattr(transformers, n, None)]
+    is_moe = any(t in name for t in ("a3b", "moe", "30b", "235b"))
+    for n in (("Qwen3VLMoeForConditionalGeneration",) if is_moe else ("Qwen3VLForConditionalGeneration",)):
+        c = getattr(transformers, n, None)
+        if c:
+            classes.append(c)
+    return classes


 def _load_model(model_path: str, precision: str):
@@ -257,7 +272,7 @@ def _load_model(model_path: str, precision: str):
    # Imported lazily so the node can be registered even if transformers is old.
    from transformers import AutoProcessor

-    _VLModel = _resolve_vl_class(model_path)
+    candidates = _resolve_vl_classes(model_path)
    load_kwargs = dict(device_map="auto", trust_remote_code=True, low_cpu_mem_usage=True)

    if precision == "nf4":
@@ -275,7 +290,19 @@ def _load_model(model_path: str, precision: str):
    else:
        load_kwargs["dtype"] = torch.bfloat16 if precision == "bf16" else torch.float16

-    model = _VLModel.from_pretrained(model_path, **load_kwargs)
+    model, last_err = None, None
+    for cls in candidates:
+        try:
+            model = cls.from_pretrained(model_path, **load_kwargs)
+            break
+        except Exception as e:  # arch not in this auto class's registry -> try the next
+            last_err = e
+            model = None
+    if model is None:
+        raise RuntimeError(
+            f"[QwenVLImageJudge] could not load {model_path} with any of "
+            f"{[c.__name__ for c in candidates]}. Newer Qwen3.5/3.6 need a recent "
+            f"transformers (AutoModelForMultimodalLM). Last error: {last_err}")
    model.eval()
    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
    _ensure_chat_template(processor, model_path)
@@ -716,18 +743,20 @@ class QwenVLImageJudge:
        if not axis_list:
            axis_list = list(PROFILES.get(profile, PROFILES["general"]))

-        # Resolve the model: manual model_path overrides the dropdown preset.
+        # Resolve the model: manual model_path overrides the dropdown. `precision` is the
+        # quant dropdown and applies to whichever model is chosen.
+        eff_precision = precision
        if model_path.strip():
-            eff_repo, eff_precision = model_path.strip(), precision
-            eff_backend = "gguf" if eff_repo.lower().endswith(".gguf") else "transformers"
+            eff_repo = model_path.strip()
        else:
            preset = MODEL_PRESETS.get(model_select)
            if not preset:
                msg = "[QwenVLImageJudge] pick a model in model_select, or fill model_path."
                print(msg); return (0.0, "{}", msg, msg, "")
-            eff_repo, eff_backend, eff_precision = preset["repo"], preset["backend"], preset["precision"]
+            # repo_by_precision routes a quant to a different checkpoint (e.g. local fp8 dir).
+            eff_repo = preset.get("repo_by_precision", {}).get(precision, preset["repo"])

-        if eff_backend == "gguf":
+        if eff_repo.lower().endswith(".gguf"):
            msg = (f"[QwenVLImageJudge] '{eff_repo}' is GGUF — this node is transformers "
                   f"(safetensors) only. Run GGUF models in a dedicated GGUF node "
                   f"(1038lab/ComfyUI-QwenVL or KLL535 Simple-Qwen3-VL-gguf).")