Fix: handle missing processor chat template

ComfyUI-converted checkpoints ship the chat template as a standalone chat_template.jinja file rather than on the processor, so apply_chat_template raised 'this processor does not have a chat template'. At load time, backfill processor.chat_template from chat_template.jinja, chat_template.json, or the tokenizer; if none of these provides a template, fall back to a hand-built Qwen-VL ChatML prompt. Also add *.jinja to the auto-download patterns.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 22:36:39 +02:00
parent 95198a15b5
commit aa3983d94a
1 changed files with 52 additions and 3 deletions
@@ -90,8 +90,8 @@ def _resolve_model_source(model_path: str, auto_download: bool) -> str:
        local = snapshot_download(
            repo_id=model_path,
            local_dir=target,
-            # weights + processor/tokenizer/config; skip duplicate GGUF/onnx blobs.
-            allow_patterns=["*.json", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
+            # weights + processor/tokenizer/config/template; skip duplicate GGUF/onnx blobs.
+            allow_patterns=["*.json", "*.jinja", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
        )
        print(f"[QwenVLImageJudge] download complete: {local}")
        return local
@@ -168,10 +168,33 @@ def _load_model(model_path: str, precision: str):
    model = _VLModel.from_pretrained(model_path, **load_kwargs)
    model.eval()
    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+    _ensure_chat_template(processor, model_path)
    _MODEL_CACHE[key] = (model, processor)
    return model, processor


+def _ensure_chat_template(processor, model_path: str):
+    """Backfill ``processor.chat_template`` when the loaded processor has none.
+
+    Some ComfyUI-converted checkpoints ship the template as chat_template.jinja
+    (or only on the tokenizer), which AutoProcessor doesn't always pick up.
+    Sources are tried in order: ``chat_template.jinja``, then
+    ``chat_template.json`` (its ``"chat_template"`` key) inside ``model_path``,
+    then ``processor.tokenizer.chat_template``.
+
+    Args:
+        processor: Object whose ``chat_template`` attribute is filled in place.
+        model_path: Directory that is searched for the template files.
+
+    Returns:
+        None. If no source yields a usable template the processor is left
+        untouched.
+    """
+    if getattr(processor, "chat_template", None):
+        return
+    for fn in ("chat_template.jinja", "chat_template.json"):
+        fp = os.path.join(model_path, fn)
+        if not os.path.isfile(fp):
+            continue
+        try:
+            with open(fp, "r", encoding="utf-8") as f:
+                raw = f.read()
+            template = raw
+            if fn.endswith(".json"):
+                data = json.loads(raw)
+                # A JSON file holding a list/string/number has no .get(); treat
+                # it as "no template here" instead of crashing model loading.
+                template = data.get("chat_template") if isinstance(data, dict) else None
+        except (OSError, ValueError):
+            # Unreadable or malformed file: best effort, try the next source.
+            continue
+        # A blank template would silently render an empty prompt, so skip it
+        # and keep looking rather than accepting it.
+        if isinstance(template, str) and not template.strip():
+            template = None
+        if template:
+            processor.chat_template = template
+            return
+    tok = getattr(processor, "tokenizer", None)
+    if tok is not None and getattr(tok, "chat_template", None):
+        processor.chat_template = tok.chat_template
+
+
 def _build_system_prompt(axes: list[str]) -> str:
    axis_lines = "\n".join(f'    "{a}": {{"score": <0..1>, "diff": "<short note>"}},' for a in axes)
    return (
@@ -197,6 +220,28 @@ def _build_system_prompt(axes: list[str]) -> str:
    )


+def _format_chatml_qwenvl(messages):
+    """Render ``messages`` as a Qwen-VL ChatML prompt string by hand.
+
+    Used when the processor has no chat template. Every message becomes
+    ``<|im_start|>{role}\\n{content}<|im_end|>\\n``; string content is copied
+    verbatim, while list content is rendered item by item: ``image`` items
+    become ``<|vision_start|><|image_pad|><|vision_end|>``, ``text`` items
+    contribute their ``text`` value, and any other item type is dropped. An
+    opening assistant turn is always appended at the end.
+    """
+    image_slot = "<|vision_start|><|image_pad|><|vision_end|>"
+
+    def _render(content):
+        # Plain strings pass through; structured content is flattened in order.
+        if isinstance(content, str):
+            return content
+        pieces = []
+        for entry in content:
+            kind = entry.get("type")
+            if kind == "image":
+                pieces.append(image_slot)
+            elif kind == "text":
+                pieces.append(entry.get("text", ""))
+        return "".join(pieces)
+
+    turns = [
+        f"<|im_start|>{m['role']}\n{_render(m['content'])}<|im_end|>\n"
+        for m in messages
+    ]
+    return "".join(turns) + "<|im_start|>assistant\n"
+
+
 def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperature):
    """One forward pass; returns the raw decoded string."""
    messages = [
@@ -213,7 +258,11 @@ def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperat
        },
    ]

-    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    try:
+        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    except (ValueError, AttributeError):
+        # Processor/tokenizer carries no chat template -> build ChatML by hand.
+        text = _format_chatml_qwenvl(messages)
    inputs = processor(text=[text], images=[ref_pil, gen_pil], return_tensors="pt")
    inputs = inputs.to(model.device)