Fix: handle missing processor chat template

ComfyUI-converted checkpoints ship the chat template as a standalone chat_template.jinja file instead of attaching it to the processor, so apply_chat_template raised 'this processor does not have a chat template'. At load time, backfill processor.chat_template from chat_template.jinja, chat_template.json, or the tokenizer; if no template is found anywhere, fall back to a hand-built Qwen-VL ChatML prompt. Also add *.jinja to the auto-download patterns so the template file is actually fetched.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 22:36:39 +02:00
parent 95198a15b5
commit aa3983d94a
1 changed file with 52 additions and 3 deletions
@@ -90,8 +90,8 @@ def _resolve_model_source(model_path: str, auto_download: bool) -> str:
        local = snapshot_download(
            repo_id=model_path,
            local_dir=target,
-            # weights + processor/tokenizer/config; skip duplicate GGUF/onnx blobs.
+            # weights + processor/tokenizer/config/template; skip duplicate GGUF/onnx blobs.
-            allow_patterns=["*.json", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
+            allow_patterns=["*.json", "*.jinja", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
        )
        print(f"[QwenVLImageJudge] download complete: {local}")
        return local
@@ -168,10 +168,33 @@ def _load_model(model_path: str, precision: str):
    model = _VLModel.from_pretrained(model_path, **load_kwargs)
    model.eval()
    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
    _ensure_chat_template(processor, model_path)
    _MODEL_CACHE[key] = (model, processor)
    return model, processor
 def _ensure_chat_template(processor, model_path: str):
    """Backfill ``processor.chat_template`` when the processor has none.

    Some ComfyUI-converted checkpoints ship the template as chat_template.jinja
    (or only on the tokenizer), which AutoProcessor doesn't always pick up.
    Sources are tried in order and the first usable one wins:

      1. ``<model_path>/chat_template.jinja`` - the file content is the template.
      2. ``<model_path>/chat_template.json``  - the ``"chat_template"`` key of a
         JSON object.
      3. ``processor.tokenizer.chat_template``.

    This is best-effort: unreadable, badly encoded or malformed files are
    skipped, and if no source yields a template the processor is left
    untouched.

    Args:
        processor: Object whose ``chat_template`` attribute is filled in place.
        model_path: Directory that is searched for the template files.

    Returns:
        None. The processor is mutated in place.
    """
    if getattr(processor, "chat_template", None):
        return
    for fn in ("chat_template.jinja", "chat_template.json"):
        fp = os.path.join(model_path, fn)
        if not os.path.isfile(fp):
            continue
        try:
            with open(fp, "r", encoding="utf-8") as f:
                raw = f.read()
            if fn.endswith(".json"):
                payload = json.loads(raw)
                # Valid JSON is not necessarily an object (e.g. a bare list);
                # calling .get on it would raise AttributeError and abort loading.
                template = payload.get("chat_template") if isinstance(payload, dict) else None
            else:
                template = raw
        except (OSError, ValueError):
            # Covers I/O errors, UnicodeDecodeError and JSONDecodeError:
            # ignore this file and move on to the next source.
            continue
        # Validate before assigning so a blank/empty candidate never lands on
        # the processor and never shadows a later, valid source.
        is_blank_text = isinstance(template, str) and not template.strip()
        if template and not is_blank_text:
            processor.chat_template = template
            return
    tok = getattr(processor, "tokenizer", None)
    if tok is not None and getattr(tok, "chat_template", None):
        processor.chat_template = tok.chat_template
 def _build_system_prompt(axes: list[str]) -> str:
    axis_lines = "\n".join(f'    "{a}": {{"score": <0..1>, "diff": "<short note>"}},' for a in axes)
    return (
@@ -197,6 +220,28 @@ def _build_system_prompt(axes: list[str]) -> str:
    )
 def _format_chatml_qwenvl(messages):
    """Render ``messages`` as a Qwen-VL ChatML prompt string by hand.

    Used when the processor carries no chat template (e.g. checkpoints
    converted for ComfyUI that drop chat_template.json). Every turn becomes
    ``<|im_start|>{role}\\n{content}<|im_end|>\\n`` and the prompt ends with an
    open assistant turn. String content is emitted verbatim; for list content
    each ``image`` item becomes ``<|vision_start|><|image_pad|><|vision_end|>``
    (which the processor then expands to the right number of image tokens),
    each ``text`` item contributes its ``text`` value, and any other item type
    is dropped.
    """
    image_slot = "<|vision_start|><|image_pad|><|vision_end|>"

    def render(content):
        # Plain strings pass through unchanged.
        if isinstance(content, str):
            return content
        chunks = []
        for entry in content:
            kind = entry.get("type")
            if kind == "image":
                chunks.append(image_slot)
            elif kind == "text":
                chunks.append(entry.get("text", ""))
        return "".join(chunks)

    turns = [
        f"<|im_start|>{turn['role']}\n{render(turn['content'])}<|im_end|>\n"
        for turn in messages
    ]
    # Trailing generation prompt: leave the assistant turn open.
    return "".join(turns) + "<|im_start|>assistant\n"
 def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperature):
    """One forward pass; returns the raw decoded string."""
    messages = [
@@ -213,7 +258,11 @@ def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperat
        },
    ]
-    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    try:
        text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    except (ValueError, AttributeError):
        # Processor/tokenizer carries no chat template -> build ChatML by hand.
        text = _format_chatml_qwenvl(messages)
    inputs = processor(text=[text], images=[ref_pil, gen_pil], return_tensors="pt")
    inputs = inputs.to(model.device)