Fix: handle missing processor chat template
ComfyUI-converted checkpoints ship the template as chat_template.jinja (not on the processor), so apply_chat_template raised 'this processor does not have a chat template'. Backfill processor.chat_template from chat_template.jinja/.json or the tokenizer at load time, and fall back to a hand-built Qwen-VL ChatML prompt if none exists. Also keep *.jinja in the auto-download patterns. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+52
-3
@@ -90,8 +90,8 @@ def _resolve_model_source(model_path: str, auto_download: bool) -> str:
|
||||
local = snapshot_download(
|
||||
repo_id=model_path,
|
||||
local_dir=target,
|
||||
# weights + processor/tokenizer/config; skip duplicate GGUF/onnx blobs.
|
||||
allow_patterns=["*.json", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
|
||||
# weights + processor/tokenizer/config/template; skip duplicate GGUF/onnx blobs.
|
||||
allow_patterns=["*.json", "*.jinja", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
|
||||
)
|
||||
print(f"[QwenVLImageJudge] download complete: {local}")
|
||||
return local
|
||||
@@ -168,10 +168,33 @@ def _load_model(model_path: str, precision: str):
|
||||
model = _VLModel.from_pretrained(model_path, **load_kwargs)
|
||||
model.eval()
|
||||
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
||||
_ensure_chat_template(processor, model_path)
|
||||
_MODEL_CACHE[key] = (model, processor)
|
||||
return model, processor
|
||||
|
||||
|
||||
def _ensure_chat_template(processor, model_path: str):
    """Backfill ``processor.chat_template`` when the processor has none.

    Some ComfyUI-converted checkpoints ship the template as
    ``chat_template.jinja`` (or only on the tokenizer), which AutoProcessor
    doesn't always pick up. Sources are tried in this order and the first
    usable one wins:

    1. ``<model_path>/chat_template.jinja`` (raw template text)
    2. ``<model_path>/chat_template.json`` (object with a ``"chat_template"`` key)
    3. ``processor.tokenizer.chat_template``

    Args:
        processor: Object whose ``chat_template`` attribute is read and, when
            empty, assigned in place.
        model_path: Directory that is probed for the template files. If it is
            not a local directory the file probes simply find nothing.

    Returns:
        None. ``processor`` is left untouched when no source yields a template.
    """
    if getattr(processor, "chat_template", None):
        return

    for fn in ("chat_template.jinja", "chat_template.json"):
        fp = os.path.join(model_path, fn)
        if not os.path.isfile(fp):
            continue
        try:
            with open(fp, "r", encoding="utf-8") as f:
                raw = f.read()
            if fn.endswith(".json"):
                payload = json.loads(raw)
                # Valid JSON that is not an object (list, string, number) has
                # no .get(); treat it as "no template here" instead of raising.
                template = payload.get("chat_template") if isinstance(payload, dict) else None
            else:
                template = raw
        except (OSError, ValueError):
            # Unreadable, undecodable or malformed file: best effort, try the
            # next source rather than failing the model load.
            continue
        if isinstance(template, str) and not template.strip():
            # A blank file must not shadow the remaining sources.
            template = None
        if template:
            # Assign only once a usable value is in hand so the processor is
            # never overwritten with None.
            processor.chat_template = template
            return

    tok = getattr(processor, "tokenizer", None)
    tok_template = getattr(tok, "chat_template", None) if tok is not None else None
    if tok_template:
        processor.chat_template = tok_template
|
||||
|
||||
|
||||
def _build_system_prompt(axes: list[str]) -> str:
|
||||
axis_lines = "\n".join(f' "{a}": {{"score": <0..1>, "diff": "<short note>"}},' for a in axes)
|
||||
return (
|
||||
@@ -197,6 +220,28 @@ def _build_system_prompt(axes: list[str]) -> str:
|
||||
)
|
||||
|
||||
|
||||
def _format_chatml_qwenvl(messages):
    """Render ``messages`` as a Qwen-VL ChatML prompt without a chat template.

    Used when the processor has no chat template (e.g. checkpoints converted
    for ComfyUI that drop chat_template.json). Every message becomes
    ``<|im_start|>{role}\\n{content}<|im_end|>\\n``. String content is copied
    as-is; list content contributes one vision placeholder per ``image`` item
    and the ``text`` value of each ``text`` item, while other item types are
    ignored. The prompt always ends with an open assistant turn.

    Args:
        messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        The assembled prompt string.
    """
    # One placeholder per image; the caller's processor is expected to expand
    # it into the actual number of image tokens.
    image_marker = "<|vision_start|><|image_pad|><|vision_end|>"

    chunks = []
    for message in messages:
        chunks.append("<|im_start|>{}\n".format(message["role"]))
        body = message["content"]
        if not isinstance(body, str):
            for entry in body:
                kind = entry.get("type")
                if kind == "image":
                    chunks.append(image_marker)
                elif kind == "text":
                    chunks.append(entry.get("text", ""))
        else:
            chunks.append(body)
        chunks.append("<|im_end|>\n")

    # Generation prompt: leave the assistant turn open for the model to fill.
    chunks.append("<|im_start|>assistant\n")
    return "".join(chunks)
|
||||
|
||||
|
||||
def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperature):
|
||||
"""One forward pass; returns the raw decoded string."""
|
||||
messages = [
|
||||
@@ -213,7 +258,11 @@ def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperat
|
||||
},
|
||||
]
|
||||
|
||||
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
||||
try:
|
||||
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
||||
except (ValueError, AttributeError):
|
||||
# Processor/tokenizer carries no chat template -> build ChatML by hand.
|
||||
text = _format_chatml_qwenvl(messages)
|
||||
inputs = processor(text=[text], images=[ref_pil, gen_pil], return_tensors="pt")
|
||||
inputs = inputs.to(model.device)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user