Fix: handle missing processor chat template
ComfyUI-converted checkpoints ship the template as chat_template.jinja (not on the processor), so apply_chat_template raised 'this processor does not have a chat template'. Backfill processor.chat_template from chat_template.jinja/.json or the tokenizer at load time, and fall back to a hand-built Qwen-VL ChatML prompt if none exists. Also keep *.jinja in the auto-download patterns. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+52
-3
@@ -90,8 +90,8 @@ def _resolve_model_source(model_path: str, auto_download: bool) -> str:
|
|||||||
local = snapshot_download(
|
local = snapshot_download(
|
||||||
repo_id=model_path,
|
repo_id=model_path,
|
||||||
local_dir=target,
|
local_dir=target,
|
||||||
# weights + processor/tokenizer/config; skip duplicate GGUF/onnx blobs.
|
# weights + processor/tokenizer/config/template; skip duplicate GGUF/onnx blobs.
|
||||||
allow_patterns=["*.json", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
|
allow_patterns=["*.json", "*.jinja", "*.safetensors", "*.txt", "*.model", "merges.txt", "*.py"],
|
||||||
)
|
)
|
||||||
print(f"[QwenVLImageJudge] download complete: {local}")
|
print(f"[QwenVLImageJudge] download complete: {local}")
|
||||||
return local
|
return local
|
||||||
@@ -168,10 +168,33 @@ def _load_model(model_path: str, precision: str):
|
|||||||
model = _VLModel.from_pretrained(model_path, **load_kwargs)
|
model = _VLModel.from_pretrained(model_path, **load_kwargs)
|
||||||
model.eval()
|
model.eval()
|
||||||
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
||||||
|
_ensure_chat_template(processor, model_path)
|
||||||
_MODEL_CACHE[key] = (model, processor)
|
_MODEL_CACHE[key] = (model, processor)
|
||||||
return model, processor
|
return model, processor
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_chat_template(processor, model_path: str):
    """Backfill ``processor.chat_template`` when the processor has none.

    Some ComfyUI-converted checkpoints ship the template as
    ``chat_template.jinja`` (or only on the tokenizer), which AutoProcessor
    doesn't always pick up. Sources are tried in this order and the first
    usable one wins:

    1. ``<model_path>/chat_template.jinja`` -- the raw file text.
    2. ``<model_path>/chat_template.json`` -- the ``"chat_template"`` key of
       a JSON object.
    3. ``processor.tokenizer.chat_template``.

    Args:
        processor: Object whose ``chat_template`` attribute is filled in
            place. A processor that already has a truthy template is left
            untouched.
        model_path: Directory expected to hold the template files. If the
            files are not there, only the tokenizer fallback applies.

    Returns:
        None. When no source yields a template the processor is not
        modified, so callers still need their own fallback.
    """
    if getattr(processor, "chat_template", None):
        return

    for fn in ("chat_template.jinja", "chat_template.json"):
        fp = os.path.join(model_path, fn)
        if not os.path.isfile(fp):
            continue
        try:
            with open(fp, "r", encoding="utf-8") as f:
                raw = f.read()
            if fn.endswith(".json"):
                data = json.loads(raw)
                # Valid JSON that is not an object (list, string, number)
                # has no "chat_template" key; treat it as "no template"
                # instead of letting .get() raise AttributeError.
                template = data.get("chat_template") if isinstance(data, dict) else None
            else:
                template = raw
        except (OSError, ValueError):
            # Best effort: unreadable, undecodable or malformed files are
            # skipped so the next source can be tried.
            continue
        # A blank file is truthy as a string but useless as a template.
        if isinstance(template, str) and not template.strip():
            template = None
        if template:
            processor.chat_template = template
            return

    tok = getattr(processor, "tokenizer", None)
    if tok is not None and getattr(tok, "chat_template", None):
        processor.chat_template = tok.chat_template
|
||||||
|
|
||||||
|
|
||||||
def _build_system_prompt(axes: list[str]) -> str:
|
def _build_system_prompt(axes: list[str]) -> str:
|
||||||
axis_lines = "\n".join(f' "{a}": {{"score": <0..1>, "diff": "<short note>"}},' for a in axes)
|
axis_lines = "\n".join(f' "{a}": {{"score": <0..1>, "diff": "<short note>"}},' for a in axes)
|
||||||
return (
|
return (
|
||||||
@@ -197,6 +220,28 @@ def _build_system_prompt(axes: list[str]) -> str:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _format_chatml_qwenvl(messages):
    """Render ``messages`` as a Qwen-VL ChatML prompt without a chat template.

    Used when the processor has no chat template (e.g. checkpoints converted
    for ComfyUI that drop chat_template.json).

    Each message becomes ``<|im_start|>{role}\\n{content}<|im_end|>\\n``.
    String content is emitted as-is. For list content, every item with
    ``type == "image"`` becomes one
    ``<|vision_start|><|image_pad|><|vision_end|>`` placeholder, every item
    with ``type == "text"`` contributes its ``"text"`` value (empty if the
    key is missing), and items of any other type are ignored. An open
    assistant turn is always appended so generation continues from there.

    Args:
        messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        The assembled prompt string.
    """
    image_slot = "<|vision_start|><|image_pad|><|vision_end|>"

    def _render(body):
        # Plain strings pass straight through; structured content is
        # flattened item by item.
        if isinstance(body, str):
            return body
        pieces = []
        for entry in body:
            kind = entry.get("type")
            if kind == "image":
                pieces.append(image_slot)
            elif kind == "text":
                pieces.append(entry.get("text", ""))
        return "".join(pieces)

    turns = [
        f"<|im_start|>{turn['role']}\n{_render(turn['content'])}<|im_end|>\n"
        for turn in messages
    ]
    return "".join(turns) + "<|im_start|>assistant\n"
|
||||||
|
|
||||||
|
|
||||||
def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperature):
|
def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperature):
|
||||||
"""One forward pass; returns the raw decoded string."""
|
"""One forward pass; returns the raw decoded string."""
|
||||||
messages = [
|
messages = [
|
||||||
@@ -213,7 +258,11 @@ def _run_once(model, processor, ref_pil, gen_pil, axes, max_new_tokens, temperat
|
|||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
try:
|
||||||
|
text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
||||||
|
except (ValueError, AttributeError):
|
||||||
|
# Processor/tokenizer carries no chat template -> build ChatML by hand.
|
||||||
|
text = _format_chatml_qwenvl(messages)
|
||||||
inputs = processor(text=[text], images=[ref_pil, gen_pil], return_tensors="pt")
|
inputs = processor(text=[text], images=[ref_pil, gen_pil], return_tensors="pt")
|
||||||
inputs = inputs.to(model.device)
|
inputs = inputs.to(model.device)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user