def _ensure_chat_template(processor, model_path: str) -> None:
    """Backfill ``processor.chat_template`` when the processor has none.

    Some ComfyUI-converted checkpoints ship the template as
    ``chat_template.jinja`` (or only on the tokenizer), which AutoProcessor
    doesn't always pick up. Sources are tried in this order and the first
    usable one wins:

    1. ``<model_path>/chat_template.jinja`` -- the file content is the template.
    2. ``<model_path>/chat_template.json`` -- the ``"chat_template"`` key of a
       JSON object.
    3. ``processor.tokenizer.chat_template``.

    The lookup is best-effort: an unreadable or malformed file is reported
    and skipped, never raised. If no source yields a template the processor
    is left untouched.

    Args:
        processor: Object whose ``chat_template`` attribute is filled in.
        model_path: Directory searched for the template files. A path that
            is not a local directory simply yields no files.
    """
    if getattr(processor, "chat_template", None):
        return

    for filename in ("chat_template.jinja", "chat_template.json"):
        path = os.path.join(model_path, filename)
        if not os.path.isfile(path):
            continue
        try:
            with open(path, "r", encoding="utf-8") as f:
                raw = f.read()
            if filename.endswith(".json"):
                data = json.loads(raw)
                # Valid JSON that is not an object (list, string, number...)
                # has no "chat_template" key; treat it as "no template"
                # instead of letting .get() raise AttributeError.
                template = data.get("chat_template") if isinstance(data, dict) else None
            else:
                template = raw
        except (OSError, ValueError) as exc:
            # ValueError covers both JSON decode and UTF-8 decode failures.
            print(f"[QwenVLImageJudge] ignoring unreadable chat template {path}: {exc}")
            continue

        # A whitespace-only file is not a usable template; keep looking.
        is_blank = isinstance(template, str) and not template.strip()
        if template and not is_blank:
            processor.chat_template = template
            return

    tokenizer = getattr(processor, "tokenizer", None)
    tokenizer_template = getattr(tokenizer, "chat_template", None)
    if tokenizer_template:
        processor.chat_template = tokenizer_template


def _format_chatml_qwenvl(messages: list[dict], *, add_generation_prompt: bool = True) -> str:
    """Build a Qwen-VL ChatML prompt by hand.

    Used when the processor has no chat template (e.g. checkpoints converted
    for ComfyUI that drop chat_template.json). Each message becomes
    ``<|im_start|>{role}\\n{content}<|im_end|>\\n``; each image item becomes
    ``<|vision_start|><|image_pad|><|vision_end|>``, which the processor then
    expands to the right number of image tokens.

    Args:
        messages: Chat messages, each a mapping with ``"role"`` and
            ``"content"``. ``content`` is either a string or a list of items.
            An item is an image when its ``"type"`` is ``"image"`` or, if it
            has no ``"type"``, when it carries an ``"image"`` / ``"image_url"``
            key. An item is text when its ``"type"`` is ``"text"`` or, if it
            has no ``"type"``, when it carries a ``"text"`` key. Items of any
            other kind are ignored.
        add_generation_prompt: Append the opening ``<|im_start|>assistant\\n``
            turn so the model continues as the assistant.

    Returns:
        The assembled prompt string.
    """
    image_placeholder = "<|vision_start|><|image_pad|><|vision_end|>"
    parts = []
    for msg in messages:
        parts.append(f"<|im_start|>{msg['role']}\n")
        content = msg["content"]
        if content is None:
            content = ""
        if isinstance(content, str):
            parts.append(content)
        else:
            for item in content:
                if isinstance(item, str):
                    parts.append(item)
                    continue
                kind = item.get("type")
                untyped = kind is None
                if kind == "image" or (untyped and ("image" in item or "image_url" in item)):
                    # Every image handed to the processor needs exactly one
                    # placeholder, so untyped image items must not be dropped.
                    parts.append(image_placeholder)
                elif kind == "text" or (untyped and "text" in item):
                    # An explicit "text": None must not reach str.join.
                    parts.append(item.get("text") or "")
        parts.append("<|im_end|>\n")
    if add_generation_prompt:
        parts.append("<|im_start|>assistant\n")
    return "".join(parts)