chat mode: json_output toggle to return clean extracted JSON
For JSON-producing system prompts (e.g. LTX prompt-relay), json_output=true extracts the JSON object from the reply and returns it re-serialized. Extraction goes through _parse_json, which strips reasoning, prose, and code fences, and handles nested schemas and reasoning-then-JSON replies. If no JSON object parses, the raw reply text is returned instead. agent_bridge gains a --json-output flag.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
+17
-4
@@ -825,6 +825,9 @@ class QwenVLImageJudge:
|
||||
# Reasoning models (Qwen3.5/3.6) judge verdicts FAR better with thinking on
|
||||
# (off -> they rubber-stamp 'match'). Costs more tokens; raise max_new_tokens.
|
||||
"enable_thinking": ("BOOLEAN", {"default": True}),
|
||||
# chat mode: extract the JSON object from the reply and return it clean
|
||||
# (strips reasoning/prose/code-fences). Falls back to raw text if no JSON.
|
||||
"json_output": ("BOOLEAN", {"default": False}),
|
||||
"keep_loaded": ("BOOLEAN", {"default": True}),
|
||||
"auto_download": ("BOOLEAN", {"default": True}),
|
||||
# Small config values stay as typeable fields.
|
||||
@@ -847,8 +850,8 @@ class QwenVLImageJudge:
|
||||
|
||||
def judge(self, reference_image, mode, model_path, precision,
|
||||
max_new_tokens, temperature, swap_eval, profile="general",
|
||||
enable_thinking=True, model_select=MANUAL_CHOICE, generated_image=None,
|
||||
keep_loaded=True, auto_download=True,
|
||||
enable_thinking=True, json_output=False, model_select=MANUAL_CHOICE,
|
||||
generated_image=None, keep_loaded=True, auto_download=True,
|
||||
report_dir="", run_tag="", axes="", reference_description="",
|
||||
system_prompt="", user_prompt="Describe this image."):
|
||||
# `axes` overrides the profile when provided; otherwise use the profile's axis set.
|
||||
@@ -889,7 +892,7 @@ class QwenVLImageJudge:
|
||||
gen_pil = _tensor_to_pil(generated_image) if generated_image is not None else None
|
||||
return self._chat(model, processor, ref_pil, gen_pil, system_prompt, user_prompt,
|
||||
max_new_tokens, temperature, resolved_path, eff_precision,
|
||||
keep_loaded, report_dir, run_tag, enable_thinking)
|
||||
keep_loaded, report_dir, run_tag, enable_thinking, json_output)
|
||||
|
||||
if mode == "describe":
|
||||
return self._describe(model, processor, ref_pil, axis_list, max_new_tokens,
|
||||
@@ -950,7 +953,7 @@ class QwenVLImageJudge:
|
||||
|
||||
def _chat(self, model, processor, ref_pil, gen_pil, system_prompt, user_prompt,
|
||||
max_new_tokens, temperature, resolved_path, precision, keep_loaded,
|
||||
report_dir, run_tag, think=True):
|
||||
report_dir, run_tag, think=True, json_output=False):
|
||||
"""General-VLM mode: not a judge — just runs your prompt over the image(s)."""
|
||||
images = [ref_pil] + ([gen_pil] if gen_pil is not None else [])
|
||||
text = _run_chat(model, processor, images, system_prompt, user_prompt,
|
||||
@@ -959,6 +962,16 @@ class QwenVLImageJudge:
|
||||
_MODEL_CACHE.pop((resolved_path, precision), None)
|
||||
del model
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
# json_output: pull the JSON object out (strips reasoning/prose/```fences) and
|
||||
# return it clean & re-serialized. Falls back to the raw text if none parses.
|
||||
if json_output:
|
||||
obj = _parse_json(text)
|
||||
if obj is not None:
|
||||
text = json.dumps(obj, ensure_ascii=False, indent=2)
|
||||
else:
|
||||
print("[QwenVLImageJudge] json_output: no JSON found, returning raw text.")
|
||||
|
||||
report_path = _write_chat_report(report_dir, run_tag, system_prompt, user_prompt, text)
|
||||
return (1.0, "{}", text, text, report_path)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user