From 8b567cb5311417d328f52397c5ff8b51fb0fa311 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Thu, 2 Jul 2026 02:09:36 +0200
Subject: [PATCH] chat mode: json_output toggle to return clean extracted JSON

For JSON-producing system prompts (e.g. LTX prompt-relay), json_output=true extracts
the JSON object from the reply via _parse_json (which strips reasoning, prose, and code
fences, and handles nested schemas and reasoning-then-JSON replies) and returns it
re-serialized; if no JSON parses, the raw text is returned unchanged. agent_bridge
gains a matching --json-output flag.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 README.md           |  3 +++
 agent_bridge.py     | 10 ++++++++--
 nodes/qwen_judge.py | 21 +++++++++++++++++----
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 8a8eff7..76dcbd7 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,9 @@ node — feed an image (and optionally a second), write your own `system_prompt`
 and read the model's text from the `analysis` output. Reuses the same model dropdown, quant,
 and auto-download as the judge, so it's a one-node abliterated VLM for captioning, tagging,
 Q&A, prompt-from-image, etc. (CLI: `agent_bridge.py --mode chat --user-prompt "..."`).
+Set **`json_output=true`** for JSON-producing system prompts — it extracts the JSON object
+from the reply (stripping any reasoning, prose, or code fences) and returns it clean and
+re-serialized (falls back to raw text if no JSON parses). Works even with `enable_thinking` on.
 
 ## Performance / speed
 
diff --git a/agent_bridge.py b/agent_bridge.py
index 1471b07..c2e84e0 100644
--- a/agent_bridge.py
+++ b/agent_bridge.py
@@ -49,7 +49,8 @@ def _http_json(url: str, payload: dict | None = None, timeout: int = 30):
 
 def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mode: str,
             reference_description: str = "", profile: str = "", model_select: str = "",
-            model_path: str = "", system_prompt: str = "", user_prompt: str = ""):
+            model_path: str = "", system_prompt: str = "", user_prompt: str = "",
+            json_output: bool = False):
     """Set the receptor's prompt/seed and the judge's mode/run_tag in-place.
 
     compare mode needs a receptor (to inject the prompt). describe mode is the first
@@ -79,6 +80,8 @@ def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mo
                 inputs["system_prompt"] = system_prompt
             if user_prompt:
                 inputs["user_prompt"] = user_prompt
+            if json_output:
+                inputs["json_output"] = True
     if mode == "compare" and not found_receptor:
         raise SystemExit(
             f"[agent_bridge] no '{RECEPTOR_CLASS}' node in the workflow — add the "
@@ -123,6 +126,8 @@ def main(argv=None):
                     help="describe = first pass over the reference; chat = general VLM with your prompts")
     ap.add_argument("--system-prompt", default="", help="chat mode: system prompt")
     ap.add_argument("--user-prompt", default="", help="chat mode: user prompt over the image(s)")
+    ap.add_argument("--json-output", action="store_true",
+                    help="chat mode: extract & return clean JSON from the reply")
     ap.add_argument("--prompt", default="", help="generation prompt (required for compare)")
     ap.add_argument("--negative", default="")
     ap.add_argument("--seed", type=int, default=0)
@@ -155,7 +160,8 @@ def main(argv=None):
         graph = json.load(f)
 
     _inject(graph, args.prompt, args.negative, args.seed, args.run_tag, args.mode, ref_desc,
-            args.profile, args.model_select, args.model_path, args.system_prompt, args.user_prompt)
+            args.profile, args.model_select, args.model_path, args.system_prompt, args.user_prompt,
+            args.json_output)
 
     client_id = uuid.uuid4().hex
     try:
diff --git a/nodes/qwen_judge.py b/nodes/qwen_judge.py
index 2d3c4f7..9dbee58 100644
--- a/nodes/qwen_judge.py
+++ b/nodes/qwen_judge.py
@@ -825,6 +825,9 @@ class QwenVLImageJudge:
                 # Reasoning models (Qwen3.5/3.6) judge verdicts FAR better with thinking on
                 # (off -> they rubber-stamp 'match'). Costs more tokens; raise max_new_tokens.
                 "enable_thinking": ("BOOLEAN", {"default": True}),
+                # chat mode: extract the JSON object from the reply and return it clean
+                # (strips reasoning/prose/code-fences). Falls back to raw text if no JSON.
+                "json_output": ("BOOLEAN", {"default": False}),
                 "keep_loaded": ("BOOLEAN", {"default": True}),
                 "auto_download": ("BOOLEAN", {"default": True}),
                 # Small config values stay as typeable fields.
@@ -847,8 +850,8 @@ class QwenVLImageJudge:
 
     def judge(self, reference_image, mode, model_path, precision,
               max_new_tokens, temperature, swap_eval, profile="general",
-              enable_thinking=True, model_select=MANUAL_CHOICE, generated_image=None,
-              keep_loaded=True, auto_download=True,
+              enable_thinking=True, json_output=False, model_select=MANUAL_CHOICE,
+              generated_image=None, keep_loaded=True, auto_download=True,
               report_dir="", run_tag="", axes="", reference_description="",
               system_prompt="", user_prompt="Describe this image."):
         # `axes` overrides the profile when provided; otherwise use the profile's axis set.
@@ -889,7 +892,7 @@ class QwenVLImageJudge:
             gen_pil = _tensor_to_pil(generated_image) if generated_image is not None else None
             return self._chat(model, processor, ref_pil, gen_pil, system_prompt, user_prompt,
                               max_new_tokens, temperature, resolved_path, eff_precision,
-                              keep_loaded, report_dir, run_tag, enable_thinking)
+                              keep_loaded, report_dir, run_tag, enable_thinking, json_output)
 
         if mode == "describe":
             return self._describe(model, processor, ref_pil, axis_list, max_new_tokens,
@@ -950,7 +953,7 @@ class QwenVLImageJudge:
 
     def _chat(self, model, processor, ref_pil, gen_pil, system_prompt, user_prompt,
               max_new_tokens, temperature, resolved_path, precision, keep_loaded,
-              report_dir, run_tag, think=True):
+              report_dir, run_tag, think=True, json_output=False):
         """General-VLM mode: not a judge — just runs your prompt over the image(s)."""
         images = [ref_pil] + ([gen_pil] if gen_pil is not None else [])
         text = _run_chat(model, processor, images, system_prompt, user_prompt,
@@ -959,6 +962,16 @@ class QwenVLImageJudge:
             _MODEL_CACHE.pop((resolved_path, precision), None)
             del model
             torch.cuda.empty_cache()
+
+        # json_output: pull the JSON object out (strips reasoning/prose/```fences) and
+        # return it clean & re-serialized. Falls back to the raw text if none parses.
+        if json_output:
+            obj = _parse_json(text)
+            if obj is not None:
+                text = json.dumps(obj, ensure_ascii=False, indent=2)
+            else:
+                print("[QwenVLImageJudge] json_output: no JSON found, returning raw text.")
+
         report_path = _write_chat_report(report_dir, run_tag, system_prompt, user_prompt, text)
         return (1.0, "{}", text, text, report_path)