describe emits one canonical reference; compare can anchor on it

Describe mode now produces a single coherent, internally consistent canonical scene description (a paragraph plus a per-axis spec, written to canonical_reference in the report). Compare gains an optional reference_description input: when set, it anchors on that fixed text and shows only the generated image (no swap), so the reference side never drifts or self-contradicts across iterations; only the generated image is re-described each turn. agent_bridge gains --ref-desc / --ref-desc-file (the latter reads canonical_reference from the describe report JSON, falling back to its caption). Docs and example workflow updated.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 23:22:57 +02:00
parent 53f1f9b9b4
commit 69c1d6deb4
6 changed files with 149 additions and 51 deletions
@@ -47,11 +47,13 @@ def _http_json(url: str, payload: dict | None = None, timeout: int = 30):
    return json.loads(body) if body else {}


-def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mode: str):
+def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mode: str,
+            reference_description: str = ""):
    """Set the receptor's prompt/seed and the judge's mode/run_tag in-place.

    compare mode needs a receptor (to inject the prompt). describe mode is the first
-    pass over the reference only, so no receptor is required."""
+    pass over the reference only, so no receptor is required. reference_description, if
+    given, anchors compare on the canonical reference text from the describe pass."""
    found_receptor = False
    for node in graph.values():
        ctype = node.get("class_type")
@@ -65,6 +67,8 @@ def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mo
            inputs["mode"] = mode
            inputs["run_tag"] = run_tag
            inputs["prompt_used"] = prompt
+            if reference_description:
+                inputs["reference_description"] = reference_description
    if mode == "compare" and not found_receptor:
        raise SystemExit(
            f"[agent_bridge] no '{RECEPTOR_CLASS}' node in the workflow — add the "
@@ -111,6 +115,10 @@ def main(argv=None):
    ap.add_argument("--negative", default="")
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--run-tag", default="")
+    ap.add_argument("--ref-desc", default="",
+                    help="canonical reference text to anchor compare on (from the describe pass)")
+    ap.add_argument("--ref-desc-file", default="",
+                    help="path to a describe report JSON; uses its canonical_reference to anchor compare")
    ap.add_argument("--analysis-file", default="",
                    help="explicit path to the report JSON the Judge writes")
    ap.add_argument("--analysis-dir", default="",
@@ -121,10 +129,16 @@ def main(argv=None):
    if args.mode == "compare" and not args.prompt:
        raise SystemExit("[agent_bridge] --prompt is required in compare mode.")

+    ref_desc = args.ref_desc
+    if args.ref_desc_file:
+        with open(args.ref_desc_file, "r", encoding="utf-8") as f:
+            rep = json.load(f)
+        ref_desc = rep.get("canonical_reference") or rep.get("caption") or ref_desc
+
    with open(args.workflow, "r", encoding="utf-8") as f:
        graph = json.load(f)

-    _inject(graph, args.prompt, args.negative, args.seed, args.run_tag, args.mode)
+    _inject(graph, args.prompt, args.negative, args.seed, args.run_tag, args.mode, ref_desc)

    client_id = uuid.uuid4().hex
    try: