Add describe (first-pass) mode to the judge node

New mode on QwenVLImageJudge: 'describe' looks at the reference alone and returns a prompt-ready caption + per-axis target spec to seed the very first prompt (the generator has nothing to reproduce yet). 'compare' is the existing ref-vs-gen scoring. generated_image is now optional (required only for compare); shared generation is refactored into _generate_from_messages; the third output is renamed diff_analysis -> analysis (mode-agnostic). agent_bridge gains --mode (describe needs no receptor/prompt); added workflow_describe_api.json. Docs updated with the first-pass bootstrap step. Fixed error-return arity to 5-tuple.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-26 23:04:09 +02:00
parent 959ec70065
commit c7ef756a71
6 changed files with 211 additions and 47 deletions
@@ -47,8 +47,11 @@ def _http_json(url: str, payload: dict | None = None, timeout: int = 30):
    return json.loads(body) if body else {}


-def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str):
-    """Set the receptor's prompt/negative/seed and the judge's run_tag in-place."""
+def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mode: str):
+    """Set the receptor's prompt/seed and the judge's mode/run_tag in-place.
+
+    compare mode needs a receptor (to inject the prompt). describe mode is the first
+    pass over the reference only, so no receptor is required."""
    found_receptor = False
    for node in graph.values():
        ctype = node.get("class_type")
@@ -59,9 +62,10 @@ def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str):
            inputs["seed"] = int(seed)
            found_receptor = True
        elif ctype == JUDGE_CLASS:
+            inputs["mode"] = mode
            inputs["run_tag"] = run_tag
            inputs["prompt_used"] = prompt
-    if not found_receptor:
+    if mode == "compare" and not found_receptor:
        raise SystemExit(
            f"[agent_bridge] no '{RECEPTOR_CLASS}' node in the workflow — add the "
            f"'SxCP External Prompt (Receptor)' node and feed the sampler from it.")
@@ -101,7 +105,9 @@ def main(argv=None):
    ap = argparse.ArgumentParser(description="Drive one ComfyUI calibration iteration.")
    ap.add_argument("--server", default="127.0.0.1:8188")
    ap.add_argument("--workflow", required=True, help="API-format workflow JSON")
-    ap.add_argument("--prompt", required=True)
+    ap.add_argument("--mode", choices=["compare", "describe"], default="compare",
+                    help="describe = first pass over the reference only (no prompt needed)")
+    ap.add_argument("--prompt", default="", help="generation prompt (required for compare)")
    ap.add_argument("--negative", default="")
    ap.add_argument("--seed", type=int, default=0)
    ap.add_argument("--run-tag", default="")
@@ -112,10 +118,13 @@ def main(argv=None):
    ap.add_argument("--timeout", type=int, default=600)
    args = ap.parse_args(argv)

+    if args.mode == "compare" and not args.prompt:
+        raise SystemExit("[agent_bridge] --prompt is required in compare mode.")
+
    with open(args.workflow, "r", encoding="utf-8") as f:
        graph = json.load(f)

-    _inject(graph, args.prompt, args.negative, args.seed, args.run_tag)
+    _inject(graph, args.prompt, args.negative, args.seed, args.run_tag, args.mode)

    client_id = uuid.uuid4().hex
    try: