Add describe (first-pass) mode to the judge node

New mode on QwenVLImageJudge: 'describe' looks at the reference image alone and
returns a prompt-ready caption plus a per-axis target spec to seed the very first
prompt (the generator has nothing to reproduce yet). 'compare' is the existing
ref-vs-gen scoring. generated_image is now optional (required only for compare);
the shared generation code is refactored into _generate_from_messages; the third
output is renamed diff_analysis -> analysis (mode-agnostic). agent_bridge gains
--mode (describe needs no receptor or prompt); workflow_describe_api.json is added.
Docs are updated with the first-pass bootstrap step. The error return's arity is
fixed to a 5-tuple.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-26 23:04:09 +02:00
parent 959ec70065
commit c7ef756a71
6 changed files with 211 additions and 47 deletions
+14 -5
View File
@@ -47,8 +47,11 @@ def _http_json(url: str, payload: dict | None = None, timeout: int = 30):
return json.loads(body) if body else {}
def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str):
"""Set the receptor's prompt/negative/seed and the judge's run_tag in-place."""
def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str, mode: str):
"""Set the receptor's prompt/seed and the judge's mode/run_tag in-place.
compare mode needs a receptor (to inject the prompt). describe mode is the first
pass over the reference only, so no receptor is required."""
found_receptor = False
for node in graph.values():
ctype = node.get("class_type")
@@ -59,9 +62,10 @@ def _inject(graph: dict, prompt: str, negative: str, seed: int, run_tag: str):
inputs["seed"] = int(seed)
found_receptor = True
elif ctype == JUDGE_CLASS:
inputs["mode"] = mode
inputs["run_tag"] = run_tag
inputs["prompt_used"] = prompt
if not found_receptor:
if mode == "compare" and not found_receptor:
raise SystemExit(
f"[agent_bridge] no '{RECEPTOR_CLASS}' node in the workflow — add the "
f"'SxCP External Prompt (Receptor)' node and feed the sampler from it.")
@@ -101,7 +105,9 @@ def main(argv=None):
ap = argparse.ArgumentParser(description="Drive one ComfyUI calibration iteration.")
ap.add_argument("--server", default="127.0.0.1:8188")
ap.add_argument("--workflow", required=True, help="API-format workflow JSON")
ap.add_argument("--prompt", required=True)
ap.add_argument("--mode", choices=["compare", "describe"], default="compare",
help="describe = first pass over the reference only (no prompt needed)")
ap.add_argument("--prompt", default="", help="generation prompt (required for compare)")
ap.add_argument("--negative", default="")
ap.add_argument("--seed", type=int, default=0)
ap.add_argument("--run-tag", default="")
@@ -112,10 +118,13 @@ def main(argv=None):
ap.add_argument("--timeout", type=int, default=600)
args = ap.parse_args(argv)
if args.mode == "compare" and not args.prompt:
raise SystemExit("[agent_bridge] --prompt is required in compare mode.")
with open(args.workflow, "r", encoding="utf-8") as f:
graph = json.load(f)
_inject(graph, args.prompt, args.negative, args.seed, args.run_tag)
_inject(graph, args.prompt, args.negative, args.seed, args.run_tag, args.mode)
client_id = uuid.uuid4().hex
try: