From 49d130467b420dda2f4f74cbea7dbc424a91d893 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Mon, 29 Jun 2026 09:32:36 +0200 Subject: [PATCH] Show eval template commands in Krea2 report --- docs/sxcp-eval-loop.md | 4 +++- krea2_tuning_report.py | 20 ++++++++++++++++++++ tools/prompt_smoke.py | 20 ++++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/docs/sxcp-eval-loop.md b/docs/sxcp-eval-loop.md index 2e858bf..8f77128 100644 --- a/docs/sxcp-eval-loop.md +++ b/docs/sxcp-eval-loop.md @@ -81,7 +81,9 @@ python tools/krea2_tuning_report.py The report includes atlas references plus prompt cues and avoid cues for the next fixed-seed test candidate. It also shows the latest durable evidence for variants that already have fixed-seed results, including the evidence id, seed, -decision, candidate prompt summary, and observation. +decision, candidate prompt summary, and observation. For each normal next-test +candidate, it prints a `krea2_record_eval.py --print-template` command; replace +`<seed>` with the seed from the run you are recording. 
## Optional Command Hook diff --git a/krea2_tuning_report.py b/krea2_tuning_report.py index a1cb51a..0374600 100644 --- a/krea2_tuning_report.py +++ b/krea2_tuning_report.py @@ -228,6 +228,21 @@ def next_test_plans() -> list[dict[str, Any]]: return plans +def next_eval_template_commands(*, seed_token: str = "") -> list[dict[str, str]]: + commands: list[dict[str, str]] = [] + for plan in next_test_plans(): + key = str(plan.get("key") or "") + if not key: + continue + commands.append( + { + "key": key, + "command": f"python tools/krea2_record_eval.py --print-template --variant-key {key} --seed {seed_token}", + } + ) + return commands + + def markdown_report(atlas_root: str | Path | None = None) -> str: lines = [ "# Krea2 Pose Variant Coverage", @@ -264,6 +279,11 @@ def markdown_report(atlas_root: str | Path | None = None) -> str: *[f"- {key}" for key in summary["next_test_candidates"]], ] ) + template_commands = next_eval_template_commands() + if template_commands: + lines.extend(["", "## Eval Entry Template Commands", ""]) + for command in template_commands: + lines.append(f"- {command['key']}: `{command['command']}`") stronger_control_rows = [row for row in coverage_rows() if row.get("coverage_state") == "needs_stronger_control"] if stronger_control_rows: lines.extend(["", "## Stronger Control Cases", ""]) diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 31d5b13..58850f7 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -7195,6 +7195,23 @@ def smoke_krea2_tuning_report_policy() -> None: "pov_sixty_nine_close_reversed_oral" not in [plan.get("key") for plan in plans], "Unstable sixty-nine route should not be queued as a normal fixed-seed candidate", ) + template_commands = krea2_tuning_report.next_eval_template_commands(seed_token="$SEED") + _expect( + [command.get("key") for command in template_commands] + == [plan.get("key") for plan in plans], + "Krea2 eval template commands should follow normal next-test candidates", + ) + 
first_template_command = template_commands[0] + _expect(first_template_command.get("key") == "pov_ballsucking_low_head", "First eval template command should target first next-test variant") + _expect( + "python tools/krea2_record_eval.py --print-template" in str(first_template_command.get("command") or ""), + "Krea2 eval template command should use the validated recorder", + ) + _expect("--seed $SEED" in str(first_template_command.get("command") or ""), "Krea2 eval template command should preserve seed placeholder") + _expect( + "pov_sixty_nine_close_reversed_oral" not in " ".join(str(command.get("command") or "") for command in template_commands), + "Krea2 eval template commands should exclude low-priority stronger-control routes", + ) _expect( [plan.get("key") for plan in plans] == [ @@ -7417,6 +7434,9 @@ def smoke_krea2_tuning_report_policy() -> None: _expect("hardest" in markdown, "Krea2 tuning report markdown lost hardest-route marker") _expect("low priority" in markdown, "Krea2 tuning report markdown lost low-priority marker") _expect("pose_or_image_guidance_first" in markdown, "Krea2 tuning report markdown lost control-first marker") + _expect("## Eval Entry Template Commands" in markdown, "Krea2 tuning report markdown lost eval template command section") + _expect("python tools/krea2_record_eval.py --print-template" in markdown, "Krea2 tuning report markdown lost recorder template command") + _expect("--seed " in markdown, "Krea2 tuning report markdown lost fixed-seed placeholder") _expect("pov_ballsucking_low_head" in markdown, "Krea2 tuning report markdown lost candidate variant") _expect("pov_footjob_frontal_sole_stroke" in markdown, "Krea2 tuning report markdown lost footjob candidate variant") _expect("pov_fingering_reclined_open_thighs" in markdown, "Krea2 tuning report markdown lost fingering candidate variant")