Show eval template commands in Krea2 report
This commit is contained in:
@@ -81,7 +81,9 @@ python tools/krea2_tuning_report.py
|
|||||||
The report includes atlas references plus prompt cues and avoid cues for the
|
The report includes atlas references plus prompt cues and avoid cues for the
|
||||||
next fixed-seed test candidate. It also shows the latest durable evidence for
|
next fixed-seed test candidate. It also shows the latest durable evidence for
|
||||||
variants that already have fixed-seed results, including the evidence id, seed,
|
variants that already have fixed-seed results, including the evidence id, seed,
|
||||||
decision, candidate prompt summary, and observation.
|
decision, candidate prompt summary, and observation. For each normal next-test
|
||||||
|
candidate, it prints a `krea2_record_eval.py --print-template` command; replace
|
||||||
|
`<fixed_seed>` with the seed from the run you are recording.
|
||||||
|
|
||||||
## Optional Command Hook
|
## Optional Command Hook
|
||||||
|
|
||||||
|
|||||||
@@ -228,6 +228,21 @@ def next_test_plans() -> list[dict[str, Any]]:
|
|||||||
return plans
|
return plans
|
||||||
|
|
||||||
|
|
||||||
|
def next_eval_template_commands(*, seed_token: str = "<fixed_seed>") -> list[dict[str, str]]:
    """Build one recorder ``--print-template`` command per next-test plan.

    Plans come from ``next_test_plans()`` and keep their order. A plan whose
    ``"key"`` is missing or falsy is skipped.

    Args:
        seed_token: Text placed after ``--seed`` in every command. The default
            is a placeholder that the reader replaces with a real seed.

    Returns:
        A list of dicts, each with the variant ``"key"`` and the matching
        ``"command"`` string.
    """
    # Normalise keys to strings first so the emptiness filter below is uniform.
    variant_keys = (str(plan.get("key") or "") for plan in next_test_plans())
    return [
        {
            "key": variant_key,
            "command": f"python tools/krea2_record_eval.py --print-template --variant-key {variant_key} --seed {seed_token}",
        }
        for variant_key in variant_keys
        if variant_key
    ]
|
||||||
|
|
||||||
|
|
||||||
def markdown_report(atlas_root: str | Path | None = None) -> str:
|
def markdown_report(atlas_root: str | Path | None = None) -> str:
|
||||||
lines = [
|
lines = [
|
||||||
"# Krea2 Pose Variant Coverage",
|
"# Krea2 Pose Variant Coverage",
|
||||||
@@ -264,6 +279,11 @@ def markdown_report(atlas_root: str | Path | None = None) -> str:
|
|||||||
*[f"- {key}" for key in summary["next_test_candidates"]],
|
*[f"- {key}" for key in summary["next_test_candidates"]],
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
template_commands = next_eval_template_commands()
|
||||||
|
if template_commands:
|
||||||
|
lines.extend(["", "## Eval Entry Template Commands", ""])
|
||||||
|
for command in template_commands:
|
||||||
|
lines.append(f"- {command['key']}: `{command['command']}`")
|
||||||
stronger_control_rows = [row for row in coverage_rows() if row.get("coverage_state") == "needs_stronger_control"]
|
stronger_control_rows = [row for row in coverage_rows() if row.get("coverage_state") == "needs_stronger_control"]
|
||||||
if stronger_control_rows:
|
if stronger_control_rows:
|
||||||
lines.extend(["", "## Stronger Control Cases", ""])
|
lines.extend(["", "## Stronger Control Cases", ""])
|
||||||
|
|||||||
@@ -7195,6 +7195,23 @@ def smoke_krea2_tuning_report_policy() -> None:
|
|||||||
"pov_sixty_nine_close_reversed_oral" not in [plan.get("key") for plan in plans],
|
"pov_sixty_nine_close_reversed_oral" not in [plan.get("key") for plan in plans],
|
||||||
"Unstable sixty-nine route should not be queued as a normal fixed-seed candidate",
|
"Unstable sixty-nine route should not be queued as a normal fixed-seed candidate",
|
||||||
)
|
)
|
||||||
|
template_commands = krea2_tuning_report.next_eval_template_commands(seed_token="$SEED")
|
||||||
|
_expect(
|
||||||
|
[command.get("key") for command in template_commands]
|
||||||
|
== [plan.get("key") for plan in plans],
|
||||||
|
"Krea2 eval template commands should follow normal next-test candidates",
|
||||||
|
)
|
||||||
|
first_template_command = template_commands[0]
|
||||||
|
_expect(first_template_command.get("key") == "pov_ballsucking_low_head", "First eval template command should target first next-test variant")
|
||||||
|
_expect(
|
||||||
|
"python tools/krea2_record_eval.py --print-template" in str(first_template_command.get("command") or ""),
|
||||||
|
"Krea2 eval template command should use the validated recorder",
|
||||||
|
)
|
||||||
|
_expect("--seed $SEED" in str(first_template_command.get("command") or ""), "Krea2 eval template command should preserve seed placeholder")
|
||||||
|
_expect(
|
||||||
|
"pov_sixty_nine_close_reversed_oral" not in " ".join(str(command.get("command") or "") for command in template_commands),
|
||||||
|
"Krea2 eval template commands should exclude low-priority stronger-control routes",
|
||||||
|
)
|
||||||
_expect(
|
_expect(
|
||||||
[plan.get("key") for plan in plans]
|
[plan.get("key") for plan in plans]
|
||||||
== [
|
== [
|
||||||
@@ -7417,6 +7434,9 @@ def smoke_krea2_tuning_report_policy() -> None:
|
|||||||
_expect("hardest" in markdown, "Krea2 tuning report markdown lost hardest-route marker")
|
_expect("hardest" in markdown, "Krea2 tuning report markdown lost hardest-route marker")
|
||||||
_expect("low priority" in markdown, "Krea2 tuning report markdown lost low-priority marker")
|
_expect("low priority" in markdown, "Krea2 tuning report markdown lost low-priority marker")
|
||||||
_expect("pose_or_image_guidance_first" in markdown, "Krea2 tuning report markdown lost control-first marker")
|
_expect("pose_or_image_guidance_first" in markdown, "Krea2 tuning report markdown lost control-first marker")
|
||||||
|
_expect("## Eval Entry Template Commands" in markdown, "Krea2 tuning report markdown lost eval template command section")
|
||||||
|
_expect("python tools/krea2_record_eval.py --print-template" in markdown, "Krea2 tuning report markdown lost recorder template command")
|
||||||
|
_expect("--seed <fixed_seed>" in markdown, "Krea2 tuning report markdown lost fixed-seed placeholder")
|
||||||
_expect("pov_ballsucking_low_head" in markdown, "Krea2 tuning report markdown lost candidate variant")
|
_expect("pov_ballsucking_low_head" in markdown, "Krea2 tuning report markdown lost candidate variant")
|
||||||
_expect("pov_footjob_frontal_sole_stroke" in markdown, "Krea2 tuning report markdown lost footjob candidate variant")
|
_expect("pov_footjob_frontal_sole_stroke" in markdown, "Krea2 tuning report markdown lost footjob candidate variant")
|
||||||
_expect("pov_fingering_reclined_open_thighs" in markdown, "Krea2 tuning report markdown lost fingering candidate variant")
|
_expect("pov_fingering_reclined_open_thighs" in markdown, "Krea2 tuning report markdown lost fingering candidate variant")
|
||||||
|
|||||||
Reference in New Issue
Block a user