Add route simulation quality summary

This commit is contained in:
2026-06-27 20:08:11 +02:00
parent eb1bdbf305
commit de6615c024
4 changed files with 192 additions and 0 deletions
+16
View File
@@ -7926,11 +7926,20 @@ def smoke_seed_config_policy() -> None:
def smoke_prompt_route_simulation_policy() -> None:
report = prompt_route_simulation.run_simulation(seed=3901, include_prompts=False)
summary = report.get("summary") or {}
quality = report.get("quality") or {}
_expect(summary.get("cases") == 14, "Prompt route simulation case count changed unexpectedly")
_expect(summary.get("coverage_checks") == 2, "Prompt route simulation lost family coverage checks")
_expect(summary.get("axis_checks") == 6, "Prompt route simulation lost axis check coverage")
_expect(summary.get("pair_seed_checks") == 7, "Prompt route simulation lost pair seed check coverage")
_expect(summary.get("issues") == 0, f"Prompt route simulation reported issues: {report.get('issues')}")
_expect(quality.get("route_cases") == 14, "Prompt route simulation quality summary lost route case count")
_expect(quality.get("route_issues") == 0, f"Prompt route simulation quality reported route issues: {quality}")
_expect(quality.get("check_issues") == 0, f"Prompt route simulation quality reported check issues: {quality}")
_expect((quality.get("targets") or {}).get("single", {}).get("cases") == 10, "Prompt route simulation quality lost single target count")
_expect((quality.get("targets") or {}).get("softcore", {}).get("cases") == 2, "Prompt route simulation quality lost softcore target count")
_expect((quality.get("targets") or {}).get("hardcore", {}).get("cases") == 2, "Prompt route simulation quality lost hardcore target count")
_expect(not quality.get("issue_buckets"), "Prompt route simulation quality should have no issue buckets on clean baseline")
_expect(not quality.get("weakest_cases"), "Prompt route simulation quality should have no weak cases on clean baseline")
cases = {case.get("name"): case for case in report.get("cases") or []}
for route_name in (
"hardcore.single.oral",
@@ -8027,10 +8036,17 @@ def smoke_prompt_route_simulation_policy() -> None:
)
sweep = prompt_route_simulation.run_simulation_sweep(seed=3901, count=3, seed_step=101, include_prompts=False)
sweep_summary = sweep.get("summary") or {}
sweep_quality = sweep.get("quality") or {}
_expect(sweep_summary.get("runs") == 3, "Prompt route simulation sweep lost run coverage")
_expect(sweep_summary.get("seeds") == [3901, 4002, 4103], "Prompt route simulation sweep seed sequence changed")
_expect(sweep_summary.get("cases") == 42, "Prompt route simulation sweep case count changed")
_expect(sweep_summary.get("issues") == 0, f"Prompt route simulation sweep reported issues: {sweep.get('issues')}")
_expect(sweep_quality.get("route_cases") == 42, "Prompt route simulation sweep quality lost route case count")
_expect(sweep_quality.get("route_issues") == 0, f"Prompt route simulation sweep quality reported route issues: {sweep_quality}")
_expect(sweep_quality.get("check_issues") == 0, f"Prompt route simulation sweep quality reported check issues: {sweep_quality}")
_expect((sweep_quality.get("targets") or {}).get("single", {}).get("cases") == 30, "Prompt route simulation sweep quality lost single target count")
_expect((sweep_quality.get("targets") or {}).get("softcore", {}).get("cases") == 6, "Prompt route simulation sweep quality lost softcore target count")
_expect((sweep_quality.get("targets") or {}).get("hardcore", {}).get("cases") == 6, "Prompt route simulation sweep quality lost hardcore target count")
def smoke_node_camera_registration() -> None: