Add Krea2 tuning coverage report

This commit is contained in:
2026-06-29 03:46:42 +02:00
parent fae5423513
commit 333f4752f6
4 changed files with 148 additions and 2 deletions
+7
View File
@@ -43,6 +43,13 @@ should remain tied to a catalog variant. Image paths in that log point at
external ComfyUI artifacts and may be cleaned; the durable evidence is the fixed
seed, prompt summaries, observation, decision, and commit.
To see catalog coverage and the next variants that still need controlled
testing, run:
```bash
python tools/krea2_tuning_report.py
```
## Optional Command Hook
If you have a one-shot Codex command you want to run automatically, set:
+91
View File
@@ -0,0 +1,91 @@
from __future__ import annotations
from collections import Counter
from typing import Any
try:
from . import krea2_eval_log, krea2_pose_variant_catalog
except ImportError: # Allows local smoke tests from the repository root.
import krea2_eval_log
import krea2_pose_variant_catalog
def _coverage_state(status: str, accepted_count: int) -> str:
    """Map a variant's catalog status and accepted-evidence count to a state label.

    Args:
        status: The variant's status string as stored in the catalog.
        accepted_count: Number of evidence entries whose result is "accepted".

    Returns:
        One of "proven_with_evidence", "proven_missing_evidence",
        "needs_fixed_seed_tests", "needs_stronger_control" or "tracked".
        "tracked" is the fallback for every combination not matched above,
        including a candidate that already has accepted evidence.
    """
    if status == "proven":
        # A proven variant is only fully covered once accepted evidence exists.
        if accepted_count > 0:
            return "proven_with_evidence"
        return "proven_missing_evidence"
    if status == "candidate" and accepted_count == 0:
        return "needs_fixed_seed_tests"
    if status == "unstable":
        return "needs_stronger_control"
    return "tracked"
def coverage_rows() -> list[dict[str, Any]]:
    """Return one coverage row per catalog variant, in catalog iteration order.

    Each row combines fields read from the variant (key, family,
    action_family, status, reference image count, guide section) with counts
    of the eval-log entries returned for that variant key. An entry counts as
    accepted when its "result" field equals "accepted".
    """

    def _row_for(variant: Any) -> dict[str, Any]:
        # Missing or falsy catalog fields are normalised to empty values.
        variant_key = str(variant.get("key") or "")
        log_entries = krea2_eval_log.entries_for_variant(variant_key)
        accepted_total = sum(
            1 for entry in log_entries if entry.get("result") == "accepted"
        )
        variant_status = str(variant.get("status") or "")
        evidence_meta = variant.get("evidence") or {}
        return {
            "key": variant_key,
            "family": variant.get("family") or "",
            "action_family": variant.get("action_family") or "",
            "status": variant_status,
            "coverage_state": _coverage_state(variant_status, accepted_total),
            "accepted_evidence_count": accepted_total,
            "total_evidence_count": len(log_entries),
            "reference_count": len(variant.get("reference_images") or []),
            "guide_section": evidence_meta.get("guide_section", ""),
        }

    return [_row_for(variant) for variant in krea2_pose_variant_catalog.variants()]
def coverage_summary(rows: list[dict[str, Any]] | None = None) -> dict[str, Any]:
    """Aggregate coverage rows into counts and follow-up lists.

    Args:
        rows: Coverage rows shaped like the output of ``coverage_rows()``.
            When omitted, the rows are computed by calling ``coverage_rows()``.
            Passing them in lets a caller that already holds the rows avoid a
            second scan and guarantees both views use the same snapshot.

    Returns:
        A dict with:
        - "variant_count": number of rows.
        - "status_counts": occurrences of each "status" value.
        - "coverage_state_counts": occurrences of each "coverage_state" value.
        - "variants_without_accepted_evidence": keys of rows whose accepted
          evidence count is zero or missing, in row order.
        - "next_test_candidates": keys of rows whose coverage state is
          "needs_fixed_seed_tests" or "proven_missing_evidence", in row order.
    """
    if rows is None:
        rows = coverage_rows()
    follow_up_states = {"needs_fixed_seed_tests", "proven_missing_evidence"}
    return {
        "variant_count": len(rows),
        "status_counts": dict(Counter(row.get("status") for row in rows)),
        "coverage_state_counts": dict(
            Counter(row.get("coverage_state") for row in rows)
        ),
        "variants_without_accepted_evidence": [
            str(row.get("key"))
            for row in rows
            if int(row.get("accepted_evidence_count") or 0) == 0
        ],
        "next_test_candidates": [
            str(row.get("key"))
            for row in rows
            if row.get("coverage_state") in follow_up_states
        ],
    }


def markdown_report() -> str:
    """Render the coverage table, plus a follow-up list, as Markdown text.

    The table has one line per coverage row showing the variant key, status,
    accepted/total evidence counts and coverage state. When the summary lists
    any next test candidates, a "Next Fixed-Seed Tests" section with one
    bullet per key is appended. Lines are joined with "\\n" and no trailing
    newline is added.
    """
    # Build the rows once and share them with the summary, so the table and
    # the follow-up list are derived from the same data.
    rows = coverage_rows()
    lines = [
        "# Krea2 Pose Variant Coverage",
        "",
        "| Variant | Status | Evidence | State |",
        "| --- | --- | ---: | --- |",
    ]
    lines.extend(
        f"| {row['key']} | {row['status']} | "
        f"{row['accepted_evidence_count']}/{row['total_evidence_count']} | "
        f"{row['coverage_state']} |"
        for row in rows
    )
    candidates = coverage_summary(rows)["next_test_candidates"]
    if candidates:
        lines.extend(
            [
                "",
                "## Next Fixed-Seed Tests",
                "",
                *(f"- {key}" for key in candidates),
            ]
        )
    return "\n".join(lines)
+22
View File
@@ -0,0 +1,22 @@
#!/usr/bin/env python3
from __future__ import annotations
import sys
from pathlib import Path
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Make ROOT the first sys.path entry: drop its first existing occurrence
# (if any) and re-insert it at the front so the import below this setup
# resolves against ROOT first.
try:
    sys.path.remove(str(ROOT))
except ValueError:
    pass  # ROOT was not on sys.path yet; nothing to move.
sys.path.insert(0, str(ROOT))
import krea2_tuning_report # noqa: E402
def main() -> int:
    """Print the Markdown coverage report to stdout and return exit code 0."""
    report_text = krea2_tuning_report.markdown_report()
    print(report_text)
    return 0


if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    raise SystemExit(main())
+27 -1
View File
@@ -21,7 +21,8 @@ from typing import Any, Callable
ROOT = Path(__file__).resolve().parents[1] ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path: if str(ROOT) in sys.path:
sys.path.remove(str(ROOT))
sys.path.insert(0, str(ROOT)) sys.path.insert(0, str(ROOT))
import caption_naturalizer # noqa: E402 import caption_naturalizer # noqa: E402
@@ -63,6 +64,7 @@ import krea_normal_formatter # noqa: E402
import krea_pair_formatter # noqa: E402 import krea_pair_formatter # noqa: E402
import krea2_eval_log # noqa: E402 import krea2_eval_log # noqa: E402
import krea2_pose_variant_catalog # noqa: E402 import krea2_pose_variant_catalog # noqa: E402
import krea2_tuning_report # noqa: E402
import krea_row_fields # noqa: E402 import krea_row_fields # noqa: E402
import location_config # noqa: E402 import location_config # noqa: E402
import loop_nodes # noqa: E402 import loop_nodes # noqa: E402
@@ -6843,6 +6845,29 @@ def smoke_krea2_eval_log_policy() -> None:
_expect(clean.get("observation") != "mutation should not leak", "Krea2 eval log leaked caller mutation") _expect(clean.get("observation") != "mutation should not leak", "Krea2 eval log leaked caller mutation")
def smoke_krea2_tuning_report_policy() -> None:
    """Smoke-check the tuning report rows, summary and Markdown output.

    Verifies that report rows follow catalog key order, that two specific
    variants carry the expected coverage state and accepted-evidence counts,
    that the summary's status counts and missing-evidence list match the
    pinned values, and that the Markdown output mentions the candidate
    variant and its coverage state.
    """
    rows = krea2_tuning_report.coverage_rows()
    row_keys = [row.get("key") for row in rows]
    catalog_keys = krea2_pose_variant_catalog.variant_keys()
    _expect(row_keys == catalog_keys, "Krea2 tuning report row order should follow catalog order")

    # Index rows by key; a missing variant falls back to an empty dict so the
    # checks below fail through _expect rather than raising.
    by_key = dict(zip(row_keys, rows))

    boobjob = by_key.get("pov_boobjob_upright_cleavage") or {}
    _expect(boobjob.get("coverage_state") == "proven_with_evidence", "Boobjob report should be proven with evidence")
    _expect(boobjob.get("accepted_evidence_count", 0) >= 1, "Boobjob report lost accepted evidence count")

    ballsucking = by_key.get("pov_ballsucking_low_head") or {}
    _expect(ballsucking.get("coverage_state") == "needs_fixed_seed_tests", "Ballsucking report should need fixed-seed tests")
    _expect(ballsucking.get("accepted_evidence_count") == 0, "Ballsucking report should not have accepted evidence yet")

    summary = krea2_tuning_report.coverage_summary()
    _expect(summary.get("status_counts", {}).get("proven") == 3, "Krea2 tuning report proven count changed")
    _expect(summary.get("status_counts", {}).get("candidate") == 1, "Krea2 tuning report candidate count changed")
    _expect(
        summary.get("variants_without_accepted_evidence") == ["pov_ballsucking_low_head"],
        f"Krea2 tuning report missing-evidence set changed: {summary.get('variants_without_accepted_evidence')}",
    )

    markdown = krea2_tuning_report.markdown_report()
    for needle, failure in (
        ("pov_ballsucking_low_head", "Krea2 tuning report markdown lost candidate variant"),
        ("needs_fixed_seed_tests", "Krea2 tuning report markdown lost coverage state"),
    ):
        _expect(needle in markdown, failure)
def smoke_krea_pov_penetration_route() -> None: def smoke_krea_pov_penetration_route() -> None:
pair = pb.build_insta_of_pair( pair = pb.build_insta_of_pair(
row_number=1, row_number=1,
@@ -9849,6 +9874,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog), ("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog),
("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy), ("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy),
("krea2_eval_log_policy", smoke_krea2_eval_log_policy), ("krea2_eval_log_policy", smoke_krea2_eval_log_policy),
("krea2_tuning_report_policy", smoke_krea2_tuning_report_policy),
("krea_pov_penetration_route", smoke_krea_pov_penetration_route), ("krea_pov_penetration_route", smoke_krea_pov_penetration_route),
("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes), ("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes),
("pov_oral_position_routes", smoke_pov_oral_position_routes), ("pov_oral_position_routes", smoke_pov_oral_position_routes),