Add Krea2 tuning coverage report
This commit is contained in:
@@ -43,6 +43,13 @@ should remain tied to a catalog variant. Image paths in that log point at
|
||||
external ComfyUI artifacts and may be cleaned up; the durable evidence is the fixed
seed, prompt summaries, observation, decision, and commit.
|
||||
|
||||
To see catalog coverage and the next variants that still need controlled
|
||||
testing, run:
|
||||
|
||||
```bash
|
||||
python tools/krea2_tuning_report.py
|
||||
```
|
||||
|
||||
## Optional Command Hook
|
||||
|
||||
If you have a one-shot Codex command you want to run automatically, set:
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from collections import Counter
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from . import krea2_eval_log, krea2_pose_variant_catalog
|
||||
except ImportError: # Allows local smoke tests from the repository root.
|
||||
import krea2_eval_log
|
||||
import krea2_pose_variant_catalog
|
||||
|
||||
|
||||
def _coverage_state(status: str, accepted_count: int) -> str:
    """Classify a catalog variant from its status and accepted-evidence count.

    Args:
        status: The variant's catalog status string.
        accepted_count: Number of eval-log entries for the variant whose
            result is ``"accepted"``.

    Returns:
        One of:

        * ``"proven_with_evidence"`` - status is ``"proven"`` and at least one
          accepted entry exists.
        * ``"proven_missing_evidence"`` - status is ``"proven"`` but no
          accepted entry exists.
        * ``"needs_fixed_seed_tests"`` - status is ``"candidate"`` with zero
          accepted entries.
        * ``"needs_stronger_control"`` - status is ``"unstable"`` (regardless
          of the count).
        * ``"tracked"`` - anything else, including a ``"candidate"`` that
          already has accepted evidence.
    """
    if status == "proven":
        if accepted_count > 0:
            return "proven_with_evidence"
        return "proven_missing_evidence"
    if status == "candidate" and accepted_count == 0:
        return "needs_fixed_seed_tests"
    if status == "unstable":
        return "needs_stronger_control"
    return "tracked"
|
||||
|
||||
|
||||
def coverage_rows() -> list[dict[str, Any]]:
    """Build one coverage row per catalog variant.

    Rows are appended in the order ``krea2_pose_variant_catalog.variants()``
    yields them. For each variant the eval log is queried with
    ``krea2_eval_log.entries_for_variant(key)`` and entries whose ``"result"``
    equals ``"accepted"`` are counted as accepted evidence.

    Returns:
        A list of dicts with the keys ``key``, ``family``, ``action_family``,
        ``status``, ``coverage_state``, ``accepted_evidence_count``,
        ``total_evidence_count``, ``reference_count`` and ``guide_section``.
        Missing or falsy catalog fields fall back to ``""`` (or ``0`` for the
        reference count).
    """
    rows: list[dict[str, Any]] = []
    for variant in krea2_pose_variant_catalog.variants():
        # ``or ""`` normalizes both a missing key and an explicit None.
        key = str(variant.get("key") or "")
        evidence = krea2_eval_log.entries_for_variant(key)
        accepted = [entry for entry in evidence if entry.get("result") == "accepted"]
        status = str(variant.get("status") or "")
        rows.append(
            {
                "key": key,
                "family": variant.get("family") or "",
                "action_family": variant.get("action_family") or "",
                "status": status,
                "coverage_state": _coverage_state(status, len(accepted)),
                "accepted_evidence_count": len(accepted),
                "total_evidence_count": len(evidence),
                "reference_count": len(variant.get("reference_images") or []),
                # The catalog's "evidence" mapping may be absent; treat it as empty.
                "guide_section": (variant.get("evidence") or {}).get("guide_section", ""),
            }
        )
    return rows
|
||||
|
||||
|
||||
def coverage_summary() -> dict[str, Any]:
    """Aggregate the rows from ``coverage_rows()`` into summary counts and lists.

    Returns:
        A dict with:

        * ``variant_count`` - number of rows.
        * ``status_counts`` - how many rows carry each ``status`` value.
        * ``coverage_state_counts`` - how many rows carry each
          ``coverage_state`` value.
        * ``variants_without_accepted_evidence`` - keys of rows whose
          ``accepted_evidence_count`` is zero (or missing), in row order.
        * ``next_test_candidates`` - keys of rows whose coverage state is
          ``"needs_fixed_seed_tests"`` or ``"proven_missing_evidence"``,
          in row order.
    """
    rows = coverage_rows()
    status_counts = Counter(row.get("status") for row in rows)
    state_counts = Counter(row.get("coverage_state") for row in rows)
    return {
        "variant_count": len(rows),
        "status_counts": dict(status_counts),
        "coverage_state_counts": dict(state_counts),
        "variants_without_accepted_evidence": [
            str(row.get("key"))
            for row in rows
            if int(row.get("accepted_evidence_count") or 0) == 0
        ],
        "next_test_candidates": [
            str(row.get("key"))
            for row in rows
            if row.get("coverage_state") in {"needs_fixed_seed_tests", "proven_missing_evidence"}
        ],
    }
|
||||
|
||||
|
||||
def markdown_report() -> str:
    """Render the variant coverage as a Markdown document.

    The document starts with a ``# Krea2 Pose Variant Coverage`` heading and a
    four-column table (Variant, Status, Evidence, State) with one line per row
    from ``coverage_rows()``; the Evidence cell is formatted as
    ``accepted/total``. When ``coverage_summary()`` reports any
    ``next_test_candidates``, a ``## Next Fixed-Seed Tests`` section listing
    those keys as bullets is appended.

    Returns:
        The report lines joined with ``"\\n"`` (no trailing newline).
    """
    lines = [
        "# Krea2 Pose Variant Coverage",
        "",
        "| Variant | Status | Evidence | State |",
        "| --- | --- | ---: | --- |",
    ]
    for row in coverage_rows():
        evidence_ratio = f"{row['accepted_evidence_count']}/{row['total_evidence_count']}"
        lines.append(
            f"| {row['key']} | {row['status']} | {evidence_ratio} | {row['coverage_state']} |"
        )
    summary = coverage_summary()
    if summary["next_test_candidates"]:
        lines.extend(
            [
                "",
                "## Next Fixed-Seed Tests",
                "",
                *[f"- {key}" for key in summary["next_test_candidates"]],
            ]
        )
    return "\n".join(lines)
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Put ROOT at the very front of sys.path: drop an existing occurrence first so
# the insert below moves it to index 0 instead of leaving it further back.
if str(ROOT) in sys.path:
    sys.path.remove(str(ROOT))
sys.path.insert(0, str(ROOT))
|
||||
|
||||
import krea2_tuning_report # noqa: E402
|
||||
|
||||
|
||||
def main() -> int:
    """Print the Markdown coverage report to stdout.

    Returns:
        ``0``, intended for use as the process exit status.
    """
    print(krea2_tuning_report.markdown_report())
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main())
|
||||
+28
-2
@@ -21,8 +21,9 @@ from typing import Any, Callable
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
if str(ROOT) in sys.path:
|
||||
sys.path.remove(str(ROOT))
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
import caption_naturalizer # noqa: E402
|
||||
import caption_format_route # noqa: E402
|
||||
@@ -63,6 +64,7 @@ import krea_normal_formatter # noqa: E402
|
||||
import krea_pair_formatter # noqa: E402
|
||||
import krea2_eval_log # noqa: E402
|
||||
import krea2_pose_variant_catalog # noqa: E402
|
||||
import krea2_tuning_report # noqa: E402
|
||||
import krea_row_fields # noqa: E402
|
||||
import location_config # noqa: E402
|
||||
import loop_nodes # noqa: E402
|
||||
@@ -6843,6 +6845,29 @@ def smoke_krea2_eval_log_policy() -> None:
|
||||
_expect(clean.get("observation") != "mutation should not leak", "Krea2 eval log leaked caller mutation")
|
||||
|
||||
|
||||
def smoke_krea2_tuning_report_policy() -> None:
    """Smoke-check the Krea2 tuning report against the current catalog and eval log.

    Verifies that report rows follow catalog key order, that two specific
    variants land in the expected coverage states, that the summary counts
    match the pinned values, and that the Markdown output mentions the
    candidate variant and its coverage state.

    NOTE(review): the pinned counts (3 proven, 1 candidate) and the variant
    keys mirror the catalog/eval-log contents at the time of writing; they
    must be updated when those change. ``_expect`` is defined elsewhere in
    this file - presumably it fails the smoke case with the given message
    when the condition is false.
    """
    rows = krea2_tuning_report.coverage_rows()
    catalog_keys = krea2_pose_variant_catalog.variant_keys()
    _expect([row.get("key") for row in rows] == catalog_keys, "Krea2 tuning report row order should follow catalog order")
    by_key = {row.get("key"): row for row in rows}

    # A proven variant that already has accepted evidence in the eval log.
    boobjob = by_key.get("pov_boobjob_upright_cleavage") or {}
    _expect(boobjob.get("coverage_state") == "proven_with_evidence", "Boobjob report should be proven with evidence")
    _expect(boobjob.get("accepted_evidence_count", 0) >= 1, "Boobjob report lost accepted evidence count")

    # A candidate variant with no accepted evidence yet.
    ballsucking = by_key.get("pov_ballsucking_low_head") or {}
    _expect(ballsucking.get("coverage_state") == "needs_fixed_seed_tests", "Ballsucking report should need fixed-seed tests")
    _expect(ballsucking.get("accepted_evidence_count") == 0, "Ballsucking report should not have accepted evidence yet")

    summary = krea2_tuning_report.coverage_summary()
    _expect(summary.get("status_counts", {}).get("proven") == 3, "Krea2 tuning report proven count changed")
    _expect(summary.get("status_counts", {}).get("candidate") == 1, "Krea2 tuning report candidate count changed")
    _expect(
        summary.get("variants_without_accepted_evidence") == ["pov_ballsucking_low_head"],
        f"Krea2 tuning report missing-evidence set changed: {summary.get('variants_without_accepted_evidence')}",
    )

    markdown = krea2_tuning_report.markdown_report()
    _expect("pov_ballsucking_low_head" in markdown, "Krea2 tuning report markdown lost candidate variant")
    _expect("needs_fixed_seed_tests" in markdown, "Krea2 tuning report markdown lost coverage state")
|
||||
|
||||
|
||||
def smoke_krea_pov_penetration_route() -> None:
|
||||
pair = pb.build_insta_of_pair(
|
||||
row_number=1,
|
||||
@@ -9849,6 +9874,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
|
||||
("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog),
|
||||
("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy),
|
||||
("krea2_eval_log_policy", smoke_krea2_eval_log_policy),
|
||||
("krea2_tuning_report_policy", smoke_krea2_tuning_report_policy),
|
||||
("krea_pov_penetration_route", smoke_krea_pov_penetration_route),
|
||||
("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes),
|
||||
("pov_oral_position_routes", smoke_pov_oral_position_routes),
|
||||
|
||||
Reference in New Issue
Block a user