Show latest Krea2 evidence in tuning report
This commit is contained in:
@@ -51,7 +51,9 @@ python tools/krea2_tuning_report.py
|
|||||||
```
|
```
|
||||||
|
|
||||||
The report includes atlas references plus prompt cues and avoid cues for the
|
The report includes atlas references plus prompt cues and avoid cues for the
|
||||||
next fixed-seed test candidate.
|
next fixed-seed test candidate. It also shows the latest durable evidence for
|
||||||
|
variants that already have fixed-seed results, including the evidence id, seed,
|
||||||
|
decision, candidate prompt summary, and observation.
|
||||||
|
|
||||||
## Optional Command Hook
|
## Optional Command Hook
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,23 @@ def _coverage_state(status: str, accepted_count: int) -> str:
|
|||||||
return "tracked"
|
return "tracked"
|
||||||
|
|
||||||
|
|
||||||
|
def _latest_evidence(entries: list[dict[str, Any]], *, result: str | None = None) -> dict[str, Any]:
|
||||||
|
filtered = [entry for entry in entries if result is None or entry.get("result") == result]
|
||||||
|
if not filtered:
|
||||||
|
return {}
|
||||||
|
entry = filtered[-1]
|
||||||
|
return {
|
||||||
|
"id": entry.get("id") or "",
|
||||||
|
"seed": entry.get("seed"),
|
||||||
|
"result": entry.get("result") or "",
|
||||||
|
"decision": entry.get("decision") or "",
|
||||||
|
"baseline_prompt_summary": entry.get("baseline_prompt_summary") or "",
|
||||||
|
"candidate_prompt_summary": entry.get("candidate_prompt_summary") or "",
|
||||||
|
"observation": entry.get("observation") or "",
|
||||||
|
"commit": entry.get("commit") or "",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def coverage_rows() -> list[dict[str, Any]]:
|
def coverage_rows() -> list[dict[str, Any]]:
|
||||||
rows: list[dict[str, Any]] = []
|
rows: list[dict[str, Any]] = []
|
||||||
for variant in krea2_pose_variant_catalog.variants():
|
for variant in krea2_pose_variant_catalog.variants():
|
||||||
@@ -39,6 +56,8 @@ def coverage_rows() -> list[dict[str, Any]]:
|
|||||||
"coverage_state": _coverage_state(status, len(accepted)),
|
"coverage_state": _coverage_state(status, len(accepted)),
|
||||||
"accepted_evidence_count": len(accepted),
|
"accepted_evidence_count": len(accepted),
|
||||||
"total_evidence_count": len(evidence),
|
"total_evidence_count": len(evidence),
|
||||||
|
"latest_evidence": _latest_evidence(evidence),
|
||||||
|
"latest_accepted_evidence": _latest_evidence(evidence, result="accepted"),
|
||||||
"reference_count": len(variant.get("reference_images") or []),
|
"reference_count": len(variant.get("reference_images") or []),
|
||||||
"guide_section": (variant.get("evidence") or {}).get("guide_section", ""),
|
"guide_section": (variant.get("evidence") or {}).get("guide_section", ""),
|
||||||
}
|
}
|
||||||
@@ -212,6 +231,21 @@ def markdown_report(atlas_root: str | Path | None = None) -> str:
|
|||||||
lines.append(
|
lines.append(
|
||||||
f"| {row['key']} | {row['status']} | {row['accepted_evidence_count']}/{row['total_evidence_count']} | {row['coverage_state']} |"
|
f"| {row['key']} | {row['status']} | {row['accepted_evidence_count']}/{row['total_evidence_count']} | {row['coverage_state']} |"
|
||||||
)
|
)
|
||||||
|
evidence_rows = [row for row in coverage_rows() if row.get("latest_evidence")]
|
||||||
|
if evidence_rows:
|
||||||
|
lines.extend(["", "## Latest Evidence", ""])
|
||||||
|
for row in evidence_rows:
|
||||||
|
evidence = row.get("latest_evidence") or {}
|
||||||
|
seed = evidence.get("seed")
|
||||||
|
seed_text = f"seed {seed}" if isinstance(seed, int) else "seed unknown"
|
||||||
|
commit = evidence.get("commit") or "uncommitted"
|
||||||
|
lines.append(
|
||||||
|
f"- {row['key']}: {evidence.get('id') or 'unnamed'} ({evidence.get('result') or 'unknown'}, {seed_text}, {evidence.get('decision') or 'unknown'}, commit {commit})"
|
||||||
|
)
|
||||||
|
if evidence.get("candidate_prompt_summary"):
|
||||||
|
lines.append(f" Candidate: {evidence['candidate_prompt_summary']}")
|
||||||
|
if evidence.get("observation"):
|
||||||
|
lines.append(f" Observation: {evidence['observation']}")
|
||||||
summary = coverage_summary()
|
summary = coverage_summary()
|
||||||
if summary["next_test_candidates"]:
|
if summary["next_test_candidates"]:
|
||||||
lines.extend(
|
lines.extend(
|
||||||
|
|||||||
@@ -6998,6 +6998,12 @@ def smoke_krea2_tuning_report_policy() -> None:
|
|||||||
boobjob = by_key.get("pov_boobjob_upright_cleavage") or {}
|
boobjob = by_key.get("pov_boobjob_upright_cleavage") or {}
|
||||||
_expect(boobjob.get("coverage_state") == "proven_with_evidence", "Boobjob report should be proven with evidence")
|
_expect(boobjob.get("coverage_state") == "proven_with_evidence", "Boobjob report should be proven with evidence")
|
||||||
_expect(boobjob.get("accepted_evidence_count", 0) >= 1, "Boobjob report lost accepted evidence count")
|
_expect(boobjob.get("accepted_evidence_count", 0) >= 1, "Boobjob report lost accepted evidence count")
|
||||||
|
boobjob_latest = boobjob.get("latest_evidence") or {}
|
||||||
|
_expect(boobjob_latest.get("id") == "boobjob-7302-upright-cleavage", "Boobjob report lost latest evidence id")
|
||||||
|
_expect(boobjob_latest.get("seed") == 7302, "Boobjob report lost latest fixed seed")
|
||||||
|
_expect(boobjob_latest.get("result") == "accepted", "Boobjob report lost latest evidence result")
|
||||||
|
_expect(boobjob_latest.get("decision") == "generator_patch", "Boobjob report lost latest evidence decision")
|
||||||
|
_expect("upright frontal boobjob geometry" in str(boobjob_latest.get("candidate_prompt_summary") or ""), "Boobjob report lost latest candidate summary")
|
||||||
ballsucking = by_key.get("pov_ballsucking_low_head") or {}
|
ballsucking = by_key.get("pov_ballsucking_low_head") or {}
|
||||||
_expect(ballsucking.get("coverage_state") == "needs_fixed_seed_tests", "Ballsucking report should need fixed-seed tests")
|
_expect(ballsucking.get("coverage_state") == "needs_fixed_seed_tests", "Ballsucking report should need fixed-seed tests")
|
||||||
_expect(ballsucking.get("accepted_evidence_count") == 0, "Ballsucking report should not have accepted evidence yet")
|
_expect(ballsucking.get("accepted_evidence_count") == 0, "Ballsucking report should not have accepted evidence yet")
|
||||||
@@ -7293,6 +7299,11 @@ def smoke_krea2_tuning_report_policy() -> None:
|
|||||||
_expect("custom_pose" in atlas_markdown, "Krea2 tuning report markdown lost unmapped atlas folder")
|
_expect("custom_pose" in atlas_markdown, "Krea2 tuning report markdown lost unmapped atlas folder")
|
||||||
_expect("pov_custom_pose_candidate" in atlas_markdown, "Krea2 tuning report markdown lost suggested gap key")
|
_expect("pov_custom_pose_candidate" in atlas_markdown, "Krea2 tuning report markdown lost suggested gap key")
|
||||||
markdown = krea2_tuning_report.markdown_report()
|
markdown = krea2_tuning_report.markdown_report()
|
||||||
|
_expect("## Latest Evidence" in markdown, "Krea2 tuning report markdown lost latest evidence section")
|
||||||
|
_expect("boobjob-7302-upright-cleavage" in markdown, "Krea2 tuning report markdown lost boobjob evidence id")
|
||||||
|
_expect("seed 7302" in markdown, "Krea2 tuning report markdown lost evidence seed")
|
||||||
|
_expect("generator_patch" in markdown, "Krea2 tuning report markdown lost evidence decision")
|
||||||
|
_expect("upright frontal boobjob geometry" in markdown, "Krea2 tuning report markdown lost evidence prompt summary")
|
||||||
_expect("pov_ballsucking_low_head" in markdown, "Krea2 tuning report markdown lost candidate variant")
|
_expect("pov_ballsucking_low_head" in markdown, "Krea2 tuning report markdown lost candidate variant")
|
||||||
_expect("pov_footjob_frontal_sole_stroke" in markdown, "Krea2 tuning report markdown lost footjob candidate variant")
|
_expect("pov_footjob_frontal_sole_stroke" in markdown, "Krea2 tuning report markdown lost footjob candidate variant")
|
||||||
_expect("pov_fingering_reclined_open_thighs" in markdown, "Krea2 tuning report markdown lost fingering candidate variant")
|
_expect("pov_fingering_reclined_open_thighs" in markdown, "Krea2 tuning report markdown lost fingering candidate variant")
|
||||||
|
|||||||
Reference in New Issue
Block a user