Add Krea2 fixed-seed eval log
This commit is contained in:
@@ -61,6 +61,7 @@ import krea_format_route # noqa: E402
|
||||
import krea_formatter # noqa: E402
|
||||
import krea_normal_formatter # noqa: E402
|
||||
import krea_pair_formatter # noqa: E402
|
||||
import krea2_eval_log # noqa: E402
|
||||
import krea2_pose_variant_catalog # noqa: E402
|
||||
import krea_row_fields # noqa: E402
|
||||
import location_config # noqa: E402
|
||||
@@ -6803,6 +6804,44 @@ def smoke_krea2_pose_variant_catalog_policy() -> None:
|
||||
_expect(missing == {}, "Missing pose variant should return an empty mapping")
|
||||
|
||||
|
||||
def smoke_krea2_eval_log_policy() -> None:
    """Smoke-check the Krea2 fixed-seed eval log against the pose variant catalog.

    Checks performed, in order:

    * the loaded log reports ``version == 1`` and has at least one entry;
    * every catalog variant with ``status="proven"`` has an accepted eval entry;
    * each entry has a unique id, references a known catalog variant, carries a
      real integer seed, uses a known ``result`` and ``decision`` value, and has
      the required text fields;
    * any ``baseline_image`` / ``candidate_image`` path that is present is
      absolute and points at an existing file;
    * the first accepted entry for ``pov_boobjob_upright_cleavage`` still uses
      seed 7302;
    * mutating an entry returned by ``entries_for_variant`` does not show up in
      a later call.

    NOTE(review): ``_expect`` presumably raises when its condition is falsy and
    ``_expect_text`` presumably validates a minimum text length and returns the
    text; both are defined elsewhere in this file -- confirm.
    """
    allowed_results = {"accepted", "rejected", "inconclusive"}
    allowed_decisions = {"generator_patch", "prompt_guide_rule", "prompt_only_retry", "needs_more_tests"}

    log = krea2_eval_log.load_eval_log()
    _expect(log.get("version") == 1, "Krea2 eval log version changed unexpectedly")
    entries = krea2_eval_log.entries()
    _expect(entries, "Krea2 eval log has no entries")

    catalog_keys = set(krea2_pose_variant_catalog.variant_keys())
    proven_keys = set(krea2_pose_variant_catalog.variant_keys(status="proven"))
    accepted_keys = set(krea2_eval_log.variant_keys(result="accepted"))
    _expect(proven_keys.issubset(accepted_keys), "Krea2 eval log does not cover every proven pose variant")

    seen_ids: set[str] = set()
    for entry in entries:
        entry_id = _expect_text("krea2_eval_log.entry.id", entry.get("id"), 6)
        _expect(entry_id not in seen_ids, f"Krea2 eval log has duplicate entry id {entry_id!r}")
        seen_ids.add(entry_id)

        variant_key = _expect_text(f"{entry_id}.variant_key", entry.get("variant_key"), 8)
        _expect(variant_key in catalog_keys, f"{entry_id} references unknown variant {variant_key!r}")

        # bool is a subclass of int, so True/False must be rejected explicitly;
        # otherwise a JSON `true` would pass as a "fixed seed".
        seed = entry.get("seed")
        _expect(
            isinstance(seed, int) and not isinstance(seed, bool),
            f"{entry_id} has no integer fixed seed",
        )
        _expect(entry.get("result") in allowed_results, f"{entry_id} has unknown result")
        _expect(entry.get("decision") in allowed_decisions, f"{entry_id} has unknown decision")

        _expect_text(f"{entry_id}.baseline_prompt_summary", entry.get("baseline_prompt_summary"), 20)
        _expect_text(f"{entry_id}.candidate_prompt_summary", entry.get("candidate_prompt_summary"), 20)
        _expect_text(f"{entry_id}.observation", entry.get("observation"), 30)

        # Image paths are optional; only validate the ones that are set.
        for image_key in ("baseline_image", "candidate_image"):
            image_path = str(entry.get(image_key) or "")
            if image_path:
                _expect(Path(image_path).is_absolute(), f"{entry_id}.{image_key} should be absolute when present")
                _expect(Path(image_path).is_file(), f"{entry_id}.{image_key} is missing: {image_path}")

    boobjob_entries = krea2_eval_log.entries_for_variant("pov_boobjob_upright_cleavage", result="accepted")
    _expect(boobjob_entries and boobjob_entries[0].get("seed") == 7302, "Boobjob accepted eval evidence changed")

    # Mutation-isolation check: edit a returned entry, fetch again, and make
    # sure the edit did not reach the module's stored data.
    handjob_variant = "pov_handjob_upright_centered"
    handjob_entries = krea2_eval_log.entries_for_variant(handjob_variant)
    # Report a readable smoke failure instead of a bare IndexError below.
    _expect(handjob_entries, f"Krea2 eval log has no entries for {handjob_variant}")
    mutation = handjob_entries[0]
    mutation["observation"] = "mutation should not leak"
    clean = krea2_eval_log.entries_for_variant(handjob_variant)[0]
    _expect(clean.get("observation") != "mutation should not leak", "Krea2 eval log leaked caller mutation")
|
||||
|
||||
|
||||
def smoke_krea_pov_penetration_route() -> None:
|
||||
pair = pb.build_insta_of_pair(
|
||||
row_number=1,
|
||||
@@ -9764,6 +9803,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
|
||||
("pov_camera_scene", smoke_pov_camera_scene),
|
||||
("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog),
|
||||
("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy),
|
||||
("krea2_eval_log_policy", smoke_krea2_eval_log_policy),
|
||||
("krea_pov_penetration_route", smoke_krea_pov_penetration_route),
|
||||
("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes),
|
||||
("pov_oral_position_routes", smoke_pov_oral_position_routes),
|
||||
|
||||
Reference in New Issue
Block a user