From 742281f48fdf8a9c35306c2e5feda06b95b434c4 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Mon, 29 Jun 2026 02:49:01 +0200 Subject: [PATCH] Add Krea2 fixed-seed eval log --- docs/krea2-eval-log.json | 51 +++++++++++++++++++++++++ docs/sxcp-eval-loop.md | 5 +++ krea2_eval_log.py | 80 ++++++++++++++++++++++++++++++++++++++++ tools/prompt_smoke.py | 40 ++++++++++++++++++++ 4 files changed, 176 insertions(+) create mode 100644 docs/krea2-eval-log.json create mode 100644 krea2_eval_log.py diff --git a/docs/krea2-eval-log.json b/docs/krea2-eval-log.json new file mode 100644 index 0000000..3dc0d69 --- /dev/null +++ b/docs/krea2-eval-log.json @@ -0,0 +1,51 @@ +{ + "version": 1, + "purpose": "Structured fixed-seed Krea2 prompt/image evidence for SxCP atlas pose variants.", + "entries": [ + { + "id": "doggy-52-climax-target-structural", + "date": "2026-06-29", + "variant_key": "pov_doggy_top_down_rear_entry", + "seed": 52, + "source": "CodexMCPTest", + "result": "accepted", + "decision": "generator_patch", + "baseline_prompt_summary": "Rear-view on-all-fours POV doggy prompt targeted ejaculation onto face and chest.", + "candidate_prompt_summary": "Rear-entry climax target changed to ass, thighs, and lower back before expression tokens.", + "observation": "The prompt was structurally contradictory before rendering: rear-entry body geometry conflicted with a face/chest fluid target. The generator target policy was patched for doggy, rear-entry, bent-over, face-down, and raised-ass contexts.", + "baseline_image": "/media/unraid/comfyui/output/CodexMCPTest/52-hard.png", + "candidate_image": "", + "commit": "bb53967" + }, + { + "id": "boobjob-7302-upright-cleavage", + "date": "2026-06-29", + "variant_key": "pov_boobjob_upright_cleavage", + "seed": 7302, + "source": "sxcp_eval_mcp", + "result": "accepted", + "decision": "generator_patch", + "baseline_prompt_summary": "Current route used low kneeling and torso bent forward over the viewer's pelvis, with generic hand ownership.", + "candidate_prompt_summary": "Atlas-aligned route used upright frontal boobjob geometry, vertical shaft, pressed-together breasts, and explicit woman's fingers.", + "observation": "Same-seed A/B showed the candidate moved the shaft closer to compressed cleavage and exposed the hand-ownership problem. The generator route was patched to frontal upright geometry with the woman's own fingers named.", + "baseline_image": "/media/unraid/comfyui/output/agent_bridge/img_7edff903e4f449c4add925a20583231b.png", + "candidate_image": "/media/unraid/comfyui/output/agent_bridge/img_87e5e4a1ad604c91b1e329bc69f6c966.png", + "commit": "11b7c2a" + }, + { + "id": "handjob-7401-woman-hand-ownership", + "date": "2026-06-29", + "variant_key": "pov_handjob_upright_centered", + "seed": 7401, + "source": "sxcp_eval_mcp", + "result": "accepted", + "decision": "generator_patch", + "baseline_prompt_summary": "Current route said one hand grips and another steadies while the POV camera allowed foreground hands.", + "candidate_prompt_summary": "Candidate named the woman's right hand and left hand as the active hands and blocked viewer hands from covering the action.", + "observation": "Same-seed A/B made both visible hands read as the woman's hands instead of a competing viewer hand. The generator route was patched to explicit woman-hand ownership.", + "baseline_image": "/media/unraid/comfyui/output/agent_bridge/img_e21f615e5f5246d486167ae5a1c03527.png", + "candidate_image": "/media/unraid/comfyui/output/agent_bridge/img_7263741723d64271b04dce8ed63f560b.png", + "commit": "a484783" + } + ] +} diff --git a/docs/sxcp-eval-loop.md b/docs/sxcp-eval-loop.md index a03623d..cc981f0 100644 --- a/docs/sxcp-eval-loop.md +++ b/docs/sxcp-eval-loop.md @@ -36,6 +36,11 @@ Every three minutes it prints a structured request asking Codex to: Runtime logs are written under `.sxcp_eval/` and ignored by git. +Durable fixed-seed findings that justify a guide rule, generator patch, or pose +variant promotion are recorded in [`krea2-eval-log.json`](krea2-eval-log.json). +Use runtime logs for scratch notes; use the JSON log only for evidence that +should remain tied to a catalog variant. + ## Optional Command Hook If you have a one-shot Codex command you want to run automatically, set: diff --git a/krea2_eval_log.py b/krea2_eval_log.py new file mode 100644 index 0000000..951fa0a --- /dev/null +++ b/krea2_eval_log.py @@ -0,0 +1,80 @@ +from __future__ import annotations + +import copy +import json +from functools import lru_cache +from pathlib import Path +from typing import Any + + +ROOT = Path(__file__).resolve().parent +DEFAULT_EVAL_LOG_PATH = ROOT / "docs" / "krea2-eval-log.json" + + +def _path_key(path: str | Path | None = None) -> str: + return str(Path(path or DEFAULT_EVAL_LOG_PATH).resolve()) + + +@lru_cache(maxsize=8) +def _load_raw_eval_log(path_key: str) -> dict[str, Any]: + with Path(path_key).open("r", encoding="utf-8") as handle: + data = json.load(handle) + return data if isinstance(data, dict) else {} + + +def clear_cache() -> None: + _load_raw_eval_log.cache_clear() + + +def load_eval_log(path: str | Path | None = None) -> dict[str, Any]: + return copy.deepcopy(_load_raw_eval_log(_path_key(path))) + + +def entries( + *, + variant_key: str | None = None, + result: str | None = None, + decision: str | None = None, + path: str | Path | None = None, +) -> list[dict[str, Any]]: + log = load_eval_log(path) + rows = log.get("entries") or [] + if not isinstance(rows, list): + return [] + filtered: list[dict[str, Any]] = [] + for row in rows: + if not isinstance(row, dict): + continue + if variant_key is not None and row.get("variant_key") != variant_key: + continue + if result is not None and row.get("result") != result: + continue + if decision is not None and row.get("decision") != decision: + continue + filtered.append(row) + return filtered + + +def entries_for_variant( + variant_key: str, + *, + result: str | None = None, + decision: str | None = None, + path: str | Path | None = None, +) -> list[dict[str, Any]]: + return entries(variant_key=variant_key, result=result, decision=decision, path=path) + + +def variant_keys( + *, + result: str | None = None, + decision: str | None = None, + path: str | Path | None = None, +) -> list[str]: + keys: list[str] = [] + for row in entries(result=result, decision=decision, path=path): + key = row.get("variant_key") + if key and key not in keys: + keys.append(str(key)) + return keys + diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 1a85b97..47737fd 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -61,6 +61,7 @@ import krea_format_route # noqa: E402 import krea_formatter # noqa: E402 import krea_normal_formatter # noqa: E402 import krea_pair_formatter # noqa: E402 +import krea2_eval_log # noqa: E402 import krea2_pose_variant_catalog # noqa: E402 import krea_row_fields # noqa: E402 import location_config # noqa: E402 @@ -6803,6 +6804,44 @@ def smoke_krea2_pose_variant_catalog_policy() -> None: _expect(missing == {}, "Missing pose variant should return an empty mapping") +def smoke_krea2_eval_log_policy() -> None: + log = krea2_eval_log.load_eval_log() + _expect(log.get("version") == 1, "Krea2 eval log version changed unexpectedly") + entries = krea2_eval_log.entries() + _expect(entries, "Krea2 eval log has no entries") + catalog_keys = set(krea2_pose_variant_catalog.variant_keys()) + proven_keys = set(krea2_pose_variant_catalog.variant_keys(status="proven")) + accepted_keys = set(krea2_eval_log.variant_keys(result="accepted")) + _expect(proven_keys.issubset(accepted_keys), "Krea2 eval log does not cover every proven pose variant") + seen_ids: set[str] = set() + for entry in entries: + entry_id = _expect_text("krea2_eval_log.entry.id", entry.get("id"), 6) + _expect(entry_id not in seen_ids, f"Krea2 eval log has duplicate entry id {entry_id!r}") + seen_ids.add(entry_id) + variant_key = _expect_text(f"{entry_id}.variant_key", entry.get("variant_key"), 8) + _expect(variant_key in catalog_keys, f"{entry_id} references unknown variant {variant_key!r}") + _expect(isinstance(entry.get("seed"), int), f"{entry_id} has no integer fixed seed") + _expect(entry.get("result") in {"accepted", "rejected", "inconclusive"}, f"{entry_id} has unknown result") + _expect( + entry.get("decision") in {"generator_patch", "prompt_guide_rule", "prompt_only_retry", "needs_more_tests"}, + f"{entry_id} has unknown decision", + ) + _expect_text(f"{entry_id}.baseline_prompt_summary", entry.get("baseline_prompt_summary"), 20) + _expect_text(f"{entry_id}.candidate_prompt_summary", entry.get("candidate_prompt_summary"), 20) + _expect_text(f"{entry_id}.observation", entry.get("observation"), 30) + for image_key in ("baseline_image", "candidate_image"): + image_path = str(entry.get(image_key) or "") + if image_path: + _expect(Path(image_path).is_absolute(), f"{entry_id}.{image_key} should be absolute when present") + _expect(Path(image_path).is_file(), f"{entry_id}.{image_key} is missing: {image_path}") + boobjob_entries = krea2_eval_log.entries_for_variant("pov_boobjob_upright_cleavage", result="accepted") + _expect(boobjob_entries and boobjob_entries[0].get("seed") == 7302, "Boobjob accepted eval evidence changed") + mutation = krea2_eval_log.entries_for_variant("pov_handjob_upright_centered")[0] + mutation["observation"] = "mutation should not leak" + clean = krea2_eval_log.entries_for_variant("pov_handjob_upright_centered")[0] + _expect(clean.get("observation") != "mutation should not leak", "Krea2 eval log leaked caller mutation") + + def smoke_krea_pov_penetration_route() -> None: pair = pb.build_insta_of_pair( row_number=1, @@ -9764,6 +9803,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [ ("pov_camera_scene", smoke_pov_camera_scene), ("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog), ("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy), + ("krea2_eval_log_policy", smoke_krea2_eval_log_policy), ("krea_pov_penetration_route", smoke_krea_pov_penetration_route), ("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes), ("pov_oral_position_routes", smoke_pov_oral_position_routes),