Add Krea2 fixed-seed eval log
This commit is contained in:
@@ -0,0 +1,51 @@
|
|||||||
|
{
|
||||||
|
"version": 1,
|
||||||
|
"purpose": "Structured fixed-seed Krea2 prompt/image evidence for SxCP atlas pose variants.",
|
||||||
|
"entries": [
|
||||||
|
{
|
||||||
|
"id": "doggy-52-climax-target-structural",
|
||||||
|
"date": "2026-06-29",
|
||||||
|
"variant_key": "pov_doggy_top_down_rear_entry",
|
||||||
|
"seed": 52,
|
||||||
|
"source": "CodexMCPTest",
|
||||||
|
"result": "accepted",
|
||||||
|
"decision": "generator_patch",
|
||||||
|
"baseline_prompt_summary": "Rear-view on-all-fours POV doggy prompt targeted ejaculation onto face and chest.",
|
||||||
|
"candidate_prompt_summary": "Rear-entry climax target changed to ass, thighs, and lower back before expression tokens.",
|
||||||
|
"observation": "The prompt was structurally contradictory before rendering: rear-entry body geometry conflicted with a face/chest fluid target. The generator target policy was patched for doggy, rear-entry, bent-over, face-down, and raised-ass contexts.",
|
||||||
|
"baseline_image": "/media/unraid/comfyui/output/CodexMCPTest/52-hard.png",
|
||||||
|
"candidate_image": "",
|
||||||
|
"commit": "bb53967"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "boobjob-7302-upright-cleavage",
|
||||||
|
"date": "2026-06-29",
|
||||||
|
"variant_key": "pov_boobjob_upright_cleavage",
|
||||||
|
"seed": 7302,
|
||||||
|
"source": "sxcp_eval_mcp",
|
||||||
|
"result": "accepted",
|
||||||
|
"decision": "generator_patch",
|
||||||
|
"baseline_prompt_summary": "Current route used low kneeling and torso bent forward over the viewer's pelvis, with generic hand ownership.",
|
||||||
|
"candidate_prompt_summary": "Atlas-aligned route used upright frontal boobjob geometry, vertical shaft, pressed-together breasts, and explicit woman's fingers.",
|
||||||
|
"observation": "Same-seed A/B showed the candidate moved the shaft closer to compressed cleavage and exposed the hand-ownership problem. The generator route was patched to frontal upright geometry with the woman's own fingers named.",
|
||||||
|
"baseline_image": "/media/unraid/comfyui/output/agent_bridge/img_7edff903e4f449c4add925a20583231b.png",
|
||||||
|
"candidate_image": "/media/unraid/comfyui/output/agent_bridge/img_87e5e4a1ad604c91b1e329bc69f6c966.png",
|
||||||
|
"commit": "11b7c2a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "handjob-7401-woman-hand-ownership",
|
||||||
|
"date": "2026-06-29",
|
||||||
|
"variant_key": "pov_handjob_upright_centered",
|
||||||
|
"seed": 7401,
|
||||||
|
"source": "sxcp_eval_mcp",
|
||||||
|
"result": "accepted",
|
||||||
|
"decision": "generator_patch",
|
||||||
|
"baseline_prompt_summary": "Current route said one hand grips and another steadies while the POV camera allowed foreground hands.",
|
||||||
|
"candidate_prompt_summary": "Candidate named the woman's right hand and left hand as the active hands and blocked viewer hands from covering the action.",
|
||||||
|
"observation": "Same-seed A/B made both visible hands read as the woman's hands instead of a competing viewer hand. The generator route was patched to explicit woman-hand ownership.",
|
||||||
|
"baseline_image": "/media/unraid/comfyui/output/agent_bridge/img_e21f615e5f5246d486167ae5a1c03527.png",
|
||||||
|
"candidate_image": "/media/unraid/comfyui/output/agent_bridge/img_7263741723d64271b04dce8ed63f560b.png",
|
||||||
|
"commit": "a484783"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -36,6 +36,11 @@ Every three minutes it prints a structured request asking Codex to:
|
|||||||
|
|
||||||
Runtime logs are written under `.sxcp_eval/` and ignored by git.
|
Runtime logs are written under `.sxcp_eval/` and ignored by git.
|
||||||
|
|
||||||
|
Durable fixed-seed findings that justify a guide rule, generator patch, or pose
|
||||||
|
variant promotion are recorded in [`krea2-eval-log.json`](krea2-eval-log.json).
|
||||||
|
Use runtime logs for scratch notes; use the JSON log only for evidence that
|
||||||
|
should remain tied to a catalog variant.
|
||||||
|
|
||||||
## Optional Command Hook
|
## Optional Command Hook
|
||||||
|
|
||||||
If you have a one-shot Codex command you want to run automatically, set:
|
If you have a one-shot Codex command you want to run automatically, set:
|
||||||
|
|||||||
@@ -0,0 +1,80 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import copy
|
||||||
|
import json
|
||||||
|
from functools import lru_cache
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
|
||||||
|
# Directory that contains this module; the eval log path below is resolved
# relative to it.
ROOT = Path(__file__).resolve().parent

# Location of the eval log that is read when a caller does not pass an
# explicit path.
DEFAULT_EVAL_LOG_PATH = ROOT.joinpath("docs", "krea2-eval-log.json")
|
||||||
|
|
||||||
|
|
||||||
|
def _path_key(path: str | Path | None = None) -> str:
|
||||||
|
return str(Path(path or DEFAULT_EVAL_LOG_PATH).resolve())
|
||||||
|
|
||||||
|
|
||||||
|
@lru_cache(maxsize=8)
|
||||||
|
def _load_raw_eval_log(path_key: str) -> dict[str, Any]:
|
||||||
|
with Path(path_key).open("r", encoding="utf-8") as handle:
|
||||||
|
data = json.load(handle)
|
||||||
|
return data if isinstance(data, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def clear_cache() -> None:
    """Drop every memoized eval log so the next load re-reads the file."""
    _load_raw_eval_log.cache_clear()
|
||||||
|
|
||||||
|
|
||||||
|
def load_eval_log(path: str | Path | None = None) -> dict[str, Any]:
    """Return an independent deep copy of the eval log stored at *path*.

    When *path* is omitted the default log location is used. The copy lets
    callers mutate the result without touching the memoized parse.
    """
    cached = _load_raw_eval_log(_path_key(path))
    return copy.deepcopy(cached)
|
||||||
|
|
||||||
|
|
||||||
|
def entries(
    *,
    variant_key: str | None = None,
    result: str | None = None,
    decision: str | None = None,
    path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Return the log's entry dicts, optionally narrowed by exact-match filters.

    Each of *variant_key*, *result* and *decision* is applied only when it is
    not ``None``; an entry is kept when every supplied filter equals the
    entry's field of the same name. Items in the ``entries`` list that are
    not dicts are skipped, and a missing or non-list ``entries`` value yields
    an empty list. *path* selects the log file to read.
    """
    candidates = load_eval_log(path).get("entries") or []
    if not isinstance(candidates, list):
        return []

    # Only the filters the caller actually supplied, in the order they are checked.
    wanted = {
        field: expected
        for field, expected in (
            ("variant_key", variant_key),
            ("result", result),
            ("decision", decision),
        )
        if expected is not None
    }

    return [
        item
        for item in candidates
        if isinstance(item, dict)
        and not any(item.get(field) != expected for field, expected in wanted.items())
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def entries_for_variant(
    variant_key: str,
    *,
    result: str | None = None,
    decision: str | None = None,
    path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Return the entries recorded for *variant_key*.

    Convenience wrapper around ``entries`` that takes the variant key
    positionally; *result*, *decision* and *path* are forwarded unchanged.
    """
    return entries(
        variant_key=variant_key,
        result=result,
        decision=decision,
        path=path,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def variant_keys(
    *,
    result: str | None = None,
    decision: str | None = None,
    path: str | Path | None = None,
) -> list[str]:
    """Return the distinct variant keys of matching entries, in first-seen order.

    *result*, *decision* and *path* are forwarded to ``entries``. Entries
    whose ``variant_key`` is missing or falsy are ignored. Keys are converted
    to ``str`` before de-duplication, so a non-string key cannot be emitted
    more than once.
    """
    # A dict preserves insertion order and gives O(1) membership checks.
    ordered: dict[str, None] = {}
    for row in entries(result=result, decision=decision, path=path):
        key = row.get("variant_key")
        if key:
            # De-duplicate on the stringified form, which is what gets returned.
            ordered.setdefault(str(key), None)
    return list(ordered)
|
||||||
|
|
||||||
@@ -61,6 +61,7 @@ import krea_format_route # noqa: E402
|
|||||||
import krea_formatter # noqa: E402
|
import krea_formatter # noqa: E402
|
||||||
import krea_normal_formatter # noqa: E402
|
import krea_normal_formatter # noqa: E402
|
||||||
import krea_pair_formatter # noqa: E402
|
import krea_pair_formatter # noqa: E402
|
||||||
|
import krea2_eval_log # noqa: E402
|
||||||
import krea2_pose_variant_catalog # noqa: E402
|
import krea2_pose_variant_catalog # noqa: E402
|
||||||
import krea_row_fields # noqa: E402
|
import krea_row_fields # noqa: E402
|
||||||
import location_config # noqa: E402
|
import location_config # noqa: E402
|
||||||
@@ -6803,6 +6804,44 @@ def smoke_krea2_pose_variant_catalog_policy() -> None:
|
|||||||
_expect(missing == {}, "Missing pose variant should return an empty mapping")
|
_expect(missing == {}, "Missing pose variant should return an empty mapping")
|
||||||
|
|
||||||
|
|
||||||
|
def smoke_krea2_eval_log_policy() -> None:
    """Smoke-check the structure and catalog coverage of the Krea2 eval log.

    Checks the log version, that every proven catalog variant has an accepted
    entry, that each entry has a unique id, a known variant key, an integer
    seed, a recognized result/decision and sufficiently long summaries, and
    that any recorded image path is absolute and exists on disk. It also
    confirms that mutating a returned entry does not leak into later reads.

    NOTE(review): ``_expect`` and ``_expect_text`` are defined elsewhere in
    this file; this code presumes ``_expect`` reports a failure when its first
    argument is falsy and that ``_expect_text`` returns the validated text.
    """
    log = krea2_eval_log.load_eval_log()
    _expect(log.get("version") == 1, "Krea2 eval log version changed unexpectedly")
    entries = krea2_eval_log.entries()
    _expect(entries, "Krea2 eval log has no entries")

    catalog_keys = set(krea2_pose_variant_catalog.variant_keys())
    proven_keys = set(krea2_pose_variant_catalog.variant_keys(status="proven"))
    accepted_keys = set(krea2_eval_log.variant_keys(result="accepted"))
    _expect(proven_keys.issubset(accepted_keys), "Krea2 eval log does not cover every proven pose variant")

    seen_ids: set[str] = set()
    for entry in entries:
        entry_id = _expect_text("krea2_eval_log.entry.id", entry.get("id"), 6)
        _expect(entry_id not in seen_ids, f"Krea2 eval log has duplicate entry id {entry_id!r}")
        seen_ids.add(entry_id)
        variant_key = _expect_text(f"{entry_id}.variant_key", entry.get("variant_key"), 8)
        _expect(variant_key in catalog_keys, f"{entry_id} references unknown variant {variant_key!r}")
        seed = entry.get("seed")
        # bool is a subclass of int, so exclude it explicitly: a JSON
        # true/false must not pass as a fixed seed.
        _expect(
            isinstance(seed, int) and not isinstance(seed, bool),
            f"{entry_id} has no integer fixed seed",
        )
        _expect(entry.get("result") in {"accepted", "rejected", "inconclusive"}, f"{entry_id} has unknown result")
        _expect(
            entry.get("decision") in {"generator_patch", "prompt_guide_rule", "prompt_only_retry", "needs_more_tests"},
            f"{entry_id} has unknown decision",
        )
        _expect_text(f"{entry_id}.baseline_prompt_summary", entry.get("baseline_prompt_summary"), 20)
        _expect_text(f"{entry_id}.candidate_prompt_summary", entry.get("candidate_prompt_summary"), 20)
        _expect_text(f"{entry_id}.observation", entry.get("observation"), 30)
        for image_key in ("baseline_image", "candidate_image"):
            # An empty or missing image path is allowed; only recorded paths are checked.
            image_path = str(entry.get(image_key) or "")
            if image_path:
                _expect(Path(image_path).is_absolute(), f"{entry_id}.{image_key} should be absolute when present")
                # NOTE(review): this ties the smoke case to files present on the host filesystem.
                _expect(Path(image_path).is_file(), f"{entry_id}.{image_key} is missing: {image_path}")

    boobjob_entries = krea2_eval_log.entries_for_variant("pov_boobjob_upright_cleavage", result="accepted")
    _expect(boobjob_entries and boobjob_entries[0].get("seed") == 7302, "Boobjob accepted eval evidence changed")

    # Guard the lookup so an absent variant is reported through _expect
    # rather than as a bare IndexError from the [0] index below.
    handjob_entries = krea2_eval_log.entries_for_variant("pov_handjob_upright_centered")
    _expect(handjob_entries, "Krea2 eval log has no handjob entries for the mutation check")
    mutation = handjob_entries[0]
    mutation["observation"] = "mutation should not leak"
    clean = krea2_eval_log.entries_for_variant("pov_handjob_upright_centered")[0]
    _expect(clean.get("observation") != "mutation should not leak", "Krea2 eval log leaked caller mutation")
|
||||||
|
|
||||||
|
|
||||||
def smoke_krea_pov_penetration_route() -> None:
|
def smoke_krea_pov_penetration_route() -> None:
|
||||||
pair = pb.build_insta_of_pair(
|
pair = pb.build_insta_of_pair(
|
||||||
row_number=1,
|
row_number=1,
|
||||||
@@ -9764,6 +9803,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
|
|||||||
("pov_camera_scene", smoke_pov_camera_scene),
|
("pov_camera_scene", smoke_pov_camera_scene),
|
||||||
("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog),
|
("krea2_pov_pose_variant_catalog", smoke_krea2_pov_pose_variant_catalog),
|
||||||
("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy),
|
("krea2_pose_variant_catalog_policy", smoke_krea2_pose_variant_catalog_policy),
|
||||||
|
("krea2_eval_log_policy", smoke_krea2_eval_log_policy),
|
||||||
("krea_pov_penetration_route", smoke_krea_pov_penetration_route),
|
("krea_pov_penetration_route", smoke_krea_pov_penetration_route),
|
||||||
("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes),
|
("pov_outercourse_position_routes", smoke_pov_outercourse_position_routes),
|
||||||
("pov_oral_position_routes", smoke_pov_oral_position_routes),
|
("pov_oral_position_routes", smoke_pov_oral_position_routes),
|
||||||
|
|||||||
Reference in New Issue
Block a user