Add Krea2 eval entry templates

This commit is contained in:
2026-06-29 09:20:20 +02:00
parent 2aafab03bd
commit 6a37c807bc
4 changed files with 103 additions and 1 deletions
+1
View File
@@ -46,6 +46,7 @@ seed, prompt summaries, observation, decision, and commit.
Record durable findings with the checked helper instead of hand-editing the log:
```bash
python tools/krea2_record_eval.py --print-template --variant-key pov_footjob_frontal_sole_stroke --seed 1234 > /tmp/krea2-entry.json
python tools/krea2_record_eval.py --entry-json /tmp/krea2-entry.json --dry-run
python tools/krea2_record_eval.py --entry-json /tmp/krea2-entry.json
```
+41
View File
@@ -42,6 +42,47 @@ def _require_text(errors: list[str], entry: dict[str, Any], key: str, min_len: i
errors.append(f"{key} must be at least {min_len} characters") errors.append(f"{key} must be at least {min_len} characters")
def _entry_id_slug(variant_key: str) -> str:
value = variant_key.removeprefix("pov_")
chars = [char.lower() if char.isalnum() else "-" for char in value]
slug = "".join(chars).strip("-")
while "--" in slug:
slug = slug.replace("--", "-")
return slug or "krea2-eval"
def entry_template(
    variant_key: str,
    *,
    seed: int,
    source: str = "sxcp_eval_mcp",
    date: str = "",
    result: str = "inconclusive",
    decision: str = "needs_more_tests",
    commit: str = "pending",
) -> dict[str, Any]:
    """Build a pre-filled eval entry dict for one variant and fixed seed.

    The summary and observation fields hold "Replace this with ..."
    placeholder sentences naming the variant; the image fields are empty
    strings. All other fields are copied from the arguments.

    Args:
        variant_key: Variant identifier; passed through ``_text`` and
            stripped before use.
        seed: Fixed seed. Must be an ``int`` and not a ``bool``.
        source: Value stored under ``"source"``.
        date: Value stored under ``"date"``.
        result: Value stored under ``"result"``.
        decision: Value stored under ``"decision"``.
        commit: Value stored under ``"commit"``.

    Returns:
        A new dict whose ``"id"`` is ``"<slug>-<seed>-eval"``, where the slug
        comes from ``_entry_id_slug``.

    Raises:
        ValueError: If *seed* is not a plain integer, or if the variant key
            is empty after normalization.
    """
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(seed, bool) or not isinstance(seed, int):
        raise ValueError("seed must be an integer")

    variant = _text(variant_key).strip()
    if not variant:
        raise ValueError("variant_key is required")

    # Assembled in stages; insertion order fixes the key order of the result.
    entry: dict[str, Any] = {
        "id": f"{_entry_id_slug(variant)}-{seed}-eval",
        "date": date,
        "variant_key": variant,
        "seed": seed,
        "source": source,
        "result": result,
        "decision": decision,
    }
    entry["baseline_prompt_summary"] = (
        f"Replace this with what the generated {variant} prompt did before the edit."
    )
    entry["candidate_prompt_summary"] = (
        f"Replace this with what the same-seed candidate prompt changed for {variant}."
    )
    entry["observation"] = (
        f"Replace this with the fixed-seed Krea2 image comparison observation for {variant}."
    )
    entry["baseline_image"] = ""
    entry["candidate_image"] = ""
    entry["commit"] = commit
    return entry
def validate_entry( def validate_entry(
entry: dict[str, Any], entry: dict[str, Any],
*, *,
+20 -1
View File
@@ -2,6 +2,7 @@
from __future__ import annotations from __future__ import annotations
import argparse import argparse
from datetime import date
import json import json
import sys import sys
from pathlib import Path from pathlib import Path
@@ -25,12 +26,30 @@ def _load_entry(path: Path) -> dict:
def main() -> int: def main() -> int:
parser = argparse.ArgumentParser(description="Validate and append one durable Krea2 fixed-seed eval entry.") parser = argparse.ArgumentParser(description="Validate and append one durable Krea2 fixed-seed eval entry.")
parser.add_argument("--entry-json", required=True, help="Path to a JSON object containing one eval entry.") parser.add_argument("--entry-json", help="Path to a JSON object containing one eval entry.")
parser.add_argument("--print-template", action="store_true", help="Print a valid eval entry template instead of recording.")
parser.add_argument("--variant-key", help="Catalog variant key for --print-template.")
parser.add_argument("--seed", type=int, help="Fixed seed for --print-template.")
parser.add_argument("--source", default="sxcp_eval_mcp", help="Source label for --print-template.")
parser.add_argument("--date", default=date.today().isoformat(), help="Date for --print-template.")
parser.add_argument("--log-path", default=str(krea2_eval_log.DEFAULT_EVAL_LOG_PATH), help="Eval log path to update.") parser.add_argument("--log-path", default=str(krea2_eval_log.DEFAULT_EVAL_LOG_PATH), help="Eval log path to update.")
parser.add_argument("--dry-run", action="store_true", help="Validate without writing the log.") parser.add_argument("--dry-run", action="store_true", help="Validate without writing the log.")
args = parser.parse_args() args = parser.parse_args()
try: try:
if args.print_template:
if not args.variant_key or args.seed is None:
raise ValueError("--print-template requires --variant-key and --seed")
entry = krea2_eval_log.entry_template(
args.variant_key,
seed=args.seed,
source=args.source,
date=args.date,
)
print(json.dumps(entry, ensure_ascii=True, indent=2))
return 0
if not args.entry_json:
raise ValueError("--entry-json is required unless --print-template is used")
entry = _load_entry(Path(args.entry_json)) entry = _load_entry(Path(args.entry_json))
log = krea2_eval_log.append_entry(entry, path=args.log_path, dry_run=args.dry_run) log = krea2_eval_log.append_entry(entry, path=args.log_path, dry_run=args.dry_run)
except Exception as exc: except Exception as exc:
+41
View File
@@ -13,6 +13,7 @@ import argparse
import json import json
import random import random
import re import re
import subprocess
import sys import sys
import tempfile import tempfile
from dataclasses import dataclass, field from dataclasses import dataclass, field
@@ -7037,6 +7038,46 @@ def smoke_krea2_eval_log_policy() -> None:
catalog_keys=set(krea2_pose_variant_catalog.variant_keys()), catalog_keys=set(krea2_pose_variant_catalog.variant_keys()),
) )
_expect(any("unknown variant" in error for error in bad_variant_errors), "Krea2 eval validation should reject unknown variants") _expect(any("unknown variant" in error for error in bad_variant_errors), "Krea2 eval validation should reject unknown variants")
template = krea2_eval_log.entry_template(
"pov_footjob_frontal_sole_stroke",
seed=9102,
source="smoke",
date="2026-06-29",
)
_expect(template.get("variant_key") == "pov_footjob_frontal_sole_stroke", "Krea2 eval template lost variant key")
_expect(template.get("seed") == 9102, "Krea2 eval template lost fixed seed")
_expect(template.get("result") == "inconclusive", "Krea2 eval template should default to inconclusive")
_expect(template.get("decision") == "needs_more_tests", "Krea2 eval template should default to needs_more_tests")
_expect("footjob" in str(template.get("id") or ""), "Krea2 eval template id should include variant family")
template_errors = krea2_eval_log.validate_entry(
template,
existing_entries=appended_log.get("entries") or [],
catalog_keys=set(krea2_pose_variant_catalog.variant_keys()),
)
_expect(template_errors == [], f"Krea2 eval template should validate immediately: {template_errors}")
cli_result = subprocess.run(
[
sys.executable,
str(ROOT / "tools" / "krea2_record_eval.py"),
"--print-template",
"--variant-key",
"pov_fingering_reclined_open_thighs",
"--seed",
"9103",
"--source",
"smoke",
"--date",
"2026-06-29",
],
cwd=str(ROOT),
capture_output=True,
text=True,
check=False,
)
_expect(cli_result.returncode == 0, f"Krea2 eval template CLI failed: {cli_result.stderr}")
cli_template = json.loads(cli_result.stdout)
_expect(cli_template.get("variant_key") == "pov_fingering_reclined_open_thighs", "Krea2 eval template CLI lost variant")
_expect(cli_template.get("seed") == 9103, "Krea2 eval template CLI lost seed")
def smoke_krea2_prompt_guide_policy() -> None: def smoke_krea2_prompt_guide_policy() -> None: