Add Krea2 eval entry templates
This commit is contained in:
@@ -46,6 +46,7 @@ seed, prompt summaries, observation, decision, and commit.
|
||||
Record durable findings with the checked helper instead of hand-editing the log:
|
||||
|
||||
```bash
|
||||
python tools/krea2_record_eval.py --print-template --variant-key pov_footjob_frontal_sole_stroke --seed 1234 > /tmp/krea2-entry.json
|
||||
python tools/krea2_record_eval.py --entry-json /tmp/krea2-entry.json --dry-run
|
||||
python tools/krea2_record_eval.py --entry-json /tmp/krea2-entry.json
|
||||
```
|
||||
|
||||
@@ -42,6 +42,47 @@ def _require_text(errors: list[str], entry: dict[str, Any], key: str, min_len: i
|
||||
errors.append(f"{key} must be at least {min_len} characters")
|
||||
|
||||
|
||||
def _entry_id_slug(variant_key: str) -> str:
|
||||
value = variant_key.removeprefix("pov_")
|
||||
chars = [char.lower() if char.isalnum() else "-" for char in value]
|
||||
slug = "".join(chars).strip("-")
|
||||
while "--" in slug:
|
||||
slug = slug.replace("--", "-")
|
||||
return slug or "krea2-eval"
|
||||
|
||||
|
||||
def entry_template(
    variant_key: str,
    *,
    seed: int,
    source: str = "sxcp_eval_mcp",
    date: str = "",
    result: str = "inconclusive",
    decision: str = "needs_more_tests",
    commit: str = "pending",
) -> dict[str, Any]:
    """Build a fill-in-the-blanks eval entry for one variant and seed.

    The returned dict carries the given metadata plus placeholder text for
    the prompt summaries and the observation, and empty strings for the two
    image fields. Its ``id`` is ``"<slug>-<seed>-eval"``, where the slug is
    derived from the variant key by ``_entry_id_slug``.

    Args:
        variant_key: Variant key; it is passed through ``_text`` and stripped
            (``_text`` is defined elsewhere -- presumably a text coercion
            helper; confirm against its definition).
        seed: Seed stored in the entry and embedded in the id.
        source, date, result, decision, commit: Stored in the entry as given.

    Raises:
        ValueError: If ``seed`` is not an ``int`` (``bool`` is rejected too),
            or if the variant key is empty after stripping.
    """
    # bool is a subclass of int, so it has to be excluded explicitly.
    if isinstance(seed, bool) or not isinstance(seed, int):
        raise ValueError("seed must be an integer")

    variant = _text(variant_key).strip()
    if not variant:
        raise ValueError("variant_key is required")

    slug = _entry_id_slug(variant)
    baseline_note = f"Replace this with what the generated {variant} prompt did before the edit."
    candidate_note = f"Replace this with what the same-seed candidate prompt changed for {variant}."
    observation_note = f"Replace this with the fixed-seed Krea2 image comparison observation for {variant}."

    # Key order is kept stable because it determines the serialized layout.
    template: dict[str, Any] = {
        "id": f"{slug}-{seed}-eval",
        "date": date,
        "variant_key": variant,
        "seed": seed,
        "source": source,
        "result": result,
        "decision": decision,
        "baseline_prompt_summary": baseline_note,
        "candidate_prompt_summary": candidate_note,
        "observation": observation_note,
        "baseline_image": "",
        "candidate_image": "",
        "commit": commit,
    }
    return template
|
||||
|
||||
|
||||
def validate_entry(
|
||||
entry: dict[str, Any],
|
||||
*,
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from datetime import date
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
@@ -25,12 +26,30 @@ def _load_entry(path: Path) -> dict:
|
||||
|
||||
def main() -> int:
|
||||
parser = argparse.ArgumentParser(description="Validate and append one durable Krea2 fixed-seed eval entry.")
|
||||
parser.add_argument("--entry-json", required=True, help="Path to a JSON object containing one eval entry.")
|
||||
parser.add_argument("--entry-json", help="Path to a JSON object containing one eval entry.")
|
||||
parser.add_argument("--print-template", action="store_true", help="Print a valid eval entry template instead of recording.")
|
||||
parser.add_argument("--variant-key", help="Catalog variant key for --print-template.")
|
||||
parser.add_argument("--seed", type=int, help="Fixed seed for --print-template.")
|
||||
parser.add_argument("--source", default="sxcp_eval_mcp", help="Source label for --print-template.")
|
||||
parser.add_argument("--date", default=date.today().isoformat(), help="Date for --print-template.")
|
||||
parser.add_argument("--log-path", default=str(krea2_eval_log.DEFAULT_EVAL_LOG_PATH), help="Eval log path to update.")
|
||||
parser.add_argument("--dry-run", action="store_true", help="Validate without writing the log.")
|
||||
args = parser.parse_args()
|
||||
|
||||
try:
|
||||
if args.print_template:
|
||||
if not args.variant_key or args.seed is None:
|
||||
raise ValueError("--print-template requires --variant-key and --seed")
|
||||
entry = krea2_eval_log.entry_template(
|
||||
args.variant_key,
|
||||
seed=args.seed,
|
||||
source=args.source,
|
||||
date=args.date,
|
||||
)
|
||||
print(json.dumps(entry, ensure_ascii=True, indent=2))
|
||||
return 0
|
||||
if not args.entry_json:
|
||||
raise ValueError("--entry-json is required unless --print-template is used")
|
||||
entry = _load_entry(Path(args.entry_json))
|
||||
log = krea2_eval_log.append_entry(entry, path=args.log_path, dry_run=args.dry_run)
|
||||
except Exception as exc:
|
||||
|
||||
@@ -13,6 +13,7 @@ import argparse
|
||||
import json
|
||||
import random
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
@@ -7037,6 +7038,46 @@ def smoke_krea2_eval_log_policy() -> None:
|
||||
catalog_keys=set(krea2_pose_variant_catalog.variant_keys()),
|
||||
)
|
||||
_expect(any("unknown variant" in error for error in bad_variant_errors), "Krea2 eval validation should reject unknown variants")
|
||||
template = krea2_eval_log.entry_template(
|
||||
"pov_footjob_frontal_sole_stroke",
|
||||
seed=9102,
|
||||
source="smoke",
|
||||
date="2026-06-29",
|
||||
)
|
||||
_expect(template.get("variant_key") == "pov_footjob_frontal_sole_stroke", "Krea2 eval template lost variant key")
|
||||
_expect(template.get("seed") == 9102, "Krea2 eval template lost fixed seed")
|
||||
_expect(template.get("result") == "inconclusive", "Krea2 eval template should default to inconclusive")
|
||||
_expect(template.get("decision") == "needs_more_tests", "Krea2 eval template should default to needs_more_tests")
|
||||
_expect("footjob" in str(template.get("id") or ""), "Krea2 eval template id should include variant family")
|
||||
template_errors = krea2_eval_log.validate_entry(
|
||||
template,
|
||||
existing_entries=appended_log.get("entries") or [],
|
||||
catalog_keys=set(krea2_pose_variant_catalog.variant_keys()),
|
||||
)
|
||||
_expect(template_errors == [], f"Krea2 eval template should validate immediately: {template_errors}")
|
||||
cli_result = subprocess.run(
|
||||
[
|
||||
sys.executable,
|
||||
str(ROOT / "tools" / "krea2_record_eval.py"),
|
||||
"--print-template",
|
||||
"--variant-key",
|
||||
"pov_fingering_reclined_open_thighs",
|
||||
"--seed",
|
||||
"9103",
|
||||
"--source",
|
||||
"smoke",
|
||||
"--date",
|
||||
"2026-06-29",
|
||||
],
|
||||
cwd=str(ROOT),
|
||||
capture_output=True,
|
||||
text=True,
|
||||
check=False,
|
||||
)
|
||||
_expect(cli_result.returncode == 0, f"Krea2 eval template CLI failed: {cli_result.stderr}")
|
||||
cli_template = json.loads(cli_result.stdout)
|
||||
_expect(cli_template.get("variant_key") == "pov_fingering_reclined_open_thighs", "Krea2 eval template CLI lost variant")
|
||||
_expect(cli_template.get("seed") == 9103, "Krea2 eval template CLI lost seed")
|
||||
|
||||
|
||||
def smoke_krea2_prompt_guide_policy() -> None:
|
||||
|
||||
Reference in New Issue
Block a user