Add a template generator for Krea2 fixed-seed eval entries.

- krea2_eval_log.py: new entry_template() plus the _entry_id_slug() helper.
- tools/krea2_record_eval.py: new --print-template mode (with --variant-key,
  --seed, --source, --date); --entry-json is now only required when recording.
- docs/sxcp-eval-loop.md: show the template step before the dry run.
- tools/prompt_smoke.py: smoke coverage for the helper and the CLI.

diff --git a/docs/sxcp-eval-loop.md b/docs/sxcp-eval-loop.md
index 43b2952..2e858bf 100644
--- a/docs/sxcp-eval-loop.md
+++ b/docs/sxcp-eval-loop.md
@@ -46,6 +46,7 @@ seed, prompt summaries, observation, decision, and commit.
 Record durable findings with the checked helper instead of hand-editing the log:
 
 ```bash
+python tools/krea2_record_eval.py --print-template --variant-key pov_footjob_frontal_sole_stroke --seed 1234 > /tmp/krea2-entry.json
 python tools/krea2_record_eval.py --entry-json /tmp/krea2-entry.json --dry-run
 python tools/krea2_record_eval.py --entry-json /tmp/krea2-entry.json
 ```
diff --git a/krea2_eval_log.py b/krea2_eval_log.py
index c1c54f7..058cc24 100644
--- a/krea2_eval_log.py
+++ b/krea2_eval_log.py
@@ -42,6 +42,47 @@ def _require_text(errors: list[str], entry: dict[str, Any], key: str, min_len: i
         errors.append(f"{key} must be at least {min_len} characters")
 
 
+def _entry_id_slug(variant_key: str) -> str:
+    value = variant_key.removeprefix("pov_")
+    chars = [char.lower() if char.isalnum() else "-" for char in value]
+    slug = "".join(chars).strip("-")
+    while "--" in slug:
+        slug = slug.replace("--", "-")
+    return slug or "krea2-eval"
+
+
+def entry_template(
+    variant_key: str,
+    *,
+    seed: int,
+    source: str = "sxcp_eval_mcp",
+    date: str = "",
+    result: str = "inconclusive",
+    decision: str = "needs_more_tests",
+    commit: str = "pending",
+) -> dict[str, Any]:
+    if not isinstance(seed, int) or isinstance(seed, bool):
+        raise ValueError("seed must be an integer")
+    variant = _text(variant_key).strip()
+    if not variant:
+        raise ValueError("variant_key is required")
+    return {
+        "id": f"{_entry_id_slug(variant)}-{seed}-eval",
+        "date": date,
+        "variant_key": variant,
+        "seed": seed,
+        "source": source,
+        "result": result,
+        "decision": decision,
+        "baseline_prompt_summary": f"Replace this with what the generated {variant} prompt did before the edit.",
+        "candidate_prompt_summary": f"Replace this with what the same-seed candidate prompt changed for {variant}.",
+        "observation": f"Replace this with the fixed-seed Krea2 image comparison observation for {variant}.",
+        "baseline_image": "",
+        "candidate_image": "",
+        "commit": commit,
+    }
+
+
 def validate_entry(
     entry: dict[str, Any],
     *,
diff --git a/tools/krea2_record_eval.py b/tools/krea2_record_eval.py
index ef6817e..2b7df96 100644
--- a/tools/krea2_record_eval.py
+++ b/tools/krea2_record_eval.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import argparse
+from datetime import date
 import json
 import sys
 from pathlib import Path
@@ -25,12 +26,30 @@ def _load_entry(path: Path) -> dict:
 
 def main() -> int:
     parser = argparse.ArgumentParser(description="Validate and append one durable Krea2 fixed-seed eval entry.")
-    parser.add_argument("--entry-json", required=True, help="Path to a JSON object containing one eval entry.")
+    parser.add_argument("--entry-json", help="Path to a JSON object containing one eval entry.")
+    parser.add_argument("--print-template", action="store_true", help="Print a valid eval entry template instead of recording.")
+    parser.add_argument("--variant-key", help="Catalog variant key for --print-template.")
+    parser.add_argument("--seed", type=int, help="Fixed seed for --print-template.")
+    parser.add_argument("--source", default="sxcp_eval_mcp", help="Source label for --print-template.")
+    parser.add_argument("--date", default=date.today().isoformat(), help="Date for --print-template.")
     parser.add_argument("--log-path", default=str(krea2_eval_log.DEFAULT_EVAL_LOG_PATH), help="Eval log path to update.")
     parser.add_argument("--dry-run", action="store_true", help="Validate without writing the log.")
     args = parser.parse_args()
 
     try:
+        if args.print_template:
+            if not args.variant_key or args.seed is None:
+                raise ValueError("--print-template requires --variant-key and --seed")
+            entry = krea2_eval_log.entry_template(
+                args.variant_key,
+                seed=args.seed,
+                source=args.source,
+                date=args.date,
+            )
+            print(json.dumps(entry, ensure_ascii=True, indent=2))
+            return 0
+        if not args.entry_json:
+            raise ValueError("--entry-json is required unless --print-template is used")
         entry = _load_entry(Path(args.entry_json))
         log = krea2_eval_log.append_entry(entry, path=args.log_path, dry_run=args.dry_run)
     except Exception as exc:
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index 875c432..31d5b13 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -13,6 +13,7 @@ import argparse
 import json
 import random
 import re
+import subprocess
 import sys
 import tempfile
 from dataclasses import dataclass, field
@@ -7037,6 +7038,46 @@ def smoke_krea2_eval_log_policy() -> None:
         catalog_keys=set(krea2_pose_variant_catalog.variant_keys()),
     )
     _expect(any("unknown variant" in error for error in bad_variant_errors), "Krea2 eval validation should reject unknown variants")
+    template = krea2_eval_log.entry_template(
+        "pov_footjob_frontal_sole_stroke",
+        seed=9102,
+        source="smoke",
+        date="2026-06-29",
+    )
+    _expect(template.get("variant_key") == "pov_footjob_frontal_sole_stroke", "Krea2 eval template lost variant key")
+    _expect(template.get("seed") == 9102, "Krea2 eval template lost fixed seed")
+    _expect(template.get("result") == "inconclusive", "Krea2 eval template should default to inconclusive")
+    _expect(template.get("decision") == "needs_more_tests", "Krea2 eval template should default to needs_more_tests")
+    _expect("footjob" in str(template.get("id") or ""), "Krea2 eval template id should include variant family")
+    template_errors = krea2_eval_log.validate_entry(
+        template,
+        existing_entries=appended_log.get("entries") or [],
+        catalog_keys=set(krea2_pose_variant_catalog.variant_keys()),
+    )
+    _expect(template_errors == [], f"Krea2 eval template should validate immediately: {template_errors}")
+    cli_result = subprocess.run(
+        [
+            sys.executable,
+            str(ROOT / "tools" / "krea2_record_eval.py"),
+            "--print-template",
+            "--variant-key",
+            "pov_fingering_reclined_open_thighs",
+            "--seed",
+            "9103",
+            "--source",
+            "smoke",
+            "--date",
+            "2026-06-29",
+        ],
+        cwd=str(ROOT),
+        capture_output=True,
+        text=True,
+        check=False,
+    )
+    _expect(cli_result.returncode == 0, f"Krea2 eval template CLI failed: {cli_result.stderr}")
+    cli_template = json.loads(cli_result.stdout)
+    _expect(cli_template.get("variant_key") == "pov_fingering_reclined_open_thighs", "Krea2 eval template CLI lost variant")
+    _expect(cli_template.get("seed") == 9103, "Krea2 eval template CLI lost seed")
 
 
 def smoke_krea2_prompt_guide_policy() -> None: