# ComfyUI-Ethanfel-Prompt-Bui…/krea2_eval_log.py

from __future__ import annotations

import copy
import json
from functools import lru_cache
from pathlib import Path
from typing import Any


# Directory that contains this module; the default log location is built from it.
ROOT = Path(__file__).resolve().parent
# Log file used whenever a caller passes no explicit ``path``.
DEFAULT_EVAL_LOG_PATH = ROOT / "docs" / "krea2-eval-log.json"
# Values accepted for an entry's "result" field (checked in validate_entry).
VALID_RESULTS = {"accepted", "rejected", "inconclusive"}
# Values accepted for an entry's "decision" field (checked in validate_entry).
VALID_DECISIONS = {
    "generator_patch",
    "provisional_generator_patch",
    "prompt_guide_rule",
    "prompt_only_retry",
    "needs_more_tests",
}


def _path_key(path: str | Path | None = None) -> str:
    return str(Path(path or DEFAULT_EVAL_LOG_PATH).resolve())


@lru_cache(maxsize=8)
def _load_raw_eval_log(path_key: str) -> dict[str, Any]:
    with Path(path_key).open("r", encoding="utf-8") as handle:
        data = json.load(handle)
    return data if isinstance(data, dict) else {}


def clear_cache() -> None:
    """Drop every memoised log so the next load re-reads the file from disk."""
    _load_raw_eval_log.cache_clear()


def load_eval_log(path: str | Path | None = None) -> dict[str, Any]:
    """Return an independent copy of the eval log stored at ``path``.

    The parsed file is served from the per-path cache; the deep copy keeps
    caller-side mutations from leaking into that cached object.
    """
    cache_key = _path_key(path)
    cached = _load_raw_eval_log(cache_key)
    return copy.deepcopy(cached)


def _text(value: Any) -> str:
    return value if isinstance(value, str) else ""


def _require_text(errors: list[str], entry: dict[str, Any], key: str, min_len: int) -> None:
    """Append an error to ``errors`` when ``entry[key]`` is too short.

    The value is measured after stripping surrounding whitespace; a missing
    or non-string value counts as an empty string.
    """
    stripped = _text(entry.get(key)).strip()
    if len(stripped) >= min_len:
        return
    errors.append(f"{key} must be at least {min_len} characters")


def _entry_id_slug(variant_key: str) -> str:
    value = variant_key.removeprefix("pov_")
    chars = [char.lower() if char.isalnum() else "-" for char in value]
    slug = "".join(chars).strip("-")
    while "--" in slug:
        slug = slug.replace("--", "-")
    return slug or "krea2-eval"


def entry_template(
    variant_key: str,
    *,
    seed: int,
    generator_seed: int | None = None,
    source: str = "sxcp_eval_mcp",
    date: str = "",
    result: str = "inconclusive",
    decision: str = "needs_more_tests",
    commit: str = "pending",
) -> dict[str, Any]:
    """Build a pre-filled eval-log entry for ``variant_key`` and ``seed``.

    The summary and observation fields contain placeholder sentences that are
    meant to be replaced, and both image fields start empty.
    ``generator_seed`` is only added to the entry when it is not ``None``.

    Raises:
        ValueError: if ``seed`` or ``generator_seed`` is not a plain integer
            (booleans are rejected), or if ``variant_key`` is blank or not a
            string.
    """

    def _is_plain_int(candidate: Any) -> bool:
        # bool is a subclass of int, so it has to be excluded explicitly.
        return isinstance(candidate, int) and not isinstance(candidate, bool)

    if not _is_plain_int(seed):
        raise ValueError("seed must be an integer")
    if not (generator_seed is None or _is_plain_int(generator_seed)):
        raise ValueError("generator_seed must be an integer")

    variant = _text(variant_key).strip()
    if variant == "":
        raise ValueError("variant_key is required")

    # Key order is kept stable because it determines the order in the
    # serialised JSON log.
    template: dict[str, Any] = {
        "id": f"{_entry_id_slug(variant)}-{seed}-eval",
        "date": date,
        "variant_key": variant,
        "seed": seed,
        "source": source,
        "result": result,
        "decision": decision,
    }
    template["baseline_prompt_summary"] = (
        f"Replace this with what the generated {variant} prompt did before the edit."
    )
    template["candidate_prompt_summary"] = (
        f"Replace this with what the same-seed candidate prompt changed for {variant}."
    )
    template["observation"] = (
        f"Replace this with the fixed-seed Krea2 image comparison observation for {variant}."
    )
    template["baseline_image"] = ""
    template["candidate_image"] = ""
    template["commit"] = commit
    if generator_seed is not None:
        template["generator_seed"] = generator_seed
    return template


def validate_entry(
    entry: dict[str, Any],
    *,
    existing_entries: list[dict[str, Any]] | None = None,
    catalog_keys: set[str] | None = None,
) -> list[str]:
    errors: list[str] = []
    if not isinstance(entry, dict):
        return ["entry must be an object"]

    _require_text(errors, entry, "id", 6)
    entry_id = _text(entry.get("id")).strip()
    if entry_id and existing_entries:
        existing_ids = {_text(row.get("id")).strip() for row in existing_entries if isinstance(row, dict)}
        if entry_id in existing_ids:
            errors.append(f"duplicate id {entry_id!r}")

    _require_text(errors, entry, "variant_key", 8)
    variant_key = _text(entry.get("variant_key")).strip()
    if variant_key and catalog_keys is not None and variant_key not in catalog_keys:
        errors.append(f"unknown variant {variant_key!r}")

    seed = entry.get("seed")
    if not isinstance(seed, int) or isinstance(seed, bool):
        errors.append("seed must be an integer")
    generator_seed = entry.get("generator_seed")
    if generator_seed is not None and (not isinstance(generator_seed, int) or isinstance(generator_seed, bool)):
        errors.append("generator_seed must be an integer")

    result = entry.get("result")
    if result not in VALID_RESULTS:
        errors.append(f"result must be one of {sorted(VALID_RESULTS)}")

    decision = entry.get("decision")
    if decision not in VALID_DECISIONS:
        errors.append(f"decision must be one of {sorted(VALID_DECISIONS)}")

    _require_text(errors, entry, "baseline_prompt_summary", 20)
    _require_text(errors, entry, "candidate_prompt_summary", 20)
    _require_text(errors, entry, "observation", 30)

    for image_key in ("baseline_image", "candidate_image"):
        image_path = _text(entry.get(image_key)).strip()
        if not image_path:
            continue
        path = Path(image_path)
        if not path.is_absolute():
            errors.append(f"{image_key} must be absolute when present")
        if path.suffix.lower() != ".png":
            errors.append(f"{image_key} must reference a PNG artifact")

    return errors


def save_eval_log(log: dict[str, Any], *, path: str | Path | None = None) -> None:
    """Write ``log`` as indented, ASCII-escaped JSON and reset the load cache.

    A falsy ``path`` selects ``DEFAULT_EVAL_LOG_PATH``. The file always ends
    with a single trailing newline.
    """
    destination = Path(path if path else DEFAULT_EVAL_LOG_PATH)
    serialized = json.dumps(log, ensure_ascii=True, indent=2)
    destination.write_text(f"{serialized}\n", encoding="utf-8")
    # Cached copies of the old file content are stale from here on.
    clear_cache()


def append_entry(
    entry: dict[str, Any],
    *,
    path: str | Path | None = None,
    catalog_path: str | Path | None = None,
    dry_run: bool = False,
) -> dict[str, Any]:
    """Validate ``entry``, append it to the eval log, and return the new log.

    Args:
        entry: Entry to add; it is deep-copied, so the caller's object is
            never stored in or modified through the log.
        path: Log file to read and write; falsy selects the default log.
        catalog_path: Forwarded to the pose-variant catalog's
            ``variant_keys`` to obtain the set of known variants.
        dry_run: When true, the updated log is returned but not written.

    Returns:
        A deep copy of the log including the appended entry.

    Raises:
        ValueError: if validation reports any problem; the message joins all
            problems with ``"; "``.
    """
    try:
        from . import krea2_pose_variant_catalog as catalog
    except ImportError:  # Allows local smoke tests from the repository root.
        import krea2_pose_variant_catalog as catalog

    log = load_eval_log(path)
    rows = log.get("entries")
    if not isinstance(rows, list):
        # A missing or malformed "entries" value is replaced by a fresh list.
        rows = log["entries"] = []

    candidate = copy.deepcopy(entry)
    known_rows = [row for row in rows if isinstance(row, dict)]
    known_variants = set(catalog.variant_keys(path=catalog_path))
    problems = validate_entry(
        candidate,
        existing_entries=known_rows,
        catalog_keys=known_variants,
    )
    if problems:
        raise ValueError("; ".join(problems))

    rows.append(candidate)
    if dry_run:
        return copy.deepcopy(log)
    save_eval_log(log, path=path)
    return copy.deepcopy(log)


def entries(
    *,
    variant_key: str | None = None,
    result: str | None = None,
    decision: str | None = None,
    path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Return the log's dict rows that match every filter that was supplied.

    A filter left as ``None`` is not applied. Rows that are not dicts are
    skipped, and a log whose ``"entries"`` value is missing, empty, or not a
    list yields an empty result. Rows keep their order from the log.
    """
    rows = load_eval_log(path).get("entries") or []
    if not isinstance(rows, list):
        return []

    requested = {
        "variant_key": variant_key,
        "result": result,
        "decision": decision,
    }
    active = [(field, wanted) for field, wanted in requested.items() if wanted is not None]

    return [
        row
        for row in rows
        if isinstance(row, dict)
        and not any(row.get(field) != wanted for field, wanted in active)
    ]


def entries_for_variant(
    variant_key: str,
    *,
    result: str | None = None,
    decision: str | None = None,
    path: str | Path | None = None,
) -> list[dict[str, Any]]:
    """Return the log entries recorded for ``variant_key``.

    Positional-variant convenience wrapper around ``entries``; ``result``,
    ``decision`` and ``path`` are forwarded unchanged. Because ``entries``
    treats ``None`` as "no filter", passing ``None`` as ``variant_key``
    disables the variant filter.
    """
    return entries(variant_key=variant_key, result=result, decision=decision, path=path)


def variant_keys(
    *,
    result: str | None = None,
    decision: str | None = None,
    path: str | Path | None = None,
) -> list[str]:
    """Return the distinct variant keys of matching entries, first-seen order.

    ``result``, ``decision`` and ``path`` are forwarded to ``entries``.
    Entries whose ``variant_key`` is missing or falsy are skipped. Keys are
    converted with ``str`` before de-duplication, so a non-string key can
    never be listed more than once.
    """
    # A dict keeps insertion order and gives O(1) membership, replacing the
    # list scan that compared raw values against already-stringified keys.
    seen: dict[str, None] = {}
    for row in entries(result=result, decision=decision, path=path):
        raw = row.get("variant_key")
        if not raw:
            continue
        seen.setdefault(str(raw), None)
    return list(seen)