Audit runtime metadata route traces

2026-06-27 15:30:40 +02:00
parent 607c2b8751
commit bb7df8ad77
2 changed files with 203 additions and 2 deletions
@@ -225,6 +225,11 @@ Each generated row stores `generation_trace.seed_axes` in `metadata_json`.
 Use it to verify whether an axis followed the main seed or a configured seed,
 and to compare the exact per-axis RNG seed used for the row.
 `tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a
 representative single row and Insta/OF pair, verifies embedded
 `generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
 consume metadata JSON instead of silently falling back to raw prompt text.
 ## Category Sources
 There are two category systems.
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 """Print a lightweight audit for the prompt routing map.
-This intentionally avoids importing the ComfyUI node package. It parses Python
+This intentionally avoids importing the ComfyUI node package. Static checks
-and JSON files directly, so it can run in a plain shell without ComfyUI loaded.
+parse Python and JSON files directly, while runtime checks import only the pure
 generator/formatter modules so the audit can run in a plain shell without
 ComfyUI loaded.
 """
 from __future__ import annotations
@@ -20,6 +22,10 @@ if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
 import category_template_metadata as template_metadata_policy  # noqa: E402
 import caption_naturalizer  # noqa: E402
 import krea_formatter  # noqa: E402
 import prompt_builder as pb  # noqa: E402
 import sdxl_formatter  # noqa: E402
 POOL_DEFINITION_KEYS = ("scene_pools", "expression_pools", "composition_pools")
 POOL_REFERENCE_KEYS = {
@@ -413,6 +419,189 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
    return errors
 def _json_dumps(value: Any) -> str:
    return json.dumps(value, ensure_ascii=True, sort_keys=True)
 def _expect_runtime(condition: bool, location: str, issue: str, errors: list[tuple[str, str, str]]) -> None:
    if not condition:
        errors.append(("runtime", location, issue))
 def _trace_seed_axis(trace: dict[str, Any], axis: str) -> dict[str, Any]:
    seed_axes = trace.get("seed_axes")
    if not isinstance(seed_axes, dict):
        return {}
    axis_trace = seed_axes.get(axis)
    return axis_trace if isinstance(axis_trace, dict) else {}
 def _row_trace_errors(row: dict[str, Any], location: str, errors: list[tuple[str, str, str]]) -> None:
    """Validate the ``generation_trace`` embedded in a generated row.

    One runtime error is appended to ``errors`` for each failed expectation:
    the trace must be a dict, contain the required top-level keys, name
    ``prompt_builder`` as its builder, and hold a ``seed_axes`` dict with a
    non-empty dict entry for every expected axis. Each axis entry needs a
    ``source`` of ``"main"`` or ``"configured"`` plus integer ``seed`` and
    ``rng_seed`` values.

    Args:
        row: Row mapping to inspect.
        location: Dotted prefix used for the reported error locations.
        errors: Accumulator of ``(source, location, issue)`` rows, mutated in place.
    """

    def is_plain_int(value: Any) -> bool:
        # bool is a subclass of int, so isinstance(True, int) holds; a boolean
        # seed must not pass the integer check.
        return isinstance(value, int) and not isinstance(value, bool)

    trace = row.get("generation_trace")
    _expect_runtime(isinstance(trace, dict), location, "missing generation_trace", errors)
    if not isinstance(trace, dict):
        # Nothing below can be checked without a trace object.
        return
    for key in ("builder", "branch", "source", "category", "subcategory", "seed", "seed_axes"):
        _expect_runtime(key in trace, f"{location}.generation_trace", f"missing {key}", errors)
    _expect_runtime(trace.get("builder") == "prompt_builder", f"{location}.generation_trace.builder", "unexpected builder", errors)
    _expect_runtime(isinstance(trace.get("seed_axes"), dict), f"{location}.generation_trace.seed_axes", "seed_axes must be an object", errors)
    for axis in ("content", "person", "scene", "pose", "role", "expression", "composition"):
        axis_location = f"{location}.generation_trace.seed_axes.{axis}"
        axis_trace = _trace_seed_axis(trace, axis)
        _expect_runtime(bool(axis_trace), axis_location, "missing axis trace", errors)
        if not axis_trace:
            continue
        # Tuple membership compares by equality, so an unhashable ``source``
        # (e.g. a list) is reported as invalid instead of raising TypeError.
        _expect_runtime(axis_trace.get("source") in ("main", "configured"), f"{axis_location}.source", "invalid seed source", errors)
        _expect_runtime(is_plain_int(axis_trace.get("seed")), f"{axis_location}.seed", "seed must be an integer", errors)
        _expect_runtime(is_plain_int(axis_trace.get("rng_seed")), f"{axis_location}.rng_seed", "rng_seed must be an integer", errors)
 def _route_trace_json_errors(
    trace_text: Any,
    location: str,
    formatter: str,
    valid_branches: tuple[str, ...],
    errors: list[tuple[str, str, str]],
 ) -> None:
    """Validate one formatter's ``route_trace_json`` payload.

    Args:
        trace_text: Route trace text returned by the formatter (may be empty).
        location: Dotted prefix for the formatter, e.g. ``"<row>.krea"``.
        formatter: Expected value of the trace's ``formatter`` field.
        valid_branches: Accepted values of the trace's ``branch`` field.
        errors: Accumulator of ``(source, location, issue)`` rows, mutated in place.
    """
    trace_location = f"{location}.route_trace_json"
    _expect_runtime(bool(trace_text), trace_location, "missing route trace", errors)
    if not trace_text:
        return
    try:
        trace = json.loads(trace_text)
    except json.JSONDecodeError as exc:
        errors.append(("runtime", trace_location, f"invalid JSON: {exc}"))
        return
    if not isinstance(trace, dict):
        # json.loads may yield a list, string, number, etc.; report it rather
        # than letting ``.get`` raise AttributeError and abort the audit.
        errors.append(("runtime", trace_location, "route trace must be a JSON object"))
        return
    _expect_runtime(trace.get("formatter") == formatter, f"{trace_location}.formatter", "unexpected formatter", errors)
    # ``valid_branches`` is a tuple so an unhashable ``branch`` value is
    # reported as unexpected instead of raising TypeError.
    _expect_runtime(trace.get("branch") in valid_branches, f"{trace_location}.branch", "unexpected branch", errors)
    _expect_runtime(trace.get("input_hint") == "metadata_json", f"{trace_location}.input_hint", "unexpected input hint", errors)
 def _formatter_trace_errors(
    metadata_json: str,
    location: str,
    errors: list[tuple[str, str, str]],
    *,
    target: str = "auto",
 ) -> None:
    """Check that the Krea2, SDXL, and caption formatters consume ``metadata_json``.

    Each formatter is called with an empty prompt text and the metadata JSON.
    A runtime error is appended to ``errors`` when the reported method does not
    mention ``"metadata"``, when the route trace is missing or is not a valid
    JSON object, or when its ``formatter`` / ``branch`` / ``input_hint`` fields
    are unexpected. The caption output must additionally be non-blank.

    Args:
        metadata_json: Serialized row or pair metadata handed to the formatters.
        location: Dotted prefix used for the reported error locations.
        errors: Accumulator of ``(source, location, issue)`` rows, mutated in place.
        target: Forwarded unchanged to every formatter call.
    """
    valid_metadata_branches = ("metadata", "metadata(single)", "insta_of_pair")

    # Krea2 and SDXL are handled identically here: both results are read via
    # ``.get("method")`` and ``.get("route_trace_json")``.
    dict_formatters = (
        ("krea", "krea2", krea_formatter.format_krea2_prompt),
        ("sdxl", "sdxl", sdxl_formatter.format_sdxl_prompt),
    )
    for label, formatter_name, format_prompt in dict_formatters:
        result = format_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target)
        _expect_runtime(
            "metadata" in str(result.get("method") or ""),
            f"{location}.{label}.method",
            f"formatter did not consume metadata: {result.get('method')}",
            errors,
        )
        _route_trace_json_errors(
            str(result.get("route_trace_json") or ""),
            f"{location}.{label}",
            formatter_name,
            valid_metadata_branches,
            errors,
        )

    # The caption naturalizer returns a (caption, method, trace_text) triple.
    caption, caption_method, caption_trace_text = caption_naturalizer.naturalize_caption_with_trace(
        "",
        metadata_json=metadata_json,
        input_hint="metadata_json",
        target=target,
    )
    _expect_runtime(bool(caption.strip()), f"{location}.caption", "caption output is empty", errors)
    _expect_runtime(
        "metadata" in str(caption_method or ""),
        f"{location}.caption.method",
        f"formatter did not consume metadata: {caption_method}",
        errors,
    )
    _route_trace_json_errors(
        caption_trace_text,
        f"{location}.caption",
        "caption",
        valid_metadata_branches,
        errors,
    )
 def _runtime_metadata_errors() -> list[tuple[str, str, str]]:
    """Build sample rows at runtime and audit their metadata route traces.

    One single row and one Insta/OF pair are generated through
    ``prompt_builder``, each with a seed-lock config that rerolls one axis.
    Their ``generation_trace`` content is checked, and each result is passed
    as JSON through the formatter trace checks.

    Returns:
        The collected ``(source, location, issue)`` rows; empty when every
        expectation holds.
    """

    def dict_or_empty(candidate: Any) -> dict[str, Any]:
        # Narrow an arbitrary value to a dict so later ``.get`` calls are safe.
        return candidate if isinstance(candidate, dict) else {}

    found: list[tuple[str, str, str]] = []

    # Single row: scene axis rerolled with its own configured seed.
    single_seed_config = pb.build_seed_lock_config_json(base_seed=4101, reroll_axis="scene", reroll_seed=4102)
    single_row = pb.build_prompt(
        category="Casual clothes",
        subcategory="Casual clothes / Smart casual",
        row_number=2,
        start_index=5,
        seed=4101,
        clothing="random",
        ethnicity="french_european",
        poses="random",
        backside_bias=0.25,
        figure="random",
        no_plus_women=False,
        no_black=False,
        minimal_clothing_ratio=0.35,
        standard_pose_ratio=0.4,
        trigger="sxcppnl7",
        prepend_trigger_to_prompt=True,
        extra_positive="",
        extra_negative="",
        seed_config=single_seed_config,
        women_count=1,
        men_count=0,
    )
    _row_trace_errors(single_row, "build_prompt.row", found)
    single_trace = dict_or_empty(single_row.get("generation_trace"))
    scene_trace = _trace_seed_axis(single_trace, "scene")
    single_expectations = (
        (single_trace.get("branch") == "custom", "build_prompt.row.generation_trace.branch", "expected custom branch"),
        (single_trace.get("source") == "json_category", "build_prompt.row.generation_trace.source", "expected JSON category source"),
        (scene_trace.get("source") == "configured", "build_prompt.row.generation_trace.seed_axes.scene.source", "expected configured scene seed"),
        (scene_trace.get("seed") == 4102, "build_prompt.row.generation_trace.seed_axes.scene.seed", "expected scene reroll seed"),
    )
    for passed, where, issue in single_expectations:
        _expect_runtime(passed, where, issue, found)
    _formatter_trace_errors(_json_dumps(single_row), "build_prompt.row", found)

    # Insta/OF pair: pose axis rerolled with its own configured seed.
    pair_seed_config = pb.build_seed_lock_config_json(base_seed=4201, reroll_axis="pose", reroll_seed=4202)
    pair_options_json = pb.build_insta_of_options_json(
        softcore_cast="same_as_hardcore",
        hardcore_cast="couple",
        hardcore_women_count=1,
        hardcore_men_count=1,
        hardcore_clothing_continuity="explicit_nude",
        hardcore_camera_mode="standard",
        camera_detail="off",
    )
    position_pool = pb.build_hardcore_position_pool_json(family="penetration")
    pair = pb.build_insta_of_pair(
        row_number=1,
        start_index=1,
        seed=4201,
        ethnicity="french_european",
        figure="random",
        no_plus_women=False,
        no_black=False,
        trigger="sxcppnl7",
        prepend_trigger_to_prompt=True,
        seed_config=pair_seed_config,
        options_json=pair_options_json,
        hardcore_position_config=position_pool,
    )
    _expect_runtime(pair.get("mode") == "Insta/OF", "build_insta_of_pair.mode", "expected Insta/OF pair metadata", found)
    softcore = dict_or_empty(pair.get("softcore_row"))
    hardcore = dict_or_empty(pair.get("hardcore_row"))
    _row_trace_errors(softcore, "build_insta_of_pair.softcore_row", found)
    _row_trace_errors(hardcore, "build_insta_of_pair.hardcore_row", found)
    hardcore_trace = dict_or_empty(hardcore.get("generation_trace"))
    pose_trace = _trace_seed_axis(hardcore_trace, "pose")
    pair_expectations = (
        (hardcore_trace.get("category_slug") == "hardcore_sexual_poses", "build_insta_of_pair.hardcore_row.generation_trace.category_slug", "expected hardcore pose category"),
        (hardcore_trace.get("content_seed_axis") == "pose", "build_insta_of_pair.hardcore_row.generation_trace.content_seed_axis", "expected pose-driven hardcore content axis"),
        (pose_trace.get("source") == "configured", "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.source", "expected configured pose seed"),
        (pose_trace.get("seed") == 4202, "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.seed", "expected pose reroll seed"),
    )
    for passed, where, issue in pair_expectations:
        _expect_runtime(passed, where, issue, found)
    _formatter_trace_errors(_json_dumps(pair), "build_insta_of_pair", found, target="hardcore")
    return found
 def print_table(headers: tuple[str, ...], rows: list[tuple[Any, ...]]) -> None:
    widths = [len(header) for header in headers]
    for row in rows:
@@ -499,6 +688,13 @@ def main() -> int:
        print_table(("Module", "Location", "Issue"), routing_doc_errors)
        return 1
    print("OK: critical route modules are documented and covered by smoke cases.")
    print("\n# Runtime Metadata Route Validation")
    runtime_metadata_errors = _runtime_metadata_errors()
    if runtime_metadata_errors:
        print_table(("Source", "Location", "Issue"), runtime_metadata_errors)
        return 1
    print("OK: builder rows, pair rows, and formatter traces preserve metadata routes.")
    return 0