Audit runtime metadata route traces

2026-06-27 15:30:40 +02:00
parent 607c2b8751
commit bb7df8ad77
2 changed files with 203 additions and 2 deletions
@@ -225,6 +225,11 @@ Each generated row stores `generation_trace.seed_axes` in `metadata_json`.
 Use it to verify whether an axis followed the main seed or a configured seed,
 and to compare the exact per-axis RNG seed used for the row.

+`tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a
+representative single row and Insta/OF pair, verifies embedded
+`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
+consume metadata JSON instead of silently falling back to raw prompt text.
+
 ## Category Sources

 There are two category systems.
@@ -1,8 +1,10 @@
 #!/usr/bin/env python3
 """Print a lightweight audit for the prompt routing map.

-This intentionally avoids importing the ComfyUI node package. It parses Python
-and JSON files directly, so it can run in a plain shell without ComfyUI loaded.
+This intentionally avoids importing the ComfyUI node package. Static checks
+parse Python and JSON files directly, while runtime checks import only the pure
+generator/formatter modules so the audit can run in a plain shell without
+ComfyUI loaded.
 """

 from __future__ import annotations
@@ -20,6 +22,10 @@ if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

 import category_template_metadata as template_metadata_policy  # noqa: E402
+import caption_naturalizer  # noqa: E402
+import krea_formatter  # noqa: E402
+import prompt_builder as pb  # noqa: E402
+import sdxl_formatter  # noqa: E402

 POOL_DEFINITION_KEYS = ("scene_pools", "expression_pools", "composition_pools")
 POOL_REFERENCE_KEYS = {
@@ -413,6 +419,189 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
    return errors


+def _json_dumps(value: Any) -> str:
+    """Serialize ``value`` to ASCII-only JSON text with object keys sorted."""
+    encoder_options = {"ensure_ascii": True, "sort_keys": True}
+    return json.dumps(value, **encoder_options)
+
+
+def _expect_runtime(condition: bool, location: str, issue: str, errors: list[tuple[str, str, str]]) -> None:
+    """Append ``("runtime", location, issue)`` to ``errors`` when ``condition`` is falsy."""
+    if condition:
+        return
+    errors.append(("runtime", location, issue))
+
+
+def _trace_seed_axis(trace: dict[str, Any], axis: str) -> dict[str, Any]:
+    """Return ``trace["seed_axes"][axis]`` when both levels are dicts, else ``{}``."""
+    axes = trace.get("seed_axes")
+    entry = axes.get(axis) if isinstance(axes, dict) else None
+    if isinstance(entry, dict):
+        return entry
+    return {}
+
+
+def _row_trace_errors(row: dict[str, Any], location: str, errors: list[tuple[str, str, str]]) -> None:
+    """Append runtime errors for a row whose ``generation_trace`` is missing or malformed.
+
+    Checks that the trace is a dict, that it carries the required top-level
+    keys, that its builder is ``prompt_builder``, and that every expected seed
+    axis records a ``source`` of ``main`` or ``configured`` together with
+    integer ``seed`` and ``rng_seed`` values. Nothing is returned; findings are
+    appended to ``errors``.
+    """
+    trace_location = f"{location}.generation_trace"
+    trace = row.get("generation_trace")
+    trace_is_dict = isinstance(trace, dict)
+    _expect_runtime(trace_is_dict, location, "missing generation_trace", errors)
+    if not trace_is_dict:
+        return
+
+    required_keys = ("builder", "branch", "source", "category", "subcategory", "seed", "seed_axes")
+    for required in required_keys:
+        _expect_runtime(required in trace, trace_location, f"missing {required}", errors)
+    _expect_runtime(trace.get("builder") == "prompt_builder", f"{trace_location}.builder", "unexpected builder", errors)
+    _expect_runtime(isinstance(trace.get("seed_axes"), dict), f"{trace_location}.seed_axes", "seed_axes must be an object", errors)
+
+    expected_axes = ("content", "person", "scene", "pose", "role", "expression", "composition")
+    for axis in expected_axes:
+        axis_location = f"{trace_location}.seed_axes.{axis}"
+        axis_trace = _trace_seed_axis(trace, axis)
+        _expect_runtime(bool(axis_trace), axis_location, "missing axis trace", errors)
+        if not axis_trace:
+            # Field-level checks would only repeat the "missing axis trace" finding.
+            continue
+        _expect_runtime(axis_trace.get("source") in {"main", "configured"}, f"{axis_location}.source", "invalid seed source", errors)
+        for seed_field in ("seed", "rng_seed"):
+            _expect_runtime(isinstance(axis_trace.get(seed_field), int), f"{axis_location}.{seed_field}", f"{seed_field} must be an integer", errors)
+
+
+def _check_formatter_method(method: Any, location: str, errors: list[tuple[str, str, str]]) -> None:
+    """Record an error at ``{location}.method`` unless ``method`` mentions ``metadata``."""
+    _expect_runtime(
+        "metadata" in str(method or ""),
+        f"{location}.method",
+        f"formatter did not consume metadata: {method}",
+        errors,
+    )
+
+
+def _check_route_trace(
+    trace_text: Any,
+    formatter: str,
+    location: str,
+    valid_branches: set[str],
+    errors: list[tuple[str, str, str]],
+) -> None:
+    """Validate one formatter's ``route_trace_json`` payload and record any findings.
+
+    The payload must be non-empty, parse as a JSON object, name ``formatter`` as
+    its formatter, use one of ``valid_branches``, and carry the
+    ``metadata_json`` input hint.
+    """
+    trace_location = f"{location}.route_trace_json"
+    _expect_runtime(bool(trace_text), trace_location, "missing route trace", errors)
+    if not trace_text:
+        return
+    try:
+        route_trace = json.loads(trace_text)
+    except (TypeError, json.JSONDecodeError) as exc:
+        # TypeError covers a formatter handing back a non-string trace value.
+        errors.append(("runtime", trace_location, f"invalid JSON: {exc}"))
+        return
+    if not isinstance(route_trace, dict):
+        # Valid JSON that is not an object (list, string, number) has no fields
+        # to inspect; report it instead of failing on ``.get`` below.
+        errors.append(("runtime", trace_location, "route trace must be a JSON object"))
+        return
+    _expect_runtime(route_trace.get("formatter") == formatter, f"{trace_location}.formatter", "unexpected formatter", errors)
+    _expect_runtime(route_trace.get("branch") in valid_branches, f"{trace_location}.branch", "unexpected branch", errors)
+    _expect_runtime(route_trace.get("input_hint") == "metadata_json", f"{trace_location}.input_hint", "unexpected input hint", errors)
+
+
+def _formatter_trace_errors(
+    metadata_json: str,
+    location: str,
+    errors: list[tuple[str, str, str]],
+    *,
+    target: str = "auto",
+) -> None:
+    """Run the Krea2, SDXL, and caption formatters on ``metadata_json`` and audit their traces.
+
+    Each formatter is called with an empty first positional argument and
+    ``input_hint="metadata_json"``. For every formatter the reported method must
+    mention ``metadata`` and the route trace must be a JSON object with the
+    expected formatter name, a metadata branch, and the ``metadata_json`` input
+    hint. The caption output must additionally be non-blank.
+
+    Args:
+        metadata_json: Serialized row or pair metadata handed to each formatter.
+        location: Prefix used for the location column of recorded errors.
+        errors: List that findings are appended to, in formatter order
+            (Krea2, SDXL, caption).
+        target: Forwarded unchanged to every formatter call.
+    """
+    valid_metadata_branches = {"metadata", "metadata(single)", "insta_of_pair"}
+
+    krea = krea_formatter.format_krea2_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target)
+    _check_formatter_method(krea.get("method"), f"{location}.krea", errors)
+    _check_route_trace(str(krea.get("route_trace_json") or ""), "krea2", f"{location}.krea", valid_metadata_branches, errors)
+
+    sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target)
+    _check_formatter_method(sdxl.get("method"), f"{location}.sdxl", errors)
+    _check_route_trace(str(sdxl.get("route_trace_json") or ""), "sdxl", f"{location}.sdxl", valid_metadata_branches, errors)
+
+    caption, caption_method, caption_trace_text = caption_naturalizer.naturalize_caption_with_trace(
+        "",
+        metadata_json=metadata_json,
+        input_hint="metadata_json",
+        target=target,
+    )
+    _expect_runtime(bool(caption.strip()), f"{location}.caption", "caption output is empty", errors)
+    _check_formatter_method(caption_method, f"{location}.caption", errors)
+    _check_route_trace(caption_trace_text, "caption", f"{location}.caption", valid_metadata_branches, errors)
+
+
+def _runtime_metadata_errors() -> list[tuple[str, str, str]]:
+    """Build one single row and one Insta/OF pair and audit their metadata routes.
+
+    Both fixtures are produced through ``prompt_builder`` with a seed-lock
+    config that rerolls one axis, then checked for a well-formed
+    ``generation_trace`` and passed (as JSON) through the formatter trace audit.
+
+    Returns:
+        A list of ``("runtime", location, issue)`` tuples; empty when every
+        check passes.
+    """
+    errors: list[tuple[str, str, str]] = []
+
+    # Single-row fixture: base seed 4101 with the scene axis rerolled to 4102,
+    # so the scene axis is expected to report a configured seed below.
+    seed_lock = pb.build_seed_lock_config_json(base_seed=4101, reroll_axis="scene", reroll_seed=4102)
+    row = pb.build_prompt(
+        category="Casual clothes",
+        subcategory="Casual clothes / Smart casual",
+        row_number=2,
+        start_index=5,
+        seed=4101,
+        clothing="random",
+        ethnicity="french_european",
+        poses="random",
+        backside_bias=0.25,
+        figure="random",
+        no_plus_women=False,
+        no_black=False,
+        minimal_clothing_ratio=0.35,
+        standard_pose_ratio=0.4,
+        trigger="sxcppnl7",
+        prepend_trigger_to_prompt=True,
+        extra_positive="",
+        extra_negative="",
+        seed_config=seed_lock,
+        women_count=1,
+        men_count=0,
+    )
+    _row_trace_errors(row, "build_prompt.row", errors)
+    # Fall back to an empty dict so the checks below record errors instead of
+    # raising when the trace is missing (already reported by _row_trace_errors).
+    trace = row.get("generation_trace") if isinstance(row.get("generation_trace"), dict) else {}
+    _expect_runtime(trace.get("branch") == "custom", "build_prompt.row.generation_trace.branch", "expected custom branch", errors)
+    _expect_runtime(trace.get("source") == "json_category", "build_prompt.row.generation_trace.source", "expected JSON category source", errors)
+    scene_axis = _trace_seed_axis(trace, "scene")
+    _expect_runtime(scene_axis.get("source") == "configured", "build_prompt.row.generation_trace.seed_axes.scene.source", "expected configured scene seed", errors)
+    _expect_runtime(scene_axis.get("seed") == 4102, "build_prompt.row.generation_trace.seed_axes.scene.seed", "expected scene reroll seed", errors)
+    _formatter_trace_errors(_json_dumps(row), "build_prompt.row", errors)
+
+    # Pair fixture: base seed 4201 with the pose axis rerolled to 4202.
+    pair_seed_lock = pb.build_seed_lock_config_json(base_seed=4201, reroll_axis="pose", reroll_seed=4202)
+    pair_options = pb.build_insta_of_options_json(
+        softcore_cast="same_as_hardcore",
+        hardcore_cast="couple",
+        hardcore_women_count=1,
+        hardcore_men_count=1,
+        hardcore_clothing_continuity="explicit_nude",
+        hardcore_camera_mode="standard",
+        camera_detail="off",
+    )
+    position_config = pb.build_hardcore_position_pool_json(family="penetration")
+    pair = pb.build_insta_of_pair(
+        row_number=1,
+        start_index=1,
+        seed=4201,
+        ethnicity="french_european",
+        figure="random",
+        no_plus_women=False,
+        no_black=False,
+        trigger="sxcppnl7",
+        prepend_trigger_to_prompt=True,
+        seed_config=pair_seed_lock,
+        options_json=pair_options,
+        hardcore_position_config=position_config,
+    )
+    _expect_runtime(pair.get("mode") == "Insta/OF", "build_insta_of_pair.mode", "expected Insta/OF pair metadata", errors)
+    # Missing or non-dict rows become empty dicts; _row_trace_errors then reports
+    # them as "missing generation_trace" rather than the audit raising.
+    soft_row = pair.get("softcore_row") if isinstance(pair.get("softcore_row"), dict) else {}
+    hard_row = pair.get("hardcore_row") if isinstance(pair.get("hardcore_row"), dict) else {}
+    _row_trace_errors(soft_row, "build_insta_of_pair.softcore_row", errors)
+    _row_trace_errors(hard_row, "build_insta_of_pair.hardcore_row", errors)
+    hard_trace = hard_row.get("generation_trace") if isinstance(hard_row.get("generation_trace"), dict) else {}
+    _expect_runtime(hard_trace.get("category_slug") == "hardcore_sexual_poses", "build_insta_of_pair.hardcore_row.generation_trace.category_slug", "expected hardcore pose category", errors)
+    _expect_runtime(hard_trace.get("content_seed_axis") == "pose", "build_insta_of_pair.hardcore_row.generation_trace.content_seed_axis", "expected pose-driven hardcore content axis", errors)
+    pose_axis = _trace_seed_axis(hard_trace, "pose")
+    _expect_runtime(pose_axis.get("source") == "configured", "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.source", "expected configured pose seed", errors)
+    _expect_runtime(pose_axis.get("seed") == 4202, "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.seed", "expected pose reroll seed", errors)
+    # The whole pair object is serialized and the formatters are asked for the
+    # "hardcore" target explicitly instead of the default "auto".
+    _formatter_trace_errors(_json_dumps(pair), "build_insta_of_pair", errors, target="hardcore")
+
+    return errors
+
+
 def print_table(headers: tuple[str, ...], rows: list[tuple[Any, ...]]) -> None:
    widths = [len(header) for header in headers]
    for row in rows:
@@ -499,6 +688,13 @@ def main() -> int:
        print_table(("Module", "Location", "Issue"), routing_doc_errors)
        return 1
    print("OK: critical route modules are documented and covered by smoke cases.")
+
+    print("\n# Runtime Metadata Route Validation")
+    runtime_metadata_errors = _runtime_metadata_errors()
+    if runtime_metadata_errors:
+        print_table(("Source", "Location", "Issue"), runtime_metadata_errors)
+        return 1
+    print("OK: builder rows, pair rows, and formatter traces preserve metadata routes.")
    return 0