diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index ec7561d..79b3e66 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -225,6 +225,11 @@ Each generated row stores `generation_trace.seed_axes` in `metadata_json`. Use it to verify whether an axis followed the main seed or a configured seed, and to compare the exact per-axis RNG seed used for the row. +`tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a +representative single row and Insta/OF pair, verifies embedded +`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters +consume metadata JSON instead of silently falling back to raw prompt text. + ## Category Sources There are two category systems. diff --git a/tools/prompt_map_audit.py b/tools/prompt_map_audit.py index 9400856..be74eaa 100644 --- a/tools/prompt_map_audit.py +++ b/tools/prompt_map_audit.py @@ -1,8 +1,10 @@ #!/usr/bin/env python3 """Print a lightweight audit for the prompt routing map. -This intentionally avoids importing the ComfyUI node package. It parses Python -and JSON files directly, so it can run in a plain shell without ComfyUI loaded. +This intentionally avoids importing the ComfyUI node package. Static checks +parse Python and JSON files directly, while runtime checks import only the pure +generator/formatter modules so the audit can run in a plain shell without +ComfyUI loaded. """ from __future__ import annotations @@ -20,6 +22,10 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) import category_template_metadata as template_metadata_policy # noqa: E402 +import caption_naturalizer # noqa: E402 +import krea_formatter # noqa: E402 +import prompt_builder as pb # noqa: E402 +import sdxl_formatter # noqa: E402 POOL_DEFINITION_KEYS = ("scene_pools", "expression_pools", "composition_pools") POOL_REFERENCE_KEYS = { @@ -413,6 +419,189 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]: return errors +def _json_dumps(value: Any) -> str: + return json.dumps(value, ensure_ascii=True, sort_keys=True) + + +def _expect_runtime(condition: bool, location: str, issue: str, errors: list[tuple[str, str, str]]) -> None: + if not condition: + errors.append(("runtime", location, issue)) + + +def _trace_seed_axis(trace: dict[str, Any], axis: str) -> dict[str, Any]: + seed_axes = trace.get("seed_axes") + if not isinstance(seed_axes, dict): + return {} + axis_trace = seed_axes.get(axis) + return axis_trace if isinstance(axis_trace, dict) else {} + + +def _row_trace_errors(row: dict[str, Any], location: str, errors: list[tuple[str, str, str]]) -> None: + trace = row.get("generation_trace") + _expect_runtime(isinstance(trace, dict), location, "missing generation_trace", errors) + if not isinstance(trace, dict): + return + for key in ("builder", "branch", "source", "category", "subcategory", "seed", "seed_axes"): + _expect_runtime(key in trace, f"{location}.generation_trace", f"missing {key}", errors) + _expect_runtime(trace.get("builder") == "prompt_builder", f"{location}.generation_trace.builder", "unexpected builder", errors) + _expect_runtime(isinstance(trace.get("seed_axes"), dict), f"{location}.generation_trace.seed_axes", "seed_axes must be an object", errors) + for axis in ("content", "person", "scene", "pose", "role", "expression", "composition"): + axis_trace = _trace_seed_axis(trace, axis) + _expect_runtime(bool(axis_trace), f"{location}.generation_trace.seed_axes.{axis}", "missing axis trace", errors) + if axis_trace: + _expect_runtime(axis_trace.get("source") in {"main", "configured"}, f"{location}.generation_trace.seed_axes.{axis}.source", "invalid seed source", errors) + _expect_runtime(isinstance(axis_trace.get("seed"), int), f"{location}.generation_trace.seed_axes.{axis}.seed", "seed must be an integer", errors) + _expect_runtime(isinstance(axis_trace.get("rng_seed"), int), f"{location}.generation_trace.seed_axes.{axis}.rng_seed", "rng_seed must be an integer", errors) + + +def _formatter_trace_errors( + metadata_json: str, + location: str, + errors: list[tuple[str, str, str]], + *, + target: str = "auto", +) -> None: + valid_metadata_branches = {"metadata", "metadata(single)", "insta_of_pair"} + krea = krea_formatter.format_krea2_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target) + _expect_runtime( + "metadata" in str(krea.get("method") or ""), + f"{location}.krea.method", + f"formatter did not consume metadata: {krea.get('method')}", + errors, + ) + krea_trace_text = str(krea.get("route_trace_json") or "") + _expect_runtime(bool(krea_trace_text), f"{location}.krea.route_trace_json", "missing route trace", errors) + if krea_trace_text: + try: + krea_trace = json.loads(krea_trace_text) + except json.JSONDecodeError as exc: + errors.append(("runtime", f"{location}.krea.route_trace_json", f"invalid JSON: {exc}")) + else: + _expect_runtime(krea_trace.get("formatter") == "krea2", f"{location}.krea.route_trace_json.formatter", "unexpected formatter", errors) + _expect_runtime(krea_trace.get("branch") in valid_metadata_branches, f"{location}.krea.route_trace_json.branch", "unexpected branch", errors) + _expect_runtime(krea_trace.get("input_hint") == "metadata_json", f"{location}.krea.route_trace_json.input_hint", "unexpected input hint", errors) + + sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target) + _expect_runtime( + "metadata" in str(sdxl.get("method") or ""), + f"{location}.sdxl.method", + f"formatter did not consume metadata: {sdxl.get('method')}", + errors, + ) + sdxl_trace_text = str(sdxl.get("route_trace_json") or "") + _expect_runtime(bool(sdxl_trace_text), f"{location}.sdxl.route_trace_json", "missing route trace", errors) + if sdxl_trace_text: + try: + sdxl_trace = json.loads(sdxl_trace_text) + except json.JSONDecodeError as exc: + errors.append(("runtime", f"{location}.sdxl.route_trace_json", f"invalid JSON: {exc}")) + else: + _expect_runtime(sdxl_trace.get("formatter") == "sdxl", f"{location}.sdxl.route_trace_json.formatter", "unexpected formatter", errors) + _expect_runtime(sdxl_trace.get("branch") in valid_metadata_branches, f"{location}.sdxl.route_trace_json.branch", "unexpected branch", errors) + _expect_runtime(sdxl_trace.get("input_hint") == "metadata_json", f"{location}.sdxl.route_trace_json.input_hint", "unexpected input hint", errors) + + caption, caption_method, caption_trace_text = caption_naturalizer.naturalize_caption_with_trace( + "", + metadata_json=metadata_json, + input_hint="metadata_json", + target=target, + ) + _expect_runtime(bool(caption.strip()), f"{location}.caption", "caption output is empty", errors) + _expect_runtime( + "metadata" in str(caption_method or ""), + f"{location}.caption.method", + f"formatter did not consume metadata: {caption_method}", + errors, + ) + _expect_runtime(bool(caption_trace_text), f"{location}.caption.route_trace_json", "missing route trace", errors) + if caption_trace_text: + try: + caption_trace = json.loads(caption_trace_text) + except json.JSONDecodeError as exc: + errors.append(("runtime", f"{location}.caption.route_trace_json", f"invalid JSON: {exc}")) + else: + _expect_runtime(caption_trace.get("formatter") == "caption", f"{location}.caption.route_trace_json.formatter", "unexpected formatter", errors) + _expect_runtime(caption_trace.get("branch") in valid_metadata_branches, f"{location}.caption.route_trace_json.branch", "unexpected branch", errors) + _expect_runtime(caption_trace.get("input_hint") == "metadata_json", f"{location}.caption.route_trace_json.input_hint", "unexpected input hint", errors) + + +def _runtime_metadata_errors() -> list[tuple[str, str, str]]: + errors: list[tuple[str, str, str]] = [] + + seed_lock = pb.build_seed_lock_config_json(base_seed=4101, reroll_axis="scene", reroll_seed=4102) + row = pb.build_prompt( + category="Casual clothes", + subcategory="Casual clothes / Smart casual", + row_number=2, + start_index=5, + seed=4101, + clothing="random", + ethnicity="french_european", + poses="random", + backside_bias=0.25, + figure="random", + no_plus_women=False, + no_black=False, + minimal_clothing_ratio=0.35, + standard_pose_ratio=0.4, + trigger="sxcppnl7", + prepend_trigger_to_prompt=True, + extra_positive="", + extra_negative="", + seed_config=seed_lock, + women_count=1, + men_count=0, + ) + _row_trace_errors(row, "build_prompt.row", errors) + trace = row.get("generation_trace") if isinstance(row.get("generation_trace"), dict) else {} + _expect_runtime(trace.get("branch") == "custom", "build_prompt.row.generation_trace.branch", "expected custom branch", errors) + _expect_runtime(trace.get("source") == "json_category", "build_prompt.row.generation_trace.source", "expected JSON category source", errors) + scene_axis = _trace_seed_axis(trace, "scene") + _expect_runtime(scene_axis.get("source") == "configured", "build_prompt.row.generation_trace.seed_axes.scene.source", "expected configured scene seed", errors) + _expect_runtime(scene_axis.get("seed") == 4102, "build_prompt.row.generation_trace.seed_axes.scene.seed", "expected scene reroll seed", errors) + _formatter_trace_errors(_json_dumps(row), "build_prompt.row", errors) + + pair_seed_lock = pb.build_seed_lock_config_json(base_seed=4201, reroll_axis="pose", reroll_seed=4202) + pair_options = pb.build_insta_of_options_json( + softcore_cast="same_as_hardcore", + hardcore_cast="couple", + hardcore_women_count=1, + hardcore_men_count=1, + hardcore_clothing_continuity="explicit_nude", + hardcore_camera_mode="standard", + camera_detail="off", + ) + position_config = pb.build_hardcore_position_pool_json(family="penetration") + pair = pb.build_insta_of_pair( + row_number=1, + start_index=1, + seed=4201, + ethnicity="french_european", + figure="random", + no_plus_women=False, + no_black=False, + trigger="sxcppnl7", + prepend_trigger_to_prompt=True, + seed_config=pair_seed_lock, + options_json=pair_options, + hardcore_position_config=position_config, + ) + _expect_runtime(pair.get("mode") == "Insta/OF", "build_insta_of_pair.mode", "expected Insta/OF pair metadata", errors) + soft_row = pair.get("softcore_row") if isinstance(pair.get("softcore_row"), dict) else {} + hard_row = pair.get("hardcore_row") if isinstance(pair.get("hardcore_row"), dict) else {} + _row_trace_errors(soft_row, "build_insta_of_pair.softcore_row", errors) + _row_trace_errors(hard_row, "build_insta_of_pair.hardcore_row", errors) + hard_trace = hard_row.get("generation_trace") if isinstance(hard_row.get("generation_trace"), dict) else {} + _expect_runtime(hard_trace.get("category_slug") == "hardcore_sexual_poses", "build_insta_of_pair.hardcore_row.generation_trace.category_slug", "expected hardcore pose category", errors) + _expect_runtime(hard_trace.get("content_seed_axis") == "pose", "build_insta_of_pair.hardcore_row.generation_trace.content_seed_axis", "expected pose-driven hardcore content axis", errors) + pose_axis = _trace_seed_axis(hard_trace, "pose") + _expect_runtime(pose_axis.get("source") == "configured", "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.source", "expected configured pose seed", errors) + _expect_runtime(pose_axis.get("seed") == 4202, "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.seed", "expected pose reroll seed", errors) + _formatter_trace_errors(_json_dumps(pair), "build_insta_of_pair", errors, target="hardcore") + + return errors + + def print_table(headers: tuple[str, ...], rows: list[tuple[Any, ...]]) -> None: widths = [len(header) for header in headers] for row in rows: @@ -499,6 +688,13 @@ def main() -> int: print_table(("Module", "Location", "Issue"), routing_doc_errors) return 1 print("OK: critical route modules are documented and covered by smoke cases.") + + print("\n# Runtime Metadata Route Validation") + runtime_metadata_errors = _runtime_metadata_errors() + if runtime_metadata_errors: + print_table(("Source", "Location", "Issue"), runtime_metadata_errors) + return 1 + print("OK: builder rows, pair rows, and formatter traces preserve metadata routes.") return 0