Audit runtime metadata route traces

This commit is contained in:
2026-06-27 15:30:40 +02:00
parent 607c2b8751
commit bb7df8ad77
2 changed files with 203 additions and 2 deletions
+5
View File
@@ -225,6 +225,11 @@ Each generated row stores `generation_trace.seed_axes` in `metadata_json`.
Use it to verify whether an axis followed the main seed or a configured seed,
and to compare the exact per-axis RNG seed used for the row.
`tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a
representative single row and Insta/OF pair, verifies embedded
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
consume metadata JSON instead of silently falling back to raw prompt text.
## Category Sources
There are two category systems.
+198 -2
View File
@@ -1,8 +1,10 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Print a lightweight audit for the prompt routing map.

This intentionally avoids importing the ComfyUI node package. Static checks
parse Python and JSON files directly, while runtime checks import only the pure
generator/formatter modules so the audit can run in a plain shell without
ComfyUI loaded.
"""
from __future__ import annotations from __future__ import annotations
@@ -20,6 +22,10 @@ if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT)) sys.path.insert(0, str(ROOT))
import category_template_metadata as template_metadata_policy # noqa: E402 import category_template_metadata as template_metadata_policy # noqa: E402
import caption_naturalizer # noqa: E402
import krea_formatter # noqa: E402
import prompt_builder as pb # noqa: E402
import sdxl_formatter # noqa: E402
POOL_DEFINITION_KEYS = ("scene_pools", "expression_pools", "composition_pools") POOL_DEFINITION_KEYS = ("scene_pools", "expression_pools", "composition_pools")
POOL_REFERENCE_KEYS = { POOL_REFERENCE_KEYS = {
@@ -413,6 +419,189 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
return errors return errors
def _json_dumps(value: Any) -> str:
return json.dumps(value, ensure_ascii=True, sort_keys=True)
def _expect_runtime(condition: bool, location: str, issue: str, errors: list[tuple[str, str, str]]) -> None:
if not condition:
errors.append(("runtime", location, issue))
def _trace_seed_axis(trace: dict[str, Any], axis: str) -> dict[str, Any]:
seed_axes = trace.get("seed_axes")
if not isinstance(seed_axes, dict):
return {}
axis_trace = seed_axes.get(axis)
return axis_trace if isinstance(axis_trace, dict) else {}
def _row_trace_errors(row: dict[str, Any], location: str, errors: list[tuple[str, str, str]]) -> None:
    """Validate the ``generation_trace`` embedded in a generated row.

    Failures are appended to *errors* via ``_expect_runtime``; nothing is
    returned. The checks, in order:

    * ``row["generation_trace"]`` exists and is a dict (otherwise validation
      stops after reporting it as missing);
    * the trace carries the keys ``builder``, ``branch``, ``source``,
      ``category``, ``subcategory``, ``seed`` and ``seed_axes``;
    * ``builder`` equals ``"prompt_builder"`` and ``seed_axes`` is a dict;
    * each of the seven expected axes has a non-empty dict entry whose
      ``source`` is ``"main"`` or ``"configured"`` and whose ``seed`` and
      ``rng_seed`` are integers.

    Args:
        row: The generated row to inspect.
        location: Dotted prefix used in reported error locations.
        errors: Accumulator that receives ``("runtime", location, issue)`` rows.
    """

    def _is_plain_int(value: Any) -> bool:
        # bool is a subclass of int, so a bare isinstance(value, int) would
        # wrongly accept True/False as a valid seed.
        return isinstance(value, int) and not isinstance(value, bool)

    trace = row.get("generation_trace")
    _expect_runtime(isinstance(trace, dict), location, "missing generation_trace", errors)
    if not isinstance(trace, dict):
        return

    trace_location = f"{location}.generation_trace"
    for key in ("builder", "branch", "source", "category", "subcategory", "seed", "seed_axes"):
        _expect_runtime(key in trace, trace_location, f"missing {key}", errors)
    _expect_runtime(trace.get("builder") == "prompt_builder", f"{trace_location}.builder", "unexpected builder", errors)
    _expect_runtime(isinstance(trace.get("seed_axes"), dict), f"{trace_location}.seed_axes", "seed_axes must be an object", errors)

    for axis in ("content", "person", "scene", "pose", "role", "expression", "composition"):
        axis_location = f"{trace_location}.seed_axes.{axis}"
        axis_trace = _trace_seed_axis(trace, axis)
        _expect_runtime(bool(axis_trace), axis_location, "missing axis trace", errors)
        if not axis_trace:
            continue
        _expect_runtime(axis_trace.get("source") in {"main", "configured"}, f"{axis_location}.source", "invalid seed source", errors)
        _expect_runtime(_is_plain_int(axis_trace.get("seed")), f"{axis_location}.seed", "seed must be an integer", errors)
        _expect_runtime(_is_plain_int(axis_trace.get("rng_seed")), f"{axis_location}.rng_seed", "rng_seed must be an integer", errors)
def _check_formatter_route(
    label: str,
    expected_formatter: str,
    method: Any,
    trace_text: Any,
    location: str,
    errors: list[tuple[str, str, str]],
) -> None:
    """Validate one formatter's reported method and its route trace JSON.

    Args:
        label: Location segment for this formatter (e.g. ``"krea"``).
        expected_formatter: Value the trace's ``formatter`` field must equal.
        method: The method value reported by the formatter.
        trace_text: The formatter's route trace, expected to be JSON text.
        location: Dotted prefix used in reported error locations.
        errors: Accumulator that receives ``("runtime", location, issue)`` rows.
    """
    valid_metadata_branches = {"metadata", "metadata(single)", "insta_of_pair"}
    base = f"{location}.{label}"
    _expect_runtime(
        "metadata" in str(method or ""),
        f"{base}.method",
        f"formatter did not consume metadata: {method}",
        errors,
    )

    trace_location = f"{base}.route_trace_json"
    text = str(trace_text or "")
    _expect_runtime(bool(text), trace_location, "missing route trace", errors)
    if not text:
        return
    try:
        route_trace = json.loads(text)
    except json.JSONDecodeError as exc:
        errors.append(("runtime", trace_location, f"invalid JSON: {exc}"))
        return
    if not isinstance(route_trace, dict):
        # Valid JSON that is not an object (list, string, number, ...) has no
        # .get(); report it instead of crashing the audit with AttributeError.
        errors.append(("runtime", trace_location, "route trace must be a JSON object"))
        return
    _expect_runtime(route_trace.get("formatter") == expected_formatter, f"{trace_location}.formatter", "unexpected formatter", errors)
    _expect_runtime(route_trace.get("branch") in valid_metadata_branches, f"{trace_location}.branch", "unexpected branch", errors)
    _expect_runtime(route_trace.get("input_hint") == "metadata_json", f"{trace_location}.input_hint", "unexpected input hint", errors)


def _formatter_trace_errors(
    metadata_json: str,
    location: str,
    errors: list[tuple[str, str, str]],
    *,
    target: str = "auto",
) -> None:
    """Run the Krea2, SDXL and caption formatters on *metadata_json* and audit them.

    Each formatter is called with an empty prompt text and
    ``input_hint="metadata_json"``. For each one the audit checks that the
    reported method mentions ``"metadata"`` and that the route trace is a JSON
    object naming the expected formatter, a metadata branch, and the
    ``metadata_json`` input hint. The caption output must also be non-empty.

    Args:
        metadata_json: Serialized row or pair metadata handed to the formatters.
        location: Dotted prefix used in reported error locations.
        errors: Accumulator that receives ``("runtime", location, issue)`` rows.
        target: Forwarded unchanged to every formatter call.
    """
    krea = krea_formatter.format_krea2_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target)
    _check_formatter_route("krea", "krea2", krea.get("method"), krea.get("route_trace_json"), location, errors)

    sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=metadata_json, input_hint="metadata_json", target=target)
    _check_formatter_route("sdxl", "sdxl", sdxl.get("method"), sdxl.get("route_trace_json"), location, errors)

    caption, caption_method, caption_trace_text = caption_naturalizer.naturalize_caption_with_trace(
        "",
        metadata_json=metadata_json,
        input_hint="metadata_json",
        target=target,
    )
    _expect_runtime(bool(caption.strip()), f"{location}.caption", "caption output is empty", errors)
    _check_formatter_route("caption", "caption", caption_method, caption_trace_text, location, errors)
def _runtime_metadata_errors() -> list[tuple[str, str, str]]:
    """Generate representative rows at runtime and audit their metadata routes.

    Two fixtures are built through ``prompt_builder``: a single
    ``build_prompt`` row whose seed-lock config rerolls the ``scene`` axis, and
    an Insta/OF pair whose seed-lock config rerolls the ``pose`` axis. Their
    ``generation_trace`` fields are validated, and each fixture is serialized
    and passed through the formatter trace checks.

    Returns:
        The collected ``("runtime", location, issue)`` rows; empty when every
        check passes.
    """

    def _dict_or_empty(value: Any) -> dict[str, Any]:
        # Non-dict values are replaced by {} so later .get() lookups are safe.
        return value if isinstance(value, dict) else {}

    found: list[tuple[str, str, str]] = []

    # Single-row fixture: main seed 4101, scene axis rerolled with 4102.
    single_seed_config = pb.build_seed_lock_config_json(base_seed=4101, reroll_axis="scene", reroll_seed=4102)
    single_row = pb.build_prompt(
        category="Casual clothes",
        subcategory="Casual clothes / Smart casual",
        row_number=2,
        start_index=5,
        seed=4101,
        clothing="random",
        ethnicity="french_european",
        poses="random",
        backside_bias=0.25,
        figure="random",
        no_plus_women=False,
        no_black=False,
        minimal_clothing_ratio=0.35,
        standard_pose_ratio=0.4,
        trigger="sxcppnl7",
        prepend_trigger_to_prompt=True,
        extra_positive="",
        extra_negative="",
        seed_config=single_seed_config,
        women_count=1,
        men_count=0,
    )
    _row_trace_errors(single_row, "build_prompt.row", found)
    single_trace = _dict_or_empty(single_row.get("generation_trace"))
    _expect_runtime(single_trace.get("branch") == "custom", "build_prompt.row.generation_trace.branch", "expected custom branch", found)
    _expect_runtime(single_trace.get("source") == "json_category", "build_prompt.row.generation_trace.source", "expected JSON category source", found)
    single_scene = _trace_seed_axis(single_trace, "scene")
    _expect_runtime(single_scene.get("source") == "configured", "build_prompt.row.generation_trace.seed_axes.scene.source", "expected configured scene seed", found)
    _expect_runtime(single_scene.get("seed") == 4102, "build_prompt.row.generation_trace.seed_axes.scene.seed", "expected scene reroll seed", found)
    _formatter_trace_errors(_json_dumps(single_row), "build_prompt.row", found)

    # Pair fixture: main seed 4201, pose axis rerolled with 4202.
    duo_seed_config = pb.build_seed_lock_config_json(base_seed=4201, reroll_axis="pose", reroll_seed=4202)
    duo_options = pb.build_insta_of_options_json(
        softcore_cast="same_as_hardcore",
        hardcore_cast="couple",
        hardcore_women_count=1,
        hardcore_men_count=1,
        hardcore_clothing_continuity="explicit_nude",
        hardcore_camera_mode="standard",
        camera_detail="off",
    )
    duo_positions = pb.build_hardcore_position_pool_json(family="penetration")
    duo = pb.build_insta_of_pair(
        row_number=1,
        start_index=1,
        seed=4201,
        ethnicity="french_european",
        figure="random",
        no_plus_women=False,
        no_black=False,
        trigger="sxcppnl7",
        prepend_trigger_to_prompt=True,
        seed_config=duo_seed_config,
        options_json=duo_options,
        hardcore_position_config=duo_positions,
    )
    _expect_runtime(duo.get("mode") == "Insta/OF", "build_insta_of_pair.mode", "expected Insta/OF pair metadata", found)
    soft_half = _dict_or_empty(duo.get("softcore_row"))
    hard_half = _dict_or_empty(duo.get("hardcore_row"))
    _row_trace_errors(soft_half, "build_insta_of_pair.softcore_row", found)
    _row_trace_errors(hard_half, "build_insta_of_pair.hardcore_row", found)
    hard_half_trace = _dict_or_empty(hard_half.get("generation_trace"))
    _expect_runtime(hard_half_trace.get("category_slug") == "hardcore_sexual_poses", "build_insta_of_pair.hardcore_row.generation_trace.category_slug", "expected hardcore pose category", found)
    _expect_runtime(hard_half_trace.get("content_seed_axis") == "pose", "build_insta_of_pair.hardcore_row.generation_trace.content_seed_axis", "expected pose-driven hardcore content axis", found)
    hard_pose = _trace_seed_axis(hard_half_trace, "pose")
    _expect_runtime(hard_pose.get("source") == "configured", "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.source", "expected configured pose seed", found)
    _expect_runtime(hard_pose.get("seed") == 4202, "build_insta_of_pair.hardcore_row.generation_trace.seed_axes.pose.seed", "expected pose reroll seed", found)
    _formatter_trace_errors(_json_dumps(duo), "build_insta_of_pair", found, target="hardcore")

    return found
def print_table(headers: tuple[str, ...], rows: list[tuple[Any, ...]]) -> None: def print_table(headers: tuple[str, ...], rows: list[tuple[Any, ...]]) -> None:
widths = [len(header) for header in headers] widths = [len(header) for header in headers]
for row in rows: for row in rows:
@@ -499,6 +688,13 @@ def main() -> int:
print_table(("Module", "Location", "Issue"), routing_doc_errors) print_table(("Module", "Location", "Issue"), routing_doc_errors)
return 1 return 1
print("OK: critical route modules are documented and covered by smoke cases.") print("OK: critical route modules are documented and covered by smoke cases.")
print("\n# Runtime Metadata Route Validation")
runtime_metadata_errors = _runtime_metadata_errors()
if runtime_metadata_errors:
print_table(("Source", "Location", "Issue"), runtime_metadata_errors)
return 1
print("OK: builder rows, pair rows, and formatter traces preserve metadata routes.")
return 0 return 0