diff --git a/caption_format_route.py b/caption_format_route.py index a27d9e9..d8875b1 100644 --- a/caption_format_route.py +++ b/caption_format_route.py @@ -88,6 +88,7 @@ def naturalize_caption_result( style_policy=style_policy, include_trigger=include_trigger, keep_style=keep_style, + **trace_policy.metadata_trace_fields(row, target=target), ) return CaptionFormatRoute( caption=caption, diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index fb8cc0a..1c78cd4 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -29,6 +29,10 @@ The map audit currently sees: - Pair seed simulation, so Insta/OF soft/hard metadata and formatter outputs prove locked determinism and person/scene/content/pose/expression/ composition reroll behavior. +- Formatter route traces expose selected metadata fields, so Krea2, SDXL, and + caption outputs can be debugged by category, action/position family, selected + pair side, scene profile, position keys, and POV labels instead of only + proving that a metadata branch was used. ## Architectural Finding diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index bd89a3a..e7de255 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -242,7 +242,10 @@ and to compare the exact per-axis RNG seed used for the row. `tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a representative single row and Insta/OF pair, verifies embedded `generation_trace` fields, and confirms Krea2, SDXL, and caption formatters -consume metadata JSON instead of silently falling back to raw prompt text. +consume metadata JSON instead of silently falling back to raw prompt text. The +formatter route traces also expose selected row metadata such as selected pair +side, category, action/position family, scene profile, position keys, and POV +labels. 
PAIR_SIDES = ("softcore", "hardcore")


def _pair_selected_side(target: Any, selected_side: Any = "") -> str:
    """Resolve which side of a pair row a formatter trace describes.

    An explicit ``selected_side`` wins when it names a known pair side,
    otherwise ``target`` is used. Both are compared case-insensitively after
    stripping whitespace. Anything unrecognised falls back to ``"softcore"``.
    """
    for candidate in (selected_side, target):
        side = str(candidate or "").strip().lower()
        if side in PAIR_SIDES:
            return side
    return "softcore"


def _add_if_value(trace: dict[str, Any], key: str, value: Any) -> None:
    """Store ``value`` under ``key`` in ``trace`` unless it is empty.

    ``None``, blank strings and empty list/tuple/set values are skipped.
    Strings are stored stripped. Sets are stored as lists sorted by their
    string form: ``json.dumps`` (used by ``route_trace_json``) raises
    ``TypeError`` on a set, and sorting keeps the trace deterministic.
    """
    if value is None:
        return
    if isinstance(value, str):
        value = value.strip()
    if isinstance(value, (str, list, tuple, set, frozenset)) and not value:
        return
    if isinstance(value, (set, frozenset)):
        value = sorted(value, key=str)
    trace[key] = value


def metadata_trace_fields(row: Any, *, target: Any = "", selected_side: Any = "") -> dict[str, Any]:
    """Return compact row metadata fields for formatter route traces.

    The trace intentionally carries routing/debug identifiers, not full prompt
    prose or cast descriptors.

    Args:
        row: Row metadata. A dict holding a dict under ``softcore_row`` or
            ``hardcore_row`` is treated as a pair; any other dict is treated
            as a single row. Non-dict input yields an empty result.
        target: Formatter target, used to pick the pair side when
            ``selected_side`` does not name one.
        selected_side: Explicit pair side; takes precedence over ``target``.

    Returns:
        A dict that always contains ``metadata_kind`` (``"pair"`` or ``"row"``)
        for dict input, ``selected_side`` for pairs, and every non-empty
        metadata field found on the selected row.
    """
    if not isinstance(row, dict):
        return {}
    trace: dict[str, Any] = {}
    source_row: dict[str, Any] = row
    if isinstance(row.get("softcore_row"), dict) or isinstance(row.get("hardcore_row"), dict):
        side = _pair_selected_side(target, selected_side)
        side_row = row.get(f"{side}_row")
        # Only one side may be present; a missing side contributes no fields
        # but the trace still records which side was requested.
        source_row = side_row if isinstance(side_row, dict) else {}
        trace["metadata_kind"] = "pair"
        trace["selected_side"] = side
    else:
        trace["metadata_kind"] = "row"

    # route_metadata_policy is the module-level alias for the route_metadata
    # module imported at the top of formatter_route_trace.py.
    _add_if_value(trace, "metadata_category", source_row.get("main_category") or source_row.get("category"))
    _add_if_value(trace, "metadata_subcategory", source_row.get("subcategory"))
    _add_if_value(trace, "action_family", route_metadata_policy.row_action_family(source_row))
    _add_if_value(trace, "position_family", route_metadata_policy.row_position_family(source_row))
    _add_if_value(trace, "position_key", source_row.get("position_key"))
    _add_if_value(trace, "position_keys", route_metadata_policy.row_position_keys(source_row, include_unknown=True))
    _add_if_value(trace, "scene_profile", source_row.get("scene_camera_profile_key"))
    _add_if_value(trace, "pov_labels", source_row.get("pov_character_labels"))
    return trace
detail_level=detail_level, style_mode=style_mode, + **trace_policy.metadata_trace_fields(row, target=target), ) return KreaFormatRoute( output=output, diff --git a/sdxl_format_route.py b/sdxl_format_route.py index 721a3e1..86bdaad 100644 --- a/sdxl_format_route.py +++ b/sdxl_format_route.py @@ -128,10 +128,10 @@ def format_sdxl_prompt_result(request: SDXLFormatRequest, deps: SDXLFormatDepend method=output["method"], input_hint=input_hint, target=target, - selected_side=pair_target.selected_side, style_preset=style_preset, quality_preset=quality_preset, nude_weight=nude_weight, + **trace_policy.metadata_trace_fields(row, target=target, selected_side=pair_target.selected_side), ) return SDXLFormatRoute( output=output, @@ -186,6 +186,7 @@ def format_sdxl_prompt_result(request: SDXLFormatRequest, deps: SDXLFormatDepend style_preset=style_preset, quality_preset=quality_preset, nude_weight=nude_weight, + **trace_policy.metadata_trace_fields(row, target=target), ) return SDXLFormatRoute( output=output, diff --git a/tools/prompt_map_audit.py b/tools/prompt_map_audit.py index 1b09dc5..8574231 100644 --- a/tools/prompt_map_audit.py +++ b/tools/prompt_map_audit.py @@ -106,6 +106,10 @@ AUDIT_DOC_SNIPPETS: tuple[tuple[str, str], ...] = ( "docs/prompt-pool-routing-map.md", "pair pose rerolls changing hardcore action metadata", ), + ( + "docs/prompt-pool-routing-map.md", + "formatter route traces exposing selected row metadata", + ), ) PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] 
def _formatter_trace_metadata_issues(name: str, trace: dict[str, Any], row: dict[str, Any] | None) -> list[str]:
    """Report where a formatter route trace disagrees with its source row.

    Args:
        name: Prefix for every issue string (e.g. ``"<case>.<formatter>"``).
        trace: Parsed formatter route trace.
        row: Source row metadata; anything that is not a dict disables the
            check and yields no issues.

    Returns:
        Issue strings, scalar-field mismatches first, then missing
        ``position_keys`` entries, then missing ``pov_labels`` entries.
    """
    if not isinstance(row, dict):
        return []
    issues: list[str] = []

    expected_fields = {
        "metadata_category": row.get("main_category") or row.get("category"),
        "metadata_subcategory": row.get("subcategory"),
        "action_family": row.get("action_family"),
        "position_family": row.get("position_family"),
        "position_key": row.get("position_key"),
        "scene_profile": row.get("scene_camera_profile_key"),
    }
    for key, expected in expected_fields.items():
        # metadata_trace_fields stores stripped strings and omits blank ones,
        # so compare against the stripped row value; otherwise padded or
        # whitespace-only row values are reported as false mismatches.
        if isinstance(expected, str):
            expected = expected.strip()
        if expected in (None, "", []):
            continue
        actual = trace.get(key)
        if actual != expected:
            issues.append(f"{name}: trace_{key}_mismatch:{actual} != {expected}")

    # (row key, trace key, issue tag) for list-valued fields: every non-blank
    # row entry must appear in the trace; extra trace entries are allowed.
    list_checks = (
        ("position_keys", "position_keys", "trace_missing_position_key"),
        ("pov_character_labels", "pov_labels", "trace_missing_pov_label"),
    )
    for row_key, trace_key, issue_tag in list_checks:
        expected_items = [str(value) for value in (row.get(row_key) or []) if str(value or "").strip()]
        if not expected_items:
            continue
        traced_items = {str(value) for value in (trace.get(trace_key) or [])}
        issues.extend(
            f"{name}: {issue_tag}:{item}" for item in expected_items if item not in traced_items
        )
    return issues
_formatter_trace_issues( if formatter_name in ("krea", "sdxl"): if branch != "insta_of_pair": issues.append(f"{name}.{formatter_name}: trace_pair_branch_mismatch:{branch}") - if trace.get("selected_side") != target: - issues.append(f"{name}.{formatter_name}: trace_selected_side_mismatch:{trace.get('selected_side')} != {target}") elif "metadata(insta_of_pair)" not in method: issues.append(f"{name}.{formatter_name}: trace_caption_pair_method_mismatch:{method}") + if trace.get("selected_side") != target: + issues.append(f"{name}.{formatter_name}: trace_selected_side_mismatch:{trace.get('selected_side')} != {target}") elif formatter_name == "krea" and not branch.startswith("metadata("): issues.append(f"{name}.{formatter_name}: trace_krea_metadata_branch_mismatch:{branch}") elif formatter_name in ("sdxl", "caption") and branch != "metadata": issues.append(f"{name}.{formatter_name}: trace_metadata_branch_mismatch:{branch}") + issues.extend(_formatter_trace_metadata_issues(f"{name}.{formatter_name}", trace, row)) return issues @@ -602,7 +636,7 @@ def _formatter_issues( ): if "metadata" not in str(method or ""): issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}") - issues.extend(_formatter_trace_issues(name, formats, target=target)) + issues.extend(_formatter_trace_issues(name, formats, target=target, row=row)) for label, value in ( (f"{name}.krea_negative", krea.get("negative_prompt")),