Enrich formatter route trace metadata

This commit is contained in:
2026-06-27 19:46:13 +02:00
parent 3c7ccbb711
commit 307ffdba3b
8 changed files with 117 additions and 6 deletions
+1
View File
@@ -88,6 +88,7 @@ def naturalize_caption_result(
style_policy=style_policy, style_policy=style_policy,
include_trigger=include_trigger, include_trigger=include_trigger,
keep_style=keep_style, keep_style=keep_style,
**trace_policy.metadata_trace_fields(row, target=target),
) )
return CaptionFormatRoute( return CaptionFormatRoute(
caption=caption, caption=caption,
@@ -29,6 +29,10 @@ The map audit currently sees:
- Pair seed simulation, so Insta/OF soft/hard metadata and formatter outputs
prove locked determinism and person/scene/content/pose/expression/
composition reroll behavior.
- Formatter route traces expose selected metadata fields, so Krea2, SDXL, and
caption outputs can be debugged by category, action/position family, selected
pair side, scene profile, position keys, and POV labels instead of only
proving that a metadata branch was used.
## Architectural Finding ## Architectural Finding
+7 -1
View File
@@ -242,7 +242,10 @@ and to compare the exact per-axis RNG seed used for the row.
`tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a
representative single row and Insta/OF pair, verifies embedded
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
consume metadata JSON instead of silently falling back to raw prompt text. The
formatter route traces also expose selected row metadata such as selected pair
side, category, action/position family, scene profile, position keys, and POV
labels.
The same audit also statically rejects direct `row["prompt"]` reads in
formatter metadata modules outside the shared fallback helpers.
@@ -997,6 +1000,9 @@ them through Krea2, SDXL, and training-caption paths, and reports structured
issues for: issues for:
- formatter routes falling back away from metadata; - formatter routes falling back away from metadata;
- formatter route traces exposing selected row metadata such as category,
action/position family, selected pair side, scene profile, position keys, and
POV labels;
- raw builder labels leaking into Krea output; - raw builder labels leaking into Krea output;
- duplicate negative-prompt comma items; - duplicate negative-prompt comma items;
- softcore prompt noise; - softcore prompt noise;
+60
View File
@@ -3,6 +3,14 @@ from __future__ import annotations
import json import json
from typing import Any from typing import Any
try:
from . import route_metadata as route_metadata_policy
except ImportError: # Allows local smoke tests with top-level imports.
import route_metadata as route_metadata_policy
PAIR_SIDES = ("softcore", "hardcore")
def route_trace_json(**values: Any) -> str: def route_trace_json(**values: Any) -> str:
trace: dict[str, Any] = {} trace: dict[str, Any] = {}
@@ -15,3 +23,55 @@ def route_trace_json(**values: Any) -> str:
continue continue
trace[key] = value trace[key] = value
return json.dumps(trace, ensure_ascii=True, sort_keys=True) return json.dumps(trace, ensure_ascii=True, sort_keys=True)
def _pair_selected_side(target: Any, selected_side: Any = "") -> str:
    """Resolve which side of a pair a trace should describe.

    ``selected_side`` is consulted first and ``target`` second. Each candidate
    is stringified, trimmed, and lower-cased before being checked against
    ``PAIR_SIDES``; falsy candidates are treated as an empty string.

    Returns:
        The first candidate that names a known side, or ``"softcore"`` when
        neither does.
    """
    for candidate in (selected_side, target):
        normalized = str(candidate or "").strip().lower()
        if normalized in PAIR_SIDES:
            return normalized
    return "softcore"
def _add_if_value(trace: dict[str, Any], key: str, value: Any) -> None:
if value is None:
return
if isinstance(value, str):
value = value.strip()
if not value:
return
if isinstance(value, (list, tuple, set)) and not value:
return
trace[key] = value
def metadata_trace_fields(row: Any, *, target: Any = "", selected_side: Any = "") -> dict[str, Any]:
    """Build the compact metadata portion of a formatter route trace.

    Only routing/debug identifiers are copied; full prompt prose and cast
    descriptors are intentionally left out.

    Args:
        row: Row metadata. Anything that is not a dict yields ``{}``. A dict
            holding a dict under ``softcore_row`` or ``hardcore_row`` is
            treated as a pair.
        target: Fallback pair side, used when ``selected_side`` does not name
            a known side.
        selected_side: Preferred pair side.

    Returns:
        A dict that always carries ``metadata_kind`` (``"pair"`` or
        ``"row"``), carries ``selected_side`` for pairs, and carries each
        remaining field only when ``_add_if_value`` accepts its value.
    """
    if not isinstance(row, dict):
        return {}

    trace: dict[str, Any]
    looks_like_pair = False
    for side_key in ("softcore_row", "hardcore_row"):
        if isinstance(row.get(side_key), dict):
            looks_like_pair = True
            break

    if looks_like_pair:
        side = _pair_selected_side(target, selected_side)
        side_row = row.get(f"{side}_row")
        # The chosen side may be missing even though the other side exists;
        # the trace then carries only the pair markers.
        source_row = side_row if isinstance(side_row, dict) else {}
        trace = {"metadata_kind": "pair", "selected_side": side}
    else:
        source_row = row
        trace = {"metadata_kind": "row"}

    def put(field_name: str, field_value: Any) -> None:
        _add_if_value(trace, field_name, field_value)

    put("metadata_category", source_row.get("main_category") or source_row.get("category"))
    put("metadata_subcategory", source_row.get("subcategory"))
    put("action_family", route_metadata_policy.row_action_family(source_row))
    put("position_family", route_metadata_policy.row_position_family(source_row))
    put("position_key", source_row.get("position_key"))
    put("position_keys", route_metadata_policy.row_position_keys(source_row, include_unknown=True))
    put("scene_profile", source_row.get("scene_camera_profile_key"))
    put("pov_labels", source_row.get("pov_character_labels"))
    return trace
+2 -1
View File
@@ -113,9 +113,9 @@ def format_krea2_prompt_result(request: KreaFormatRequest, deps: KreaFormatDepen
method=output["method"], method=output["method"],
input_hint=input_hint, input_hint=input_hint,
target=target, target=target,
selected_side=pair_target.selected_side,
detail_level=detail_level, detail_level=detail_level,
style_mode=style_mode, style_mode=style_mode,
**trace_policy.metadata_trace_fields(row, target=target, selected_side=pair_target.selected_side),
) )
return KreaFormatRoute( return KreaFormatRoute(
output=output, output=output,
@@ -164,6 +164,7 @@ def format_krea2_prompt_result(request: KreaFormatRequest, deps: KreaFormatDepen
target=target, target=target,
detail_level=detail_level, detail_level=detail_level,
style_mode=style_mode, style_mode=style_mode,
**trace_policy.metadata_trace_fields(row, target=target),
) )
return KreaFormatRoute( return KreaFormatRoute(
output=output, output=output,
+2 -1
View File
@@ -128,10 +128,10 @@ def format_sdxl_prompt_result(request: SDXLFormatRequest, deps: SDXLFormatDepend
method=output["method"], method=output["method"],
input_hint=input_hint, input_hint=input_hint,
target=target, target=target,
selected_side=pair_target.selected_side,
style_preset=style_preset, style_preset=style_preset,
quality_preset=quality_preset, quality_preset=quality_preset,
nude_weight=nude_weight, nude_weight=nude_weight,
**trace_policy.metadata_trace_fields(row, target=target, selected_side=pair_target.selected_side),
) )
return SDXLFormatRoute( return SDXLFormatRoute(
output=output, output=output,
@@ -186,6 +186,7 @@ def format_sdxl_prompt_result(request: SDXLFormatRequest, deps: SDXLFormatDepend
style_preset=style_preset, style_preset=style_preset,
quality_preset=quality_preset, quality_preset=quality_preset,
nude_weight=nude_weight, nude_weight=nude_weight,
**trace_policy.metadata_trace_fields(row, target=target),
) )
return SDXLFormatRoute( return SDXLFormatRoute(
output=output, output=output,
+4
View File
@@ -106,6 +106,10 @@ AUDIT_DOC_SNIPPETS: tuple[tuple[str, str], ...] = (
"docs/prompt-pool-routing-map.md", "docs/prompt-pool-routing-map.md",
"pair pose rerolls changing hardcore action metadata", "pair pose rerolls changing hardcore action metadata",
), ),
(
"docs/prompt-pool-routing-map.md",
"formatter route traces exposing selected row metadata",
),
) )
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = ( PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
+37 -3
View File
@@ -518,11 +518,44 @@ def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str,
return trace, "" return trace, ""
def _formatter_trace_metadata_issues(name: str, trace: dict[str, Any], row: dict[str, Any] | None) -> list[str]:
if not isinstance(row, dict):
return []
issues: list[str] = []
expected_fields = {
"metadata_category": row.get("main_category") or row.get("category"),
"metadata_subcategory": row.get("subcategory"),
"action_family": row.get("action_family"),
"position_family": row.get("position_family"),
"position_key": row.get("position_key"),
"scene_profile": row.get("scene_camera_profile_key"),
}
for key, expected in expected_fields.items():
if expected in (None, "", []):
continue
if trace.get(key) != expected:
issues.append(f"{name}: trace_{key}_mismatch:{trace.get(key)} != {expected}")
expected_position_keys = [str(value) for value in (row.get("position_keys") or []) if str(value or "").strip()]
if expected_position_keys:
trace_position_keys = [str(value) for value in (trace.get("position_keys") or [])]
for key in expected_position_keys:
if key not in trace_position_keys:
issues.append(f"{name}: trace_missing_position_key:{key}")
expected_pov_labels = [str(value) for value in (row.get("pov_character_labels") or []) if str(value or "").strip()]
if expected_pov_labels:
trace_pov_labels = [str(value) for value in (trace.get("pov_labels") or [])]
for label in expected_pov_labels:
if label not in trace_pov_labels:
issues.append(f"{name}: trace_missing_pov_label:{label}")
return issues
def _formatter_trace_issues( def _formatter_trace_issues(
name: str, name: str,
formats: dict[str, Any], formats: dict[str, Any],
*, *,
target: str, target: str,
row: dict[str, Any] | None = None,
) -> list[str]: ) -> list[str]:
expected_formatters = { expected_formatters = {
"krea": "krea2", "krea": "krea2",
@@ -554,14 +587,15 @@ def _formatter_trace_issues(
if formatter_name in ("krea", "sdxl"): if formatter_name in ("krea", "sdxl"):
if branch != "insta_of_pair": if branch != "insta_of_pair":
issues.append(f"{name}.{formatter_name}: trace_pair_branch_mismatch:{branch}") issues.append(f"{name}.{formatter_name}: trace_pair_branch_mismatch:{branch}")
if trace.get("selected_side") != target:
issues.append(f"{name}.{formatter_name}: trace_selected_side_mismatch:{trace.get('selected_side')} != {target}")
elif "metadata(insta_of_pair)" not in method: elif "metadata(insta_of_pair)" not in method:
issues.append(f"{name}.{formatter_name}: trace_caption_pair_method_mismatch:{method}") issues.append(f"{name}.{formatter_name}: trace_caption_pair_method_mismatch:{method}")
if trace.get("selected_side") != target:
issues.append(f"{name}.{formatter_name}: trace_selected_side_mismatch:{trace.get('selected_side')} != {target}")
elif formatter_name == "krea" and not branch.startswith("metadata("): elif formatter_name == "krea" and not branch.startswith("metadata("):
issues.append(f"{name}.{formatter_name}: trace_krea_metadata_branch_mismatch:{branch}") issues.append(f"{name}.{formatter_name}: trace_krea_metadata_branch_mismatch:{branch}")
elif formatter_name in ("sdxl", "caption") and branch != "metadata": elif formatter_name in ("sdxl", "caption") and branch != "metadata":
issues.append(f"{name}.{formatter_name}: trace_metadata_branch_mismatch:{branch}") issues.append(f"{name}.{formatter_name}: trace_metadata_branch_mismatch:{branch}")
issues.extend(_formatter_trace_metadata_issues(f"{name}.{formatter_name}", trace, row))
return issues return issues
@@ -602,7 +636,7 @@ def _formatter_issues(
): ):
if "metadata" not in str(method or ""): if "metadata" not in str(method or ""):
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}") issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
issues.extend(_formatter_trace_issues(name, formats, target=target)) issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
for label, value in ( for label, value in (
(f"{name}.krea_negative", krea.get("negative_prompt")), (f"{name}.krea_negative", krea.get("negative_prompt")),