Enrich formatter route trace metadata
This commit is contained in:
@@ -88,6 +88,7 @@ def naturalize_caption_result(
|
|||||||
style_policy=style_policy,
|
style_policy=style_policy,
|
||||||
include_trigger=include_trigger,
|
include_trigger=include_trigger,
|
||||||
keep_style=keep_style,
|
keep_style=keep_style,
|
||||||
|
**trace_policy.metadata_trace_fields(row, target=target),
|
||||||
)
|
)
|
||||||
return CaptionFormatRoute(
|
return CaptionFormatRoute(
|
||||||
caption=caption,
|
caption=caption,
|
||||||
|
|||||||
@@ -29,6 +29,10 @@ The map audit currently sees:
|
|||||||
- Pair seed simulation, so Insta/OF soft/hard metadata and formatter outputs
|
- Pair seed simulation, so Insta/OF soft/hard metadata and formatter outputs
|
||||||
prove locked determinism and person/scene/content/pose/expression/
|
prove locked determinism and person/scene/content/pose/expression/
|
||||||
composition reroll behavior.
|
composition reroll behavior.
|
||||||
|
- Formatter route traces expose selected metadata fields, so Krea2, SDXL, and
|
||||||
|
caption outputs can be debugged by category, action/position family, selected
|
||||||
|
pair side, scene profile, position keys, and POV labels instead of only
|
||||||
|
proving that a metadata branch was used.
|
||||||
|
|
||||||
## Architectural Finding
|
## Architectural Finding
|
||||||
|
|
||||||
|
|||||||
@@ -242,7 +242,10 @@ and to compare the exact per-axis RNG seed used for the row.
|
|||||||
`tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a
|
`tools/prompt_map_audit.py` includes a runtime metadata route check. It builds a
|
||||||
representative single row and Insta/OF pair, verifies embedded
|
representative single row and Insta/OF pair, verifies embedded
|
||||||
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
|
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
|
||||||
consume metadata JSON instead of silently falling back to raw prompt text.
|
consume metadata JSON instead of silently falling back to raw prompt text. The
|
||||||
|
formatter route traces also expose selected row metadata such as selected pair
|
||||||
|
side, category, action/position family, scene profile, position keys, and POV
|
||||||
|
labels.
|
||||||
The same audit also statically rejects direct `row["prompt"]` reads in
|
The same audit also statically rejects direct `row["prompt"]` reads in
|
||||||
formatter metadata modules outside the shared fallback helpers.
|
formatter metadata modules outside the shared fallback helpers.
|
||||||
|
|
||||||
@@ -997,6 +1000,9 @@ them through Krea2, SDXL, and training-caption paths, and reports structured
|
|||||||
issues for:
|
issues for:
|
||||||
|
|
||||||
- formatter routes falling back away from metadata;
|
- formatter routes falling back away from metadata;
|
||||||
|
- formatter route traces exposing selected row metadata such as category,
|
||||||
|
action/position family, selected pair side, scene profile, position keys, and
|
||||||
|
POV labels;
|
||||||
- raw builder labels leaking into Krea output;
|
- raw builder labels leaking into Krea output;
|
||||||
- duplicate negative-prompt comma items;
|
- duplicate negative-prompt comma items;
|
||||||
- softcore prompt noise;
|
- softcore prompt noise;
|
||||||
|
|||||||
@@ -3,6 +3,14 @@ from __future__ import annotations
|
|||||||
import json
|
import json
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
from . import route_metadata as route_metadata_policy
|
||||||
|
except ImportError: # Allows local smoke tests with top-level imports.
|
||||||
|
import route_metadata as route_metadata_policy
|
||||||
|
|
||||||
|
|
||||||
|
# Side names recognized for pair rows; each side's row is looked up under the
# "<side>_row" key, and "softcore" is the fallback when no valid side is given.
PAIR_SIDES = ("softcore", "hardcore")
|
||||||
|
|
||||||
|
|
||||||
def route_trace_json(**values: Any) -> str:
|
def route_trace_json(**values: Any) -> str:
|
||||||
trace: dict[str, Any] = {}
|
trace: dict[str, Any] = {}
|
||||||
@@ -15,3 +23,55 @@ def route_trace_json(**values: Any) -> str:
|
|||||||
continue
|
continue
|
||||||
trace[key] = value
|
trace[key] = value
|
||||||
return json.dumps(trace, ensure_ascii=True, sort_keys=True)
|
return json.dumps(trace, ensure_ascii=True, sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
|
def _pair_selected_side(target: Any, selected_side: Any = "") -> str:
    """Resolve which pair side a trace should describe.

    ``selected_side`` is preferred when it names a known side; otherwise
    ``target`` is tried. Both are stringified, stripped and lower-cased before
    being checked against ``PAIR_SIDES``. When neither matches, ``"softcore"``
    is returned.
    """
    for candidate in (selected_side, target):
        normalized = str(candidate or "").strip().lower()
        if normalized in PAIR_SIDES:
            return normalized
    return "softcore"
|
||||||
|
|
||||||
|
|
||||||
|
def _add_if_value(trace: dict[str, Any], key: str, value: Any) -> None:
|
||||||
|
if value is None:
|
||||||
|
return
|
||||||
|
if isinstance(value, str):
|
||||||
|
value = value.strip()
|
||||||
|
if not value:
|
||||||
|
return
|
||||||
|
if isinstance(value, (list, tuple, set)) and not value:
|
||||||
|
return
|
||||||
|
trace[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_trace_fields(row: Any, *, target: Any = "", selected_side: Any = "") -> dict[str, Any]:
    """Build the compact metadata portion of a formatter route trace.

    Only routing/debug identifiers are carried over; full prompt prose and
    cast descriptors are intentionally left out.

    Args:
        row: Row metadata. Anything that is not a dict yields ``{}``. A dict
            holding a dict under ``softcore_row`` or ``hardcore_row`` is
            treated as a pair; any other dict is treated as a single row.
        target: Fallback used to pick the pair side when ``selected_side``
            does not name a known side.
        selected_side: Preferred pair side.

    Returns:
        A dict that always has ``metadata_kind`` (``"pair"`` or ``"row"``),
        has ``selected_side`` for pairs, and has the remaining fields only
        when their values are non-empty.
    """
    if not isinstance(row, dict):
        return {}

    if isinstance(row.get("softcore_row"), dict) or isinstance(row.get("hardcore_row"), dict):
        side = _pair_selected_side(target, selected_side)
        side_row = row.get(f"{side}_row")
        # A pair that lacks the chosen side still reports kind/side, with
        # field lookups running against an empty row.
        fields_source = side_row if isinstance(side_row, dict) else {}
        trace: dict[str, Any] = {"metadata_kind": "pair", "selected_side": side}
    else:
        fields_source = row
        trace = {"metadata_kind": "row"}

    candidates = (
        ("metadata_category", fields_source.get("main_category") or fields_source.get("category")),
        ("metadata_subcategory", fields_source.get("subcategory")),
        ("action_family", route_metadata_policy.row_action_family(fields_source)),
        ("position_family", route_metadata_policy.row_position_family(fields_source)),
        ("position_key", fields_source.get("position_key")),
        ("position_keys", route_metadata_policy.row_position_keys(fields_source, include_unknown=True)),
        ("scene_profile", fields_source.get("scene_camera_profile_key")),
        ("pov_labels", fields_source.get("pov_character_labels")),
    )
    for field_name, field_value in candidates:
        _add_if_value(trace, field_name, field_value)
    return trace
|
||||||
|
|||||||
@@ -113,9 +113,9 @@ def format_krea2_prompt_result(request: KreaFormatRequest, deps: KreaFormatDepen
|
|||||||
method=output["method"],
|
method=output["method"],
|
||||||
input_hint=input_hint,
|
input_hint=input_hint,
|
||||||
target=target,
|
target=target,
|
||||||
selected_side=pair_target.selected_side,
|
|
||||||
detail_level=detail_level,
|
detail_level=detail_level,
|
||||||
style_mode=style_mode,
|
style_mode=style_mode,
|
||||||
|
**trace_policy.metadata_trace_fields(row, target=target, selected_side=pair_target.selected_side),
|
||||||
)
|
)
|
||||||
return KreaFormatRoute(
|
return KreaFormatRoute(
|
||||||
output=output,
|
output=output,
|
||||||
@@ -164,6 +164,7 @@ def format_krea2_prompt_result(request: KreaFormatRequest, deps: KreaFormatDepen
|
|||||||
target=target,
|
target=target,
|
||||||
detail_level=detail_level,
|
detail_level=detail_level,
|
||||||
style_mode=style_mode,
|
style_mode=style_mode,
|
||||||
|
**trace_policy.metadata_trace_fields(row, target=target),
|
||||||
)
|
)
|
||||||
return KreaFormatRoute(
|
return KreaFormatRoute(
|
||||||
output=output,
|
output=output,
|
||||||
|
|||||||
@@ -128,10 +128,10 @@ def format_sdxl_prompt_result(request: SDXLFormatRequest, deps: SDXLFormatDepend
|
|||||||
method=output["method"],
|
method=output["method"],
|
||||||
input_hint=input_hint,
|
input_hint=input_hint,
|
||||||
target=target,
|
target=target,
|
||||||
selected_side=pair_target.selected_side,
|
|
||||||
style_preset=style_preset,
|
style_preset=style_preset,
|
||||||
quality_preset=quality_preset,
|
quality_preset=quality_preset,
|
||||||
nude_weight=nude_weight,
|
nude_weight=nude_weight,
|
||||||
|
**trace_policy.metadata_trace_fields(row, target=target, selected_side=pair_target.selected_side),
|
||||||
)
|
)
|
||||||
return SDXLFormatRoute(
|
return SDXLFormatRoute(
|
||||||
output=output,
|
output=output,
|
||||||
@@ -186,6 +186,7 @@ def format_sdxl_prompt_result(request: SDXLFormatRequest, deps: SDXLFormatDepend
|
|||||||
style_preset=style_preset,
|
style_preset=style_preset,
|
||||||
quality_preset=quality_preset,
|
quality_preset=quality_preset,
|
||||||
nude_weight=nude_weight,
|
nude_weight=nude_weight,
|
||||||
|
**trace_policy.metadata_trace_fields(row, target=target),
|
||||||
)
|
)
|
||||||
return SDXLFormatRoute(
|
return SDXLFormatRoute(
|
||||||
output=output,
|
output=output,
|
||||||
|
|||||||
@@ -106,6 +106,10 @@ AUDIT_DOC_SNIPPETS: tuple[tuple[str, str], ...] = (
|
|||||||
"docs/prompt-pool-routing-map.md",
|
"docs/prompt-pool-routing-map.md",
|
||||||
"pair pose rerolls changing hardcore action metadata",
|
"pair pose rerolls changing hardcore action metadata",
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"docs/prompt-pool-routing-map.md",
|
||||||
|
"formatter route traces exposing selected row metadata",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
|
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
|
||||||
|
|||||||
@@ -518,11 +518,44 @@ def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str,
|
|||||||
return trace, ""
|
return trace, ""
|
||||||
|
|
||||||
|
|
||||||
|
def _formatter_trace_metadata_issues(name: str, trace: dict[str, Any], row: dict[str, Any] | None) -> list[str]:
|
||||||
|
if not isinstance(row, dict):
|
||||||
|
return []
|
||||||
|
issues: list[str] = []
|
||||||
|
expected_fields = {
|
||||||
|
"metadata_category": row.get("main_category") or row.get("category"),
|
||||||
|
"metadata_subcategory": row.get("subcategory"),
|
||||||
|
"action_family": row.get("action_family"),
|
||||||
|
"position_family": row.get("position_family"),
|
||||||
|
"position_key": row.get("position_key"),
|
||||||
|
"scene_profile": row.get("scene_camera_profile_key"),
|
||||||
|
}
|
||||||
|
for key, expected in expected_fields.items():
|
||||||
|
if expected in (None, "", []):
|
||||||
|
continue
|
||||||
|
if trace.get(key) != expected:
|
||||||
|
issues.append(f"{name}: trace_{key}_mismatch:{trace.get(key)} != {expected}")
|
||||||
|
expected_position_keys = [str(value) for value in (row.get("position_keys") or []) if str(value or "").strip()]
|
||||||
|
if expected_position_keys:
|
||||||
|
trace_position_keys = [str(value) for value in (trace.get("position_keys") or [])]
|
||||||
|
for key in expected_position_keys:
|
||||||
|
if key not in trace_position_keys:
|
||||||
|
issues.append(f"{name}: trace_missing_position_key:{key}")
|
||||||
|
expected_pov_labels = [str(value) for value in (row.get("pov_character_labels") or []) if str(value or "").strip()]
|
||||||
|
if expected_pov_labels:
|
||||||
|
trace_pov_labels = [str(value) for value in (trace.get("pov_labels") or [])]
|
||||||
|
for label in expected_pov_labels:
|
||||||
|
if label not in trace_pov_labels:
|
||||||
|
issues.append(f"{name}: trace_missing_pov_label:{label}")
|
||||||
|
return issues
|
||||||
|
|
||||||
|
|
||||||
def _formatter_trace_issues(
|
def _formatter_trace_issues(
|
||||||
name: str,
|
name: str,
|
||||||
formats: dict[str, Any],
|
formats: dict[str, Any],
|
||||||
*,
|
*,
|
||||||
target: str,
|
target: str,
|
||||||
|
row: dict[str, Any] | None = None,
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
expected_formatters = {
|
expected_formatters = {
|
||||||
"krea": "krea2",
|
"krea": "krea2",
|
||||||
@@ -554,14 +587,15 @@ def _formatter_trace_issues(
|
|||||||
if formatter_name in ("krea", "sdxl"):
|
if formatter_name in ("krea", "sdxl"):
|
||||||
if branch != "insta_of_pair":
|
if branch != "insta_of_pair":
|
||||||
issues.append(f"{name}.{formatter_name}: trace_pair_branch_mismatch:{branch}")
|
issues.append(f"{name}.{formatter_name}: trace_pair_branch_mismatch:{branch}")
|
||||||
if trace.get("selected_side") != target:
|
|
||||||
issues.append(f"{name}.{formatter_name}: trace_selected_side_mismatch:{trace.get('selected_side')} != {target}")
|
|
||||||
elif "metadata(insta_of_pair)" not in method:
|
elif "metadata(insta_of_pair)" not in method:
|
||||||
issues.append(f"{name}.{formatter_name}: trace_caption_pair_method_mismatch:{method}")
|
issues.append(f"{name}.{formatter_name}: trace_caption_pair_method_mismatch:{method}")
|
||||||
|
if trace.get("selected_side") != target:
|
||||||
|
issues.append(f"{name}.{formatter_name}: trace_selected_side_mismatch:{trace.get('selected_side')} != {target}")
|
||||||
elif formatter_name == "krea" and not branch.startswith("metadata("):
|
elif formatter_name == "krea" and not branch.startswith("metadata("):
|
||||||
issues.append(f"{name}.{formatter_name}: trace_krea_metadata_branch_mismatch:{branch}")
|
issues.append(f"{name}.{formatter_name}: trace_krea_metadata_branch_mismatch:{branch}")
|
||||||
elif formatter_name in ("sdxl", "caption") and branch != "metadata":
|
elif formatter_name in ("sdxl", "caption") and branch != "metadata":
|
||||||
issues.append(f"{name}.{formatter_name}: trace_metadata_branch_mismatch:{branch}")
|
issues.append(f"{name}.{formatter_name}: trace_metadata_branch_mismatch:{branch}")
|
||||||
|
issues.extend(_formatter_trace_metadata_issues(f"{name}.{formatter_name}", trace, row))
|
||||||
return issues
|
return issues
|
||||||
|
|
||||||
|
|
||||||
@@ -602,7 +636,7 @@ def _formatter_issues(
|
|||||||
):
|
):
|
||||||
if "metadata" not in str(method or ""):
|
if "metadata" not in str(method or ""):
|
||||||
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
|
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
|
||||||
issues.extend(_formatter_trace_issues(name, formats, target=target))
|
issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
|
||||||
|
|
||||||
for label, value in (
|
for label, value in (
|
||||||
(f"{name}.krea_negative", krea.get("negative_prompt")),
|
(f"{name}.krea_negative", krea.get("negative_prompt")),
|
||||||
|
|||||||
Reference in New Issue
Block a user