Audit metadata prompt fallback boundaries

This commit is contained in:
2026-06-27 17:31:30 +02:00
parent 1950ce7bbf
commit c59c9947b2
4 changed files with 83 additions and 1 deletions
+2
View File
@@ -236,6 +236,8 @@ and to compare the exact per-axis RNG seed used for the row.
representative single row and Insta/OF pair, verifies embedded representative single row and Insta/OF pair, verifies embedded
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters `generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
consume metadata JSON instead of silently falling back to raw prompt text. consume metadata JSON instead of silently falling back to raw prompt text.
The same audit also statically rejects raw prompt reads such as
`row.get("prompt")` in formatter metadata modules outside the shared fallback helpers.
## Category Sources ## Category Sources
+1 -1
View File
@@ -398,7 +398,7 @@ def _style_phrase(row: dict[str, Any], style_mode: str) -> str:
if style_mode == "photographic": if style_mode == "photographic":
return "realistic creator-shot photography with natural lighting, tactile skin and fabric detail, and clean social-media composition" return "realistic creator-shot photography with natural lighting, tactile skin and fabric detail, and clean social-media composition"
style = _clean(row.get("style")) style = _clean(row.get("style"))
suffix = _clean(row.get("positive_suffix")) or _prompt_field(_clean(row.get("prompt")), "Use") suffix = _clean(row.get("positive_suffix"))
if style and suffix: if style and suffix:
return f"{style}; {suffix}" return f"{style}; {suffix}"
return style or suffix return style or suffix
+68
View File
@@ -69,6 +69,21 @@ ENTRY_ROUTE_SNIPPETS: tuple[str, ...] = (
"`naturalize_caption` -> `caption_format_route.py`", "`naturalize_caption` -> `caption_format_route.py`",
) )
# Filename globs (expanded with ROOT.glob) selecting the formatter modules
# whose source is scanned for raw prompt reads. One glob per module family.
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = tuple(
    f"{family}_*.py" for family in ("krea", "sdxl", "caption", "formatter")
)

# (module filename, enclosing function name) pairs that are permitted to read
# the row's prompt text. Anything not listed here is reported by the audit.
ALLOWED_PROMPT_ROW_READS: set[tuple[str, str]] = {
    # Shared row-value fallback helper: metadata routes should favour explicit
    # fields, and whatever label fallback is left has to go through it.
    ("formatter_input.py", "row_value"),
    # Caption fallback of last resort, used only once every metadata branch
    # has declined the row.
    ("caption_naturalizer.py", "_metadata_to_prose"),
}
def _literal_or_none(node: ast.AST) -> Any: def _literal_or_none(node: ast.AST) -> Any:
try: try:
@@ -419,6 +434,52 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
return errors return errors
class _PromptRowReadVisitor(ast.NodeVisitor):
    """Collect raw ``prompt`` reads from a row mapping in one module.

    The visitor walks a parsed module and records every
    ``<row>.get("prompt", ...)`` call and every ``<row>["prompt"]`` load whose
    ``(file name, enclosing function name)`` pair is not listed in
    ``ALLOWED_PROMPT_ROW_READS``. ``<row>`` is any plain name or attribute
    whose identifier ends in ``row`` (``row``, ``source_row``, ``self.row``).

    Matching is done on the AST node that performs the read, so a read wrapped
    in other calls (``_clean(row.get("prompt"))``) is reported exactly once
    instead of once per enclosing call.

    Attributes are documented inline in ``__init__``; ``errors`` holds
    ``(module file name, "function:lineno", message)`` tuples.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        # Kept on the instance because callers parse ``visitor.source``.
        self.source = path.read_text(encoding="utf-8")
        # Names of the function definitions currently being traversed,
        # outermost first; empty while visiting module-level code.
        self.function_stack: list[str] = []
        self.errors: list[tuple[str, str, str]] = []

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        self.function_stack.append(node.name)
        self.generic_visit(node)
        self.function_stack.pop()

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
        self.function_stack.append(node.name)
        self.generic_visit(node)
        self.function_stack.pop()

    @staticmethod
    def _is_row_expr(node: ast.AST) -> bool:
        """Return True when *node* is a name/attribute ending in ``row``."""
        if isinstance(node, ast.Name):
            return node.id.endswith("row")
        if isinstance(node, ast.Attribute):
            return node.attr.endswith("row")
        return False

    @staticmethod
    def _is_prompt_key(node: ast.AST) -> bool:
        """Return True when *node* is the string literal ``"prompt"``."""
        return isinstance(node, ast.Constant) and node.value == "prompt"

    def _record(self, node: ast.AST) -> None:
        """Append an error for *node* unless its location is allow-listed."""
        function_name = self.function_stack[-1] if self.function_stack else "<module>"
        if (self.path.name, function_name) in ALLOWED_PROMPT_ROW_READS:
            return
        self.errors.append(
            (
                self.path.name,
                f"{function_name}:{node.lineno}",
                "metadata formatter code reads row prompt text; use structured metadata or add an explicit audited exception",
            )
        )

    def visit_Call(self, node: ast.Call) -> None:
        func = node.func
        if (
            isinstance(func, ast.Attribute)
            and func.attr == "get"
            and self._is_row_expr(func.value)
            and node.args
            and self._is_prompt_key(node.args[0])
        ):
            self._record(node)
        # Keep descending: arguments may contain further, distinct reads.
        self.generic_visit(node)

    def visit_Subscript(self, node: ast.Subscript) -> None:
        key = node.slice
        # Python 3.8 wraps the subscript expression in ast.Index.
        if type(key).__name__ == "Index":
            key = key.value
        # Only loads are reads; ``row["prompt"] = ...`` and ``del`` are not.
        if (
            isinstance(node.ctx, ast.Load)
            and self._is_row_expr(node.value)
            and self._is_prompt_key(key)
        ):
            self._record(node)
        self.generic_visit(node)
def _prompt_row_read_errors() -> list[tuple[str, str, str]]:
    """Scan every module matched by the scan globs for raw prompt reads.

    Each glob in ``PROMPT_ROW_READ_SCAN_GLOBS`` is expanded with ``ROOT.glob``;
    a file matched by several globs is scanned once. Files are processed in
    sorted path order.

    Returns:
        The concatenated ``errors`` lists of the per-file visitors.
    """
    # A set removes files that more than one glob matched.
    candidates = {
        module_path
        for glob_pattern in PROMPT_ROW_READ_SCAN_GLOBS
        for module_path in ROOT.glob(glob_pattern)
    }
    findings: list[tuple[str, str, str]] = []
    for module_path in sorted(candidates):
        scanner = _PromptRowReadVisitor(module_path)
        # The visitor already read the file; parse that same text.
        scanner.visit(ast.parse(scanner.source))
        findings += scanner.errors
    return findings
def _json_dumps(value: Any) -> str: def _json_dumps(value: Any) -> str:
return json.dumps(value, ensure_ascii=True, sort_keys=True) return json.dumps(value, ensure_ascii=True, sort_keys=True)
@@ -689,6 +750,13 @@ def main() -> int:
return 1 return 1
print("OK: critical route modules are documented and covered by smoke cases.") print("OK: critical route modules are documented and covered by smoke cases.")
print("\n# Metadata Prompt Fallback Validation")
prompt_row_read_errors = _prompt_row_read_errors()
if prompt_row_read_errors:
print_table(("Module", "Location", "Issue"), prompt_row_read_errors)
return 1
print("OK: metadata formatter modules avoid raw prompt reads outside audited fallback helpers.")
print("\n# Runtime Metadata Route Validation") print("\n# Runtime Metadata Route Validation")
runtime_metadata_errors = _runtime_metadata_errors() runtime_metadata_errors = _runtime_metadata_errors()
if runtime_metadata_errors: if runtime_metadata_errors:
+12
View File
@@ -1385,6 +1385,18 @@ def smoke_krea_normal_row_routes() -> None:
"style": "realistic creator-shot photography", "style": "realistic creator-shot photography",
} }
_expect_krea_normal_route_parity(single, "krea_normal_single", "metadata(single)") _expect_krea_normal_route_parity(single, "krea_normal_single", "metadata(single)")
style_metadata = dict(
single,
style="metadata style phrase",
positive_suffix="metadata suffix phrase",
prompt="Use: stale prompt suffix phrase.",
)
style_prompt, style_method = krea_formatter._normal_row_to_krea(style_metadata, "balanced", "preserve")
_expect(style_method == "metadata(single)", "Krea style metadata route changed method")
style_prompt_lower = style_prompt.lower()
_expect("metadata style phrase" in style_prompt_lower, "Krea metadata route lost structured style")
_expect("metadata suffix phrase" in style_prompt_lower, "Krea metadata route lost structured positive suffix")
_expect("stale prompt suffix" not in style_prompt_lower, "Krea metadata route parsed stale Use prompt text")
couple = { couple = {
"subject_type": "couple", "subject_type": "couple",