From c59c9947b2eece79845358075b8f21c5dd658d64 Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Sat, 27 Jun 2026 17:31:30 +0200
Subject: [PATCH] Audit metadata prompt fallback boundaries

---
 docs/prompt-pool-routing-map.md |  2 +
 krea_formatter.py               |  2 +-
 tools/prompt_map_audit.py       | 93 +++++++++++++++++++++++++++++++++++++++++
 tools/prompt_smoke.py           | 12 ++++++
 4 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 1e9d379..0876d3f 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -236,6 +236,8 @@ and to compare the exact per-axis RNG seed used for the row.
 representative single row and Insta/OF pair, verifies embedded
 `generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
 consume metadata JSON instead of silently falling back to raw prompt text.
+The same audit also statically rejects direct `row.get("prompt")` reads in
+formatter metadata modules outside the shared fallback helpers.
 
 ## Category Sources
 
diff --git a/krea_formatter.py b/krea_formatter.py
index ba41e23..c57eb07 100644
--- a/krea_formatter.py
+++ b/krea_formatter.py
@@ -398,7 +398,7 @@ def _style_phrase(row: dict[str, Any], style_mode: str) -> str:
     if style_mode == "photographic":
         return "realistic creator-shot photography with natural lighting, tactile skin and fabric detail, and clean social-media composition"
     style = _clean(row.get("style"))
-    suffix = _clean(row.get("positive_suffix")) or _prompt_field(_clean(row.get("prompt")), "Use")
+    suffix = _clean(row.get("positive_suffix"))
     if style and suffix:
         return f"{style}; {suffix}"
     return style or suffix
diff --git a/tools/prompt_map_audit.py b/tools/prompt_map_audit.py
index be74eaa..1347f85 100644
--- a/tools/prompt_map_audit.py
+++ b/tools/prompt_map_audit.py
@@ -69,6 +69,21 @@ ENTRY_ROUTE_SNIPPETS: tuple[str, ...] = (
     "`naturalize_caption` -> `caption_format_route.py`",
 )
 
+PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
+    "krea_*.py",
+    "sdxl_*.py",
+    "caption_*.py",
+    "formatter_*.py",
+)
+
+ALLOWED_PROMPT_ROW_READS: set[tuple[str, str]] = {
+    # Central row-value fallback. Metadata routes should prefer explicit fields,
+    # but any remaining label fallback must pass through this shared helper.
+    ("formatter_input.py", "row_value"),
+    # Last-resort caption fallback after all metadata branches decline the row.
+    ("caption_naturalizer.py", "_metadata_to_prose"),
+}
+
 
 def _literal_or_none(node: ast.AST) -> Any:
     try:
@@ -419,6 +434,77 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
     return errors
 
 
+def _is_prompt_row_get(node: ast.Call) -> bool:
+    """Return True when *node* itself is a ``row.get("prompt", ...)`` call.
+
+    The receiver may be a bare name or an attribute; any identifier ending in
+    ``row`` counts, which keeps the reach of the earlier substring check.
+    """
+    func = node.func
+    if not (isinstance(func, ast.Attribute) and func.attr == "get" and node.args):
+        return False
+    receiver = func.value
+    if isinstance(receiver, ast.Name):
+        receiver_name = receiver.id
+    elif isinstance(receiver, ast.Attribute):
+        receiver_name = receiver.attr
+    else:
+        return False
+    key = node.args[0]
+    return (
+        receiver_name.endswith("row")
+        and isinstance(key, ast.Constant)
+        and key.value == "prompt"
+    )
+
+
+class _PromptRowReadVisitor(ast.NodeVisitor):
+    """Collect unaudited ``row.get("prompt")`` reads from one module."""
+
+    def __init__(self, path: Path) -> None:
+        self.path = path
+        self.source = path.read_text(encoding="utf-8")
+        self.function_stack: list[str] = []
+        self.errors: list[tuple[str, str, str]] = []
+
+    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+        self.function_stack.append(node.name)
+        self.generic_visit(node)
+        self.function_stack.pop()
+
+    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
+        self.function_stack.append(node.name)
+        self.generic_visit(node)
+        self.function_stack.pop()
+
+    def visit_Call(self, node: ast.Call) -> None:
+        # Match the call node itself rather than its source text so enclosing
+        # calls such as _clean(row.get("prompt")) are not reported a second time.
+        if _is_prompt_row_get(node):
+            function_name = self.function_stack[-1] if self.function_stack else ""
+            key = (self.path.name, function_name)
+            if key not in ALLOWED_PROMPT_ROW_READS:
+                self.errors.append(
+                    (
+                        self.path.name,
+                        f"{function_name}:{node.lineno}",
+                        "metadata formatter code reads row prompt text; use structured metadata or add an explicit audited exception",
+                    )
+                )
+        self.generic_visit(node)
+
+
+def _prompt_row_read_errors() -> list[tuple[str, str, str]]:
+    """Scan formatter modules under ROOT for unaudited raw prompt reads."""
+    paths = {path for pattern in PROMPT_ROW_READ_SCAN_GLOBS for path in ROOT.glob(pattern)}
+    errors: list[tuple[str, str, str]] = []
+    for path in sorted(paths):
+        visitor = _PromptRowReadVisitor(path)
+        visitor.visit(ast.parse(visitor.source, filename=str(path)))
+        errors.extend(visitor.errors)
+    return errors
+
+
 def _json_dumps(value: Any) -> str:
     return json.dumps(value, ensure_ascii=True, sort_keys=True)
 
@@ -689,6 +775,13 @@ def main() -> int:
         return 1
     print("OK: critical route modules are documented and covered by smoke cases.")
 
+    print("\n# Metadata Prompt Fallback Validation")
+    prompt_row_read_errors = _prompt_row_read_errors()
+    if prompt_row_read_errors:
+        print_table(("Module", "Location", "Issue"), prompt_row_read_errors)
+        return 1
+    print("OK: metadata formatter modules avoid raw prompt reads outside audited fallback helpers.")
+
     print("\n# Runtime Metadata Route Validation")
     runtime_metadata_errors = _runtime_metadata_errors()
     if runtime_metadata_errors:
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index ee2bda9..f3f73f6 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -1385,6 +1385,18 @@ def smoke_krea_normal_row_routes() -> None:
         "style": "realistic creator-shot photography",
     }
     _expect_krea_normal_route_parity(single, "krea_normal_single", "metadata(single)")
+    style_metadata = dict(
+        single,
+        style="metadata style phrase",
+        positive_suffix="metadata suffix phrase",
+        prompt="Use: stale prompt suffix phrase.",
+    )
+    style_prompt, style_method = krea_formatter._normal_row_to_krea(style_metadata, "balanced", "preserve")
+    _expect(style_method == "metadata(single)", "Krea style metadata route changed method")
+    style_prompt_lower = style_prompt.lower()
+    _expect("metadata style phrase" in style_prompt_lower, "Krea metadata route lost structured style")
+    _expect("metadata suffix phrase" in style_prompt_lower, "Krea metadata route lost structured positive suffix")
+    _expect("stale prompt suffix" not in style_prompt_lower, "Krea metadata route parsed stale Use prompt text")
 
     couple = {
         "subject_type": "couple",