From c59c9947b2eece79845358075b8f21c5dd658d64 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Sat, 27 Jun 2026 17:31:30 +0200
Subject: [PATCH] Audit metadata prompt fallback boundaries

---
 docs/prompt-pool-routing-map.md |  2 +
 krea_formatter.py               |  2 +-
 tools/prompt_map_audit.py       | 68 +++++++++++++++++++++++++++++++++
 tools/prompt_smoke.py           | 12 ++++++
 4 files changed, 83 insertions(+), 1 deletion(-)
diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 1e9d379..0876d3f 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -236,6 +236,8 @@ and to compare the exact per-axis RNG seed used for the row.
 representative single row and Insta/OF pair, verifies embedded
 `generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
 consume metadata JSON instead of silently falling back to raw prompt text.
+The same audit also statically rejects raw prompt reads such as `row.get("prompt")`
+in formatter metadata modules outside the shared fallback helpers.
 
 ## Category Sources
 
diff --git a/krea_formatter.py b/krea_formatter.py
index ba41e23..c57eb07 100644
--- a/krea_formatter.py
+++ b/krea_formatter.py
@@ -398,7 +398,7 @@ def _style_phrase(row: dict[str, Any], style_mode: str) -> str:
     if style_mode == "photographic":
         return "realistic creator-shot photography with natural lighting, tactile skin and fabric detail, and clean social-media composition"
     style = _clean(row.get("style"))
-    suffix = _clean(row.get("positive_suffix")) or _prompt_field(_clean(row.get("prompt")), "Use")
+    suffix = _clean(row.get("positive_suffix"))
     if style and suffix:
         return f"{style}; {suffix}"
     return style or suffix
diff --git a/tools/prompt_map_audit.py b/tools/prompt_map_audit.py
index be74eaa..1347f85 100644
--- a/tools/prompt_map_audit.py
+++ b/tools/prompt_map_audit.py
@@ -69,6 +69,21 @@ ENTRY_ROUTE_SNIPPETS: tuple[str, ...] = (
     "`naturalize_caption` -> `caption_format_route.py`",
 )
 
+PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (  # patterns globbed under ROOT by the prompt-read audit
+    "krea_*.py",
+    "sdxl_*.py",
+    "caption_*.py",  # also matches caption_naturalizer.py, which is allow-listed below
+    "formatter_*.py",  # also matches formatter_input.py, which is allow-listed below
+)
+
+ALLOWED_PROMPT_ROW_READS: set[tuple[str, str]] = {  # (file name, innermost enclosing function name)
+    # Central row-value fallback. Metadata routes should prefer explicit fields,
+    # but any remaining label fallback must pass through this shared helper.
+    ("formatter_input.py", "row_value"),
+    # Last-resort caption fallback after all metadata branches decline the row.
+    ("caption_naturalizer.py", "_metadata_to_prose"),
+}
+
 
 def _literal_or_none(node: ast.AST) -> Any:
     try:
@@ -419,6 +434,52 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
     return errors
 
 
+class _PromptRowReadVisitor(ast.NodeVisitor):
+    """Record ``row.get("prompt", ...)``-style calls found outside ALLOWED_PROMPT_ROW_READS.
+
+    The call node itself is matched instead of source text, so a read nested inside
+    other calls is reported once, at its own line, as (module, function:line, issue).
+    """
+
+    def __init__(self, path: Path) -> None:
+        self.path = path
+        self.source = path.read_text(encoding="utf-8")
+        self.function_stack: list[str] = []  # enclosing def names, innermost last
+        self.errors: list[tuple[str, str, str]] = []
+
+    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+        self.function_stack.append(node.name)
+        self.generic_visit(node)
+        self.function_stack.pop()
+
+    visit_AsyncFunctionDef = visit_FunctionDef  # async defs are tracked identically
+
+    def visit_Call(self, node: ast.Call) -> None:
+        func = node.func
+        first = node.args[0] if node.args else None
+        if isinstance(func, ast.Attribute) and func.attr == "get" and isinstance(first, ast.Constant):
+            receiver = ast.get_source_segment(self.source, func.value) or ""
+            function_name = self.function_stack[-1] if self.function_stack else "<module>"
+            allowed = (self.path.name, function_name) in ALLOWED_PROMPT_ROW_READS
+            if first.value == "prompt" and receiver.endswith("row") and not allowed:
+                issue = "metadata formatter code reads row prompt text; use structured metadata or add an explicit audited exception"
+                self.errors.append((self.path.name, f"{function_name}:{node.lineno}", issue))
+        self.generic_visit(node)
+
+
+def _prompt_row_read_errors() -> list[tuple[str, str, str]]:
+    """Return prompt-read audit errors for every ROOT file matching PROMPT_ROW_READ_SCAN_GLOBS."""
+    # A set drops files matched by more than one glob; sorting keeps the report order stable.
+    scan_paths = {path for pattern in PROMPT_ROW_READ_SCAN_GLOBS for path in ROOT.glob(pattern)}
+    found: list[tuple[str, str, str]] = []
+    for scan_path in sorted(scan_paths):
+        # The visitor loads the file text itself; parse that same text.
+        visitor = _PromptRowReadVisitor(scan_path)
+        visitor.visit(ast.parse(visitor.source))
+        found.extend(visitor.errors)
+    return found
+
+
 def _json_dumps(value: Any) -> str:
     return json.dumps(value, ensure_ascii=True, sort_keys=True)
 
@@ -689,6 +750,13 @@ def main() -> int:
         return 1
     print("OK: critical route modules are documented and covered by smoke cases.")
 
+    print("\n# Metadata Prompt Fallback Validation")
+    prompt_row_read_errors = _prompt_row_read_errors()
+    if prompt_row_read_errors:
+        print_table(("Module", "Location", "Issue"), prompt_row_read_errors)
+        return 1
+    print("OK: metadata formatter modules avoid raw prompt reads outside audited fallback helpers.")
+
     print("\n# Runtime Metadata Route Validation")
     runtime_metadata_errors = _runtime_metadata_errors()
     if runtime_metadata_errors:
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index ee2bda9..f3f73f6 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -1385,6 +1385,18 @@ def smoke_krea_normal_row_routes() -> None:
         "style": "realistic creator-shot photography",
     }
     _expect_krea_normal_route_parity(single, "krea_normal_single", "metadata(single)")
+    style_metadata = dict(
+        single,
+        style="metadata style phrase",
+        positive_suffix="metadata suffix phrase",
+        prompt="Use: stale prompt suffix phrase.",
+    )
+    style_prompt, style_method = krea_formatter._normal_row_to_krea(style_metadata, "balanced", "preserve")
+    _expect(style_method == "metadata(single)", "Krea style metadata route changed method")
+    style_prompt_lower = style_prompt.lower()
+    _expect("metadata style phrase" in style_prompt_lower, "Krea metadata route lost structured style")
+    _expect("metadata suffix phrase" in style_prompt_lower, "Krea metadata route lost structured positive suffix")
+    _expect("stale prompt suffix" not in style_prompt_lower, "Krea metadata route parsed stale Use prompt text")
 
     couple = {
         "subject_type": "couple",