Audit metadata prompt fallback boundaries
This commit is contained in:
@@ -236,6 +236,8 @@ and to compare the exact per-axis RNG seed used for the row.
|
|||||||
representative single row and Insta/OF pair, verifies embedded
|
representative single row and Insta/OF pair, verifies embedded
|
||||||
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
|
`generation_trace` fields, and confirms Krea2, SDXL, and caption formatters
|
||||||
consume metadata JSON instead of silently falling back to raw prompt text.
|
consume metadata JSON instead of silently falling back to raw prompt text.
|
||||||
|
The same audit also statically rejects direct `row.get("prompt")` reads in
|
||||||
|
formatter metadata modules outside the shared fallback helpers.
|
||||||
|
|
||||||
## Category Sources
|
## Category Sources
|
||||||
|
|
||||||
|
|||||||
+1
-1
@@ -398,7 +398,7 @@ def _style_phrase(row: dict[str, Any], style_mode: str) -> str:
|
|||||||
if style_mode == "photographic":
|
if style_mode == "photographic":
|
||||||
return "realistic creator-shot photography with natural lighting, tactile skin and fabric detail, and clean social-media composition"
|
return "realistic creator-shot photography with natural lighting, tactile skin and fabric detail, and clean social-media composition"
|
||||||
style = _clean(row.get("style"))
|
style = _clean(row.get("style"))
|
||||||
suffix = _clean(row.get("positive_suffix")) or _prompt_field(_clean(row.get("prompt")), "Use")
|
suffix = _clean(row.get("positive_suffix"))
|
||||||
if style and suffix:
|
if style and suffix:
|
||||||
return f"{style}; {suffix}"
|
return f"{style}; {suffix}"
|
||||||
return style or suffix
|
return style or suffix
|
||||||
|
|||||||
@@ -69,6 +69,21 @@ ENTRY_ROUTE_SNIPPETS: tuple[str, ...] = (
|
|||||||
"`naturalize_caption` -> `caption_format_route.py`",
|
"`naturalize_caption` -> `caption_format_route.py`",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Filename globs selecting the modules that the prompt-read audit scans.
# `_prompt_row_read_errors` resolves each pattern with `ROOT.glob(...)`.
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
    "krea_*.py",
    "sdxl_*.py",
    "caption_*.py",
    "formatter_*.py",
)

# Audited exceptions, keyed as (module filename, enclosing function name).
# A prompt read found in one of these functions is not reported as an error.
ALLOWED_PROMPT_ROW_READS: set[tuple[str, str]] = {
    # Central row-value fallback. Metadata routes should prefer explicit fields,
    # but any remaining label fallback must pass through this shared helper.
    ("formatter_input.py", "row_value"),
    # Last-resort caption fallback after all metadata branches decline the row.
    ("caption_naturalizer.py", "_metadata_to_prose"),
}
|
||||||
|
|
||||||
|
|
||||||
def _literal_or_none(node: ast.AST) -> Any:
|
def _literal_or_none(node: ast.AST) -> Any:
|
||||||
try:
|
try:
|
||||||
@@ -419,6 +434,52 @@ def _routing_doc_errors() -> list[tuple[str, str, str]]:
|
|||||||
return errors
|
return errors
|
||||||
|
|
||||||
|
|
||||||
|
class _PromptRowReadVisitor(ast.NodeVisitor):
|
||||||
|
def __init__(self, path: Path) -> None:
|
||||||
|
self.path = path
|
||||||
|
self.source = path.read_text(encoding="utf-8")
|
||||||
|
self.function_stack: list[str] = []
|
||||||
|
self.errors: list[tuple[str, str, str]] = []
|
||||||
|
|
||||||
|
def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
|
||||||
|
self.function_stack.append(node.name)
|
||||||
|
self.generic_visit(node)
|
||||||
|
self.function_stack.pop()
|
||||||
|
|
||||||
|
def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:
|
||||||
|
self.function_stack.append(node.name)
|
||||||
|
self.generic_visit(node)
|
||||||
|
self.function_stack.pop()
|
||||||
|
|
||||||
|
def visit_Call(self, node: ast.Call) -> None:
|
||||||
|
segment = ast.get_source_segment(self.source, node) or ""
|
||||||
|
if 'row.get("prompt"' in segment or "row.get('prompt'" in segment:
|
||||||
|
function_name = self.function_stack[-1] if self.function_stack else "<module>"
|
||||||
|
key = (self.path.name, function_name)
|
||||||
|
if key not in ALLOWED_PROMPT_ROW_READS:
|
||||||
|
self.errors.append(
|
||||||
|
(
|
||||||
|
self.path.name,
|
||||||
|
f"{function_name}:{node.lineno}",
|
||||||
|
"metadata formatter code reads row prompt text; use structured metadata or add an explicit audited exception",
|
||||||
|
)
|
||||||
|
)
|
||||||
|
self.generic_visit(node)
|
||||||
|
|
||||||
|
|
||||||
|
def _prompt_row_read_errors() -> list[tuple[str, str, str]]:
    """Scan every module matched by the audit globs for raw prompt reads.

    Each pattern in ``PROMPT_ROW_READ_SCAN_GLOBS`` is resolved with
    ``ROOT.glob``.  A module matched by more than one pattern is scanned only
    once, and modules are processed in sorted path order.

    Returns:
        The error rows gathered by ``_PromptRowReadVisitor`` across all
        scanned modules, in scan order.
    """
    unique_modules = {
        module_path
        for glob_pattern in PROMPT_ROW_READ_SCAN_GLOBS
        for module_path in ROOT.glob(glob_pattern)
    }
    findings: list[tuple[str, str, str]] = []
    for module_path in sorted(unique_modules):
        scanner = _PromptRowReadVisitor(module_path)
        scanner.visit(ast.parse(scanner.source))
        findings += scanner.errors
    return findings
|
||||||
|
|
||||||
|
|
||||||
def _json_dumps(value: Any) -> str:
    """Serialize ``value`` to JSON text with ASCII-only output and sorted keys."""
    dump_options = {"ensure_ascii": True, "sort_keys": True}
    return json.dumps(value, **dump_options)
|
||||||
|
|
||||||
@@ -689,6 +750,13 @@ def main() -> int:
|
|||||||
return 1
|
return 1
|
||||||
print("OK: critical route modules are documented and covered by smoke cases.")
|
print("OK: critical route modules are documented and covered by smoke cases.")
|
||||||
|
|
||||||
|
print("\n# Metadata Prompt Fallback Validation")
|
||||||
|
prompt_row_read_errors = _prompt_row_read_errors()
|
||||||
|
if prompt_row_read_errors:
|
||||||
|
print_table(("Module", "Location", "Issue"), prompt_row_read_errors)
|
||||||
|
return 1
|
||||||
|
print("OK: metadata formatter modules avoid raw prompt reads outside audited fallback helpers.")
|
||||||
|
|
||||||
print("\n# Runtime Metadata Route Validation")
|
print("\n# Runtime Metadata Route Validation")
|
||||||
runtime_metadata_errors = _runtime_metadata_errors()
|
runtime_metadata_errors = _runtime_metadata_errors()
|
||||||
if runtime_metadata_errors:
|
if runtime_metadata_errors:
|
||||||
|
|||||||
@@ -1385,6 +1385,18 @@ def smoke_krea_normal_row_routes() -> None:
|
|||||||
"style": "realistic creator-shot photography",
|
"style": "realistic creator-shot photography",
|
||||||
}
|
}
|
||||||
_expect_krea_normal_route_parity(single, "krea_normal_single", "metadata(single)")
|
_expect_krea_normal_route_parity(single, "krea_normal_single", "metadata(single)")
|
||||||
|
style_metadata = dict(
|
||||||
|
single,
|
||||||
|
style="metadata style phrase",
|
||||||
|
positive_suffix="metadata suffix phrase",
|
||||||
|
prompt="Use: stale prompt suffix phrase.",
|
||||||
|
)
|
||||||
|
style_prompt, style_method = krea_formatter._normal_row_to_krea(style_metadata, "balanced", "preserve")
|
||||||
|
_expect(style_method == "metadata(single)", "Krea style metadata route changed method")
|
||||||
|
style_prompt_lower = style_prompt.lower()
|
||||||
|
_expect("metadata style phrase" in style_prompt_lower, "Krea metadata route lost structured style")
|
||||||
|
_expect("metadata suffix phrase" in style_prompt_lower, "Krea metadata route lost structured positive suffix")
|
||||||
|
_expect("stale prompt suffix" not in style_prompt_lower, "Krea metadata route parsed stale Use prompt text")
|
||||||
|
|
||||||
couple = {
|
couple = {
|
||||||
"subject_type": "couple",
|
"subject_type": "couple",
|
||||||
|
|||||||
Reference in New Issue
Block a user