From dfdfff953b516172eee6b6573f763283540820e6 Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Sat, 27 Jun 2026 02:10:42 +0200
Subject: [PATCH] Validate item template formatter hints

---
 category_template_metadata.py                | 68 ++++++++++++++++++++
 docs/prompt-architecture-improvement-plan.md |  9 ++-
 docs/prompt-pool-routing-map.md              |  6 +-
 prompt_builder.py                            |  6 ++
 tools/prompt_smoke.py                        | 15 +++++
 5 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/category_template_metadata.py b/category_template_metadata.py
index 17cebd4..01d5b1f 100644
--- a/category_template_metadata.py
+++ b/category_template_metadata.py
@@ -20,6 +20,12 @@ TEMPLATE_METADATA_KEYS = (
     "position_keys",
     "formatter_hint",
 )
+FORMATTER_HINT_ROUTES = ("all", "krea", "sdxl", "caption")  # canonical routes; "all" receives hints given without a route map
+FORMATTER_HINT_ROUTE_ALIASES = {  # alias slug -> canonical route, consulted by normalize_formatter_route
+    "krea2": "krea",
+    "naturalizer": "caption",
+    "training_caption": "caption",
+}
 
 
 def template_metadata(item: Any) -> dict[str, Any]:
@@ -49,6 +55,47 @@ def template_action_family(metadata: dict[str, Any]) -> str:
     return normalize_hardcore_action_family(metadata.get("action_family") or metadata.get("action_type"), "")
 
 
+def _list_from(value: Any) -> list[Any]:
+    """Coerce *value* to a list: ``None`` -> ``[]``, lists unchanged, else ``[value]``."""
+    if isinstance(value, list):
+        # Handed back as-is (same object, not a copy).
+        return value
+    return [] if value is None else [value]
+
+
+def _clean_hint(value: Any) -> str:
+    return str(value).strip() if value else ""  # falsy input -> ""; otherwise whitespace-trimmed str()
+
+
+def normalize_formatter_route(value: Any) -> str:
+    slug = re.sub(r"[^a-z0-9]+", "_", str(value or "").strip().lower()).strip("_")  # lowercase snake_case slug
+    canonical = FORMATTER_HINT_ROUTE_ALIASES.get(slug, slug)  # resolve alias; non-aliases pass through
+    return canonical if canonical in FORMATTER_HINT_ROUTES else ""  # "" signals an unknown route
+
+
+def formatter_hints(metadata: dict[str, Any]) -> dict[str, list[str]]:
+    """Group the template's ``formatter_hint`` values by canonical route.
+
+    Non-dict input is filed under ``"all"``; unknown routes, blanks and repeats are dropped."""
+    source = metadata.get("formatter_hint")
+    if source is None:
+        return {}
+    if isinstance(source, dict):
+        pairs = [(str(key), item) for key, item in source.items()]
+    else:
+        pairs = [("all", source)]
+    grouped: dict[str, list[str]] = {}
+    for name, payload in pairs:
+        canonical = normalize_formatter_route(name)
+        if not canonical:
+            continue
+        for entry in _list_from(payload):
+            text = _clean_hint(entry)
+            if text and text not in grouped.setdefault(canonical, []):
+                grouped[canonical].append(text)
+    return {name: texts for name, texts in grouped.items() if texts}
+
+
 def merge_position_keys(primary: list[str], fallback: list[str]) -> list[str]:
     merged: list[str] = []
     for key in [*primary, *fallback]:
@@ -85,4 +132,25 @@ def template_metadata_errors(metadata: dict[str, Any]) -> list[str]:
     ]
     if invalid_keys:
         errors.append("unknown position key(s): " + ", ".join(invalid_keys))
+    raw_hint = metadata.get("formatter_hint")
+    if raw_hint is not None:
+        if isinstance(raw_hint, dict):
+            for route, values in raw_hint.items():
+                if not normalize_formatter_route(route):
+                    errors.append(f"unknown formatter_hint route: {route}")
+                invalid_values = [
+                    repr(value)
+                    for value in _list_from(values)
+                    if not isinstance(value, str) or not value.strip()
+                ]
+                if invalid_values:
+                    errors.append(f"invalid formatter_hint value(s) for {route}: " + ", ".join(invalid_values))
+        else:
+            invalid_values = [
+                repr(value)
+                for value in _list_from(raw_hint)
+                if not isinstance(value, str) or not value.strip()
+            ]
+            if invalid_values:
+                errors.append("invalid formatter_hint value(s): " + ", ".join(invalid_values))
     return errors
diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md
index d1f7dcb..65a9999 100644
--- a/docs/prompt-architecture-improvement-plan.md
+++ b/docs/prompt-architecture-improvement-plan.md
@@ -337,15 +337,14 @@ Keep here:
 - direct category-specific wording.
 - optional object-style item templates with route metadata such as
   `action_family`, `action_type`, `position_family`, `family`, `position_key`,
-  and `position_keys`; string templates remain valid and fall back to Python
-  inference.
+  `position_keys`, and `formatter_hint`; string templates remain valid and fall
+  back to Python inference.
 
 Improve later:
 
-- add `formatter_hint` fields only where needed, not globally;
 - keep `tools/prompt_map_audit.py` passing; it now checks referenced
-  expression/composition/scene pools and item-template axes for both string and
-  object templates.
+  expression/composition/scene pools and item-template axes for both string
+  and object templates, plus the metadata values of object templates.
 
 ### Node / UI Path
 
diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 9b7bfda..eb2006a 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -219,7 +219,10 @@ Important JSON keys:
 - `item_templates`: templates with axis placeholders.
 - `item_templates` entries may be strings or objects with `template` plus
   optional route metadata such as `action_family`, `action_type`,
-  `position_family`, `family`, `position_key`, and `position_keys`.
+  `position_family`, `family`, `position_key`, `position_keys`, and
+  `formatter_hint`. A hint is a string/list (stored under `all`) or a map keyed
+  by `all`, `krea`, `sdxl`, or `caption`; the aliases `krea2`, `naturalizer`,
+  and `training_caption` are normalized by `category_template_metadata.py`.
 - `axes`: values used to fill `item_templates`.
 - `scene_pool` / `scene_pools` or direct `scenes`: location road.
 - `expression_pool` / `expression_pools` or direct `expressions`: expression road.
@@ -449,6 +452,7 @@ plain prompt text. When debugging, inspect these fields before editing pools.
 | `item` | `_compose_item` or Insta override | Krea/SDXL/Naturalizer | Clothing item, category item, or sexual scene/action text. |
 | `item_axis_values` | `_compose_item` | Krea hardcore rewrite, SDXL tags | Filled template axes such as position/action/detail values. |
 | `item_template_metadata` | `_compose_item` | Debug, Krea/SDXL/Naturalizer route metadata | Optional metadata from object-style item templates; currently used to prefer explicit action/position families and keys before inference. |
+| `formatter_hints` | `category_template_metadata.formatter_hints` | Debug/future formatter specialization | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`. |
 | `action_family` | `item_template_metadata` or `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, SDXL tags, natural captions, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. |
 | `position_family` | `item_template_metadata` or `_hardcore_source_position_family` | Debug/filtering | Source/UI hardcore family selected by template metadata or subcategory, such as `manual`, `interaction`, `oral`, `anal`, or `climax`. |
 | `position_key`, `position_keys` | `item_template_metadata` plus `_hardcore_position_keys` | Debug/future filters | Concrete position tokens from object-template metadata and inferred axes/role text, such as `kneeling`, `doggy`, `boobjob`, or `open_thighs`. |
diff --git a/prompt_builder.py b/prompt_builder.py
index 8a62938..eabcac4 100644
--- a/prompt_builder.py
+++ b/prompt_builder.py
@@ -318,6 +318,10 @@ def _template_action_family(metadata: dict[str, Any]) -> str:
     return item_template_policy.template_action_family(metadata)
 
 
+def _template_formatter_hints(metadata: dict[str, Any]) -> dict[str, list[str]]:
+    return item_template_policy.formatter_hints(metadata)  # pure delegate to item_template_policy; no logic of its own
+
+
 def _merge_position_keys(primary: list[str], fallback: list[str]) -> list[str]:
     return item_template_policy.merge_position_keys(primary, fallback)
 
@@ -3757,6 +3761,7 @@ def _build_custom_row(
     if is_pose_category:
         item_text = _sanitize_hardcore_environment_anchors(item_text)
         item_axis_values = _sanitize_hardcore_axis_values(item_axis_values)
+    item_formatter_hints = _template_formatter_hints(item_template_metadata)
     subject_type = str(_merged_field(category, subcategory, item, "subject_type", "single_any"))
     context = _subject_context(person_rng, subject_type, ethnicity, figure, no_plus_women, no_black, women_count, men_count)
     character_slots = _parse_character_cast(character_cast)
@@ -4029,6 +4034,7 @@ def _build_custom_row(
             "custom_item": item_name,
             "item_axis_values": item_axis_values,
             "item_template_metadata": item_template_metadata,
+            "formatter_hints": item_formatter_hints,
             "scene_text": scene,
             "location_config": parsed_location_config if _location_config_active(parsed_location_config) else {},
             "pose": pose,
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index d3e2497..ccd1805 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -119,6 +119,7 @@ def _expect_custom_row(row: dict[str, Any], name: str) -> None:
     _expect_text(f"{name}.composition", row.get("composition"), 8)
     _expect_text(f"{name}.role_graph", row.get("source_role_graph") or row.get("role_graph"), 8)
     _expect(isinstance(row.get("item_axis_values"), dict), f"{name}.item_axis_values missing")
+    _expect(isinstance(row.get("formatter_hints"), dict), f"{name}.formatter_hints missing")
 
 
 def _expect_formatter_outputs(row: dict[str, Any], name: str, *, target: str = "auto") -> None:
@@ -400,6 +401,8 @@ def _fixture_hardcore_row(**overrides: Any) -> dict[str, Any]:
             "penetration_act": "full-body penetrative sex",
             "mouth_detail": "mouth close to the ear",
         },
+        "item_template_metadata": {},
+        "formatter_hints": {},
         "scene_text": "private studio room with warm light",
         "scene_kind": "explicit adult sex scene",
         "pose": "configured explicit pose",
@@ -1195,6 +1198,11 @@ def smoke_hardcore_position_config_policy() -> None:
                     "action_family": "oral",
                     "position_family": "oral",
                     "position_keys": ["kneeling", "open_thighs"],
+                    "formatter_hint": {
+                        "krea2": "keep mouth contact readable",
+                        "sdxl": ["oral contact", "kneeling oral"],
+                        "training_caption": "oral contact caption detail",
+                    },
                 }
             ],
             "item_axes": {
@@ -1213,6 +1221,10 @@ def smoke_hardcore_position_config_policy() -> None:
     _expect(pb._template_position_family(template_metadata) == "oral", "Template metadata route lost position family")
     _expect(pb._template_position_keys(template_metadata) == ["kneeling", "open_thighs"], "Template metadata route lost position keys")
     _expect(pb._template_action_family(template_metadata) == "oral", "Template metadata route lost normalized action family")
+    formatter_hints = pb._template_formatter_hints(template_metadata)
+    _expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint")
+    _expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints")
+    _expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint")
     _expect(
         pb._template_action_family(template_metadata) == category_template_metadata.template_action_family(template_metadata),
         "Prompt builder template action policy should delegate",
@@ -1225,11 +1237,14 @@ def smoke_hardcore_position_config_policy() -> None:
         "action_family": "bad_action",
         "position_family": "bad_family",
         "position_keys": ["kneeling", "bad_position"],
+        "formatter_hint": {"bad_route": 9, "sdxl": ["ok", ""]},
     }
     invalid_errors = category_template_metadata.template_metadata_errors(invalid_metadata)
     _expect(any("bad_action" in error for error in invalid_errors), "Template metadata validation missed bad action")
     _expect(any("bad_family" in error for error in invalid_errors), "Template metadata validation missed bad family")
     _expect(any("bad_position" in error for error in invalid_errors), "Template metadata validation missed bad position key")
+    _expect(any("bad_route" in error for error in invalid_errors), "Template metadata validation missed bad formatter route")
+    _expect(any("invalid formatter_hint" in error for error in invalid_errors), "Template metadata validation missed bad formatter hint value")
 
 
 def smoke_category_library_route() -> None: