From dfdfff953b516172eee6b6573f763283540820e6 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 27 Jun 2026 02:10:42 +0200 Subject: [PATCH] Validate item template formatter hints --- category_template_metadata.py | 68 ++++++++++++++++++++ docs/prompt-architecture-improvement-plan.md | 9 ++- docs/prompt-pool-routing-map.md | 6 +- prompt_builder.py | 6 ++ tools/prompt_smoke.py | 15 +++++ 5 files changed, 98 insertions(+), 6 deletions(-) diff --git a/category_template_metadata.py b/category_template_metadata.py index 17cebd4..01d5b1f 100644 --- a/category_template_metadata.py +++ b/category_template_metadata.py @@ -20,6 +20,12 @@ TEMPLATE_METADATA_KEYS = ( "position_keys", "formatter_hint", ) +FORMATTER_HINT_ROUTES = ("all", "krea", "sdxl", "caption") +FORMATTER_HINT_ROUTE_ALIASES = { + "krea2": "krea", + "naturalizer": "caption", + "training_caption": "caption", +} def template_metadata(item: Any) -> dict[str, Any]: @@ -49,6 +55,47 @@ def template_action_family(metadata: dict[str, Any]) -> str: return normalize_hardcore_action_family(metadata.get("action_family") or metadata.get("action_type"), "") +def _list_from(value: Any) -> list[Any]: + if value is None: + return [] + if isinstance(value, list): + return value + return [value] + + +def _clean_hint(value: Any) -> str: + return str(value or "").strip() + + +def normalize_formatter_route(value: Any) -> str: + route = re.sub(r"[^a-z0-9]+", "_", str(value or "").strip().lower()).strip("_") + route = FORMATTER_HINT_ROUTE_ALIASES.get(route, route) + return route if route in FORMATTER_HINT_ROUTES else "" + + +def formatter_hints(metadata: dict[str, Any]) -> dict[str, list[str]]: + raw = metadata.get("formatter_hint") + if raw is None: + return {} + normalized: dict[str, list[str]] = {} + + def add(route: str, values: Any) -> None: + route = normalize_formatter_route(route) + if not route: + return + for value in _list_from(values): + hint = _clean_hint(value) + if hint and hint not in normalized.setdefault(route, []): + normalized[route].append(hint) + + if isinstance(raw, dict): + for route, values in raw.items(): + add(str(route), values) + else: + add("all", raw) + return {route: hints for route, hints in normalized.items() if hints} + + def merge_position_keys(primary: list[str], fallback: list[str]) -> list[str]: merged: list[str] = [] for key in [*primary, *fallback]: @@ -85,4 +132,25 @@ def template_metadata_errors(metadata: dict[str, Any]) -> list[str]: ] if invalid_keys: errors.append("unknown position key(s): " + ", ".join(invalid_keys)) + raw_hint = metadata.get("formatter_hint") + if raw_hint is not None: + if isinstance(raw_hint, dict): + for route, values in raw_hint.items(): + if not normalize_formatter_route(route): + errors.append(f"unknown formatter_hint route: {route}") + invalid_values = [ + repr(value) + for value in _list_from(values) + if not isinstance(value, str) or not value.strip() + ] + if invalid_values: + errors.append(f"invalid formatter_hint value(s) for {route}: " + ", ".join(invalid_values)) + else: + invalid_values = [ + repr(value) + for value in _list_from(raw_hint) + if not isinstance(value, str) or not value.strip() + ] + if invalid_values: + errors.append("invalid formatter_hint value(s): " + ", ".join(invalid_values)) return errors diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index d1f7dcb..65a9999 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -337,15 +337,14 @@ Keep here: - direct category-specific wording. - optional object-style item templates with route metadata such as `action_family`, `action_type`, `position_family`, `family`, `position_key`, - and `position_keys`; string templates remain valid and fall back to Python - inference. + `position_keys`, and `formatter_hint`; string templates remain valid and fall + back to Python inference. Improve later: -- add `formatter_hint` fields only where needed, not globally; - keep `tools/prompt_map_audit.py` passing; it now checks referenced - expression/composition/scene pools and item-template axes for both string and - object templates. + expression/composition/scene pools, item-template axes, and object-template + metadata values for both string and object templates. ### Node / UI Path diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 9b7bfda..eb2006a 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -219,7 +219,10 @@ Important JSON keys: - `item_templates`: templates with axis placeholders. - `item_templates` entries may be strings or objects with `template` plus optional route metadata such as `action_family`, `action_type`, - `position_family`, `family`, `position_key`, and `position_keys`. + `position_family`, `family`, `position_key`, `position_keys`, and + `formatter_hint`. Formatter hints may be a string/list for all routes or a + map keyed by `krea`, `sdxl`, or `caption`; aliases such as `krea2` and + `training_caption` are normalized by `category_template_metadata.py`. - `axes`: values used to fill `item_templates`. - `scene_pool` / `scene_pools` or direct `scenes`: location road. - `expression_pool` / `expression_pools` or direct `expressions`: expression road. @@ -449,6 +452,7 @@ plain prompt text. When debugging, inspect these fields before editing pools. | `item` | `_compose_item` or Insta override | Krea/SDXL/Naturalizer | Clothing item, category item, or sexual scene/action text. | | `item_axis_values` | `_compose_item` | Krea hardcore rewrite, SDXL tags | Filled template axes such as position/action/detail values. | | `item_template_metadata` | `_compose_item` | Debug, Krea/SDXL/Naturalizer route metadata | Optional metadata from object-style item templates; currently used to prefer explicit action/position families and keys before inference. | +| `formatter_hints` | `category_template_metadata.formatter_hints` | Debug/future formatter specialization | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`. | | `action_family` | `item_template_metadata` or `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, SDXL tags, natural captions, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. | | `position_family` | `item_template_metadata` or `_hardcore_source_position_family` | Debug/filtering | Source/UI hardcore family selected by template metadata or subcategory, such as `manual`, `interaction`, `oral`, `anal`, or `climax`. | | `position_key`, `position_keys` | `item_template_metadata` plus `_hardcore_position_keys` | Debug/future filters | Concrete position tokens from object-template metadata and inferred axes/role text, such as `kneeling`, `doggy`, `boobjob`, or `open_thighs`. | diff --git a/prompt_builder.py b/prompt_builder.py index 8a62938..eabcac4 100644 --- a/prompt_builder.py +++ b/prompt_builder.py @@ -318,6 +318,10 @@ def _template_action_family(metadata: dict[str, Any]) -> str: return item_template_policy.template_action_family(metadata) +def _template_formatter_hints(metadata: dict[str, Any]) -> dict[str, list[str]]: + return item_template_policy.formatter_hints(metadata) + + def _merge_position_keys(primary: list[str], fallback: list[str]) -> list[str]: return item_template_policy.merge_position_keys(primary, fallback) @@ -3757,6 +3761,7 @@ def _build_custom_row( if is_pose_category: item_text = _sanitize_hardcore_environment_anchors(item_text) item_axis_values = _sanitize_hardcore_axis_values(item_axis_values) + item_formatter_hints = _template_formatter_hints(item_template_metadata) subject_type = str(_merged_field(category, subcategory, item, "subject_type", "single_any")) context = _subject_context(person_rng, subject_type, ethnicity, figure, no_plus_women, no_black, women_count, men_count) character_slots = _parse_character_cast(character_cast) @@ -4029,6 +4034,7 @@ def _build_custom_row( "custom_item": item_name, "item_axis_values": item_axis_values, "item_template_metadata": item_template_metadata, + "formatter_hints": item_formatter_hints, "scene_text": scene, "location_config": parsed_location_config if _location_config_active(parsed_location_config) else {}, "pose": pose, diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index d3e2497..ccd1805 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -119,6 +119,7 @@ def _expect_custom_row(row: dict[str, Any], name: str) -> None: _expect_text(f"{name}.composition", row.get("composition"), 8) _expect_text(f"{name}.role_graph", row.get("source_role_graph") or row.get("role_graph"), 8) _expect(isinstance(row.get("item_axis_values"), dict), f"{name}.item_axis_values missing") + _expect(isinstance(row.get("formatter_hints"), dict), f"{name}.formatter_hints missing") def _expect_formatter_outputs(row: dict[str, Any], name: str, *, target: str = "auto") -> None: @@ -400,6 +401,8 @@ def _fixture_hardcore_row(**overrides: Any) -> dict[str, Any]: "penetration_act": "full-body penetrative sex", "mouth_detail": "mouth close to the ear", }, + "item_template_metadata": {}, + "formatter_hints": {}, "scene_text": "private studio room with warm light", "scene_kind": "explicit adult sex scene", "pose": "configured explicit pose", @@ -1195,6 +1198,11 @@ def smoke_hardcore_position_config_policy() -> None: "action_family": "oral", "position_family": "oral", "position_keys": ["kneeling", "open_thighs"], + "formatter_hint": { + "krea2": "keep mouth contact readable", + "sdxl": ["oral contact", "kneeling oral"], + "training_caption": "oral contact caption detail", + }, } ], "item_axes": { @@ -1213,6 +1221,10 @@ def smoke_hardcore_position_config_policy() -> None: _expect(pb._template_position_family(template_metadata) == "oral", "Template metadata route lost position family") _expect(pb._template_position_keys(template_metadata) == ["kneeling", "open_thighs"], "Template metadata route lost position keys") _expect(pb._template_action_family(template_metadata) == "oral", "Template metadata route lost normalized action family") + formatter_hints = pb._template_formatter_hints(template_metadata) + _expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint") + _expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints") + _expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint") _expect( pb._template_action_family(template_metadata) == category_template_metadata.template_action_family(template_metadata), "Prompt builder template action policy should delegate", @@ -1225,11 +1237,14 @@ def smoke_hardcore_position_config_policy() -> None: "action_family": "bad_action", "position_family": "bad_family", "position_keys": ["kneeling", "bad_position"], + "formatter_hint": {"bad_route": 9, "sdxl": ["ok", ""]}, } invalid_errors = category_template_metadata.template_metadata_errors(invalid_metadata) _expect(any("bad_action" in error for error in invalid_errors), "Template metadata validation missed bad action") _expect(any("bad_family" in error for error in invalid_errors), "Template metadata validation missed bad family") _expect(any("bad_position" in error for error in invalid_errors), "Template metadata validation missed bad position key") + _expect(any("bad_route" in error for error in invalid_errors), "Template metadata validation missed bad formatter route") + _expect(any("invalid formatter_hint" in error for error in invalid_errors), "Template metadata validation missed bad formatter hint value") def smoke_category_library_route() -> None: