Consume formatter hints

2026-06-27 02:17:04 +02:00
parent dfdfff953b
commit 7d112c0f98
7 changed files with 145 additions and 5 deletions
@@ -5,11 +5,13 @@ from typing import Any

 try:
    from . import caption_policy
+    from . import category_template_metadata as template_metadata_policy
    from . import formatter_input as input_policy
    from . import krea_cast as cast_policy
    from .prompt_hygiene import sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
    import caption_policy
+    import category_template_metadata as template_metadata_policy
    import formatter_input as input_policy
    import krea_cast as cast_policy
    from prompt_hygiene import sanitize_prose_text
@@ -67,6 +69,24 @@ def _join_sentences(parts: list[str]) -> str:
    return " ".join(part for part in (_sentence(part) for part in parts) if part)


+def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
+    """Collect the caption-route formatter hints attached to ``row``.
+
+    Hints come from ``template_metadata_policy.formatter_hints_for_route``
+    for the ``"caption"`` route. Each one is cleaned with ``_clean_text``,
+    stripped of surrounding spaces and periods, and kept once, in first-seen
+    order. A non-dict ``row`` yields an empty list.
+    """
+    collected: list[str] = []
+    if isinstance(row, dict):
+        for raw in template_metadata_policy.formatter_hints_for_route(row, "caption"):
+            cleaned = _clean_text(raw).strip(" .")
+            # Skip blanks and anything already collected.
+            if not cleaned or cleaned in collected:
+                continue
+            collected.append(cleaned)
+    return collected
+
+
+def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
+    """Return ``prose`` followed by the caption-route hints of ``row``.
+
+    When ``row`` carries no usable hints, ``prose`` is returned untouched.
+    Otherwise the prose and every hint are passed through ``_join_sentences``.
+    """
+    extra = _formatter_hint_parts(row)
+    return _join_sentences([prose, *extra]) if extra else prose
+
+
 def _human_join(parts: list[str]) -> str:
    parts = [part for part in (_clean_text(part) for part in parts) if part]
    if len(parts) <= 1:
@@ -538,8 +558,10 @@ def _metadata_to_prose(row: dict[str, Any], detail_level: str, keep_style: bool)
    ):
        result = builder(row, detail_level, keep_style)
        if result:
-            return result
-    return _text_to_prose(_clean_text(row.get("caption") or row.get("prompt")), detail_level, keep_style)
+            prose, method = result
+            return _append_formatter_hints(prose, row), method
+    prose, method = _text_to_prose(_clean_text(row.get("caption") or row.get("prompt")), detail_level, keep_style)
+    return _append_formatter_hints(prose, row), method


 def _prompt_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, str] | None:
@@ -96,6 +96,37 @@ def formatter_hints(metadata: dict[str, Any]) -> dict[str, list[str]]:
    return {route: hints for route, hints in normalized.items() if hints}


+def formatter_hints_for_route(row_or_hints: Any, route: str) -> list[str]:
+    """Return the de-duplicated formatter hints that apply to ``route``.
+
+    ``row_or_hints`` may be one of:
+
+    * a dict carrying a ``formatter_hints`` mapping (route -> hints),
+    * a dict carrying a raw ``formatter_hint`` entry, which is normalized
+      through ``formatter_hints``,
+    * a bare route -> hints mapping whose keys are all route names.
+
+    Hints filed under ``all`` come first, followed by the hints of the
+    normalized ``route``; duplicates are dropped. An empty list is returned
+    when ``route`` does not normalize, when ``row_or_hints`` is not a dict,
+    or when it is a row that carries no hint metadata.
+    """
+    route = normalize_formatter_route(route)
+    if not route or not isinstance(row_or_hints, dict):
+        return []
+
+    if isinstance(row_or_hints.get("formatter_hints"), dict):
+        raw_hints = row_or_hints["formatter_hints"]
+    elif "formatter_hint" in row_or_hints:
+        raw_hints = formatter_hints(row_or_hints)
+    elif all(normalize_formatter_route(key) for key in row_or_hints):
+        # Every key names a route, so the dict itself is the hints mapping.
+        raw_hints = row_or_hints
+    else:
+        # A full row without hint metadata must not be read as a hints
+        # mapping: a row field whose name matches a route (for example the
+        # row's own `caption` text) would otherwise come back as a hint and
+        # be appended to the formatter output a second time.
+        return []
+
+    normalized: dict[str, list[str]] = {}
+    if isinstance(raw_hints, dict):
+        for raw_route, values in raw_hints.items():
+            normalized_route = normalize_formatter_route(raw_route)
+            if not normalized_route:
+                continue
+            # Aliases of one route share a bucket, so hints are merged.
+            bucket = normalized.setdefault(normalized_route, [])
+            for value in _list_from(values):
+                hint = _clean_hint(value)
+                if hint and hint not in bucket:
+                    bucket.append(hint)
+
+    hints: list[str] = []
+    # Shared hints first, then the route-specific ones.
+    for wanted_route in ("all", route):
+        for hint in normalized.get(wanted_route, []):
+            if hint not in hints:
+                hints.append(hint)
+    return hints
+
+
 def merge_position_keys(primary: list[str], fallback: list[str]) -> list[str]:
    merged: list[str] = []
    for key in [*primary, *fallback]:
@@ -338,7 +338,9 @@ Keep here:
 - optional object-style item templates with route metadata such as
  `action_family`, `action_type`, `position_family`, `family`, `position_key`,
  `position_keys`, and `formatter_hint`; string templates remain valid and fall
-  back to Python inference.
+  back to Python inference. Normalized formatter hints are routed into Krea,
+  SDXL, and caption naturalization through `all` plus the matching formatter
+  route only.

 Improve later:

@@ -222,7 +222,8 @@ Important JSON keys:
  `position_family`, `family`, `position_key`, `position_keys`, and
  `formatter_hint`. Formatter hints may be a string/list for all routes or a
  map keyed by `krea`, `sdxl`, or `caption`; aliases such as `krea2` and
-  `training_caption` are normalized by `category_template_metadata.py`.
+  `training_caption` are normalized by `category_template_metadata.py` and
+  consumed only by the matching formatter route plus the shared `all` route.
 - `axes`: values used to fill `item_templates`.
 - `scene_pool` / `scene_pools` or direct `scenes`: location road.
 - `expression_pool` / `expression_pools` or direct `expressions`: expression road.
@@ -452,7 +453,7 @@ plain prompt text. When debugging, inspect these fields before editing pools.
 | `item` | `_compose_item` or Insta override | Krea/SDXL/Naturalizer | Clothing item, category item, or sexual scene/action text. |
 | `item_axis_values` | `_compose_item` | Krea hardcore rewrite, SDXL tags | Filled template axes such as position/action/detail values. |
 | `item_template_metadata` | `_compose_item` | Debug, Krea/SDXL/Naturalizer route metadata | Optional metadata from object-style item templates; currently used to prefer explicit action/position families and keys before inference. |
-| `formatter_hints` | `category_template_metadata.formatter_hints` | Debug/future formatter specialization | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`. |
+| `formatter_hints` | `category_template_metadata.formatter_hints` | Krea/SDXL/Naturalizer route specialization, debug | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`; each formatter consumes `all` plus its own route only. |
 | `action_family` | `item_template_metadata` or `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, SDXL tags, natural captions, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. |
 | `position_family` | `item_template_metadata` or `_hardcore_source_position_family` | Debug/filtering | Source/UI hardcore family selected by template metadata or subcategory, such as `manual`, `interaction`, `oral`, `anal`, or `climax`. |
 | `position_key`, `position_keys` | `item_template_metadata` plus `_hardcore_position_keys` | Debug/future filters | Concrete position tokens from object-template metadata and inferred axes/role text, such as `kneeling`, `doggy`, `boobjob`, or `open_thighs`. |
@@ -5,6 +5,7 @@ from typing import Any

 try:
    from . import formatter_input as input_policy
+    from . import category_template_metadata as template_metadata_policy
    from .krea_action_context import (
        is_close_foreplay_text as _is_close_foreplay_text,
        is_outercourse_text as _is_outercourse_text,
@@ -35,6 +36,7 @@ try:
    from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
    import formatter_input as input_policy
+    import category_template_metadata as template_metadata_policy
    from krea_action_context import (
        is_close_foreplay_text as _is_close_foreplay_text,
        is_outercourse_text as _is_outercourse_text,
@@ -102,6 +104,25 @@ def _paragraph(parts: list[str]) -> str:
    return " ".join(part for part in (_sentence(part) for part in parts) if part)


+def _formatter_hint_parts(*rows: dict[str, Any]) -> list[str]:
+    """Collect the Krea-route formatter hints found across ``rows``.
+
+    Every dict in ``rows`` is resolved with
+    ``template_metadata_policy.formatter_hints_for_route`` for the ``"krea"``
+    route; non-dict entries are ignored. Hints are cleaned with ``_clean``,
+    stripped of surrounding spaces and periods, and kept once, in first-seen
+    order across all rows.
+    """
+    collected: list[str] = []
+    for candidate in rows:
+        if isinstance(candidate, dict):
+            for raw in template_metadata_policy.formatter_hints_for_route(candidate, "krea"):
+                cleaned = _clean(raw).strip(" .")
+                # Skip blanks and hints already contributed by an earlier row.
+                if not cleaned or cleaned in collected:
+                    continue
+                collected.append(cleaned)
+    return collected
+
+
+def _append_formatter_hints(prompt: str, *rows: dict[str, Any]) -> str:
+    """Return ``prompt`` followed by the Krea-route hints from ``rows``.
+
+    With no usable hints the prompt is returned untouched; otherwise the
+    prompt and every hint are passed through ``_paragraph``.
+    """
+    extra = _formatter_hint_parts(*rows)
+    return _paragraph([prompt, *extra]) if extra else prompt
+
+
 def _with_indefinite_article(text: str) -> str:
    text = _clean(text)
    if not text or text.lower().startswith(("a ", "an ")):
@@ -715,6 +736,10 @@ def format_krea2_prompt(

    if row and row.get("mode") == "Insta/OF":
        soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
+        soft_row = row.get("softcore_row") if isinstance(row.get("softcore_row"), dict) else {}
+        hard_row = row.get("hardcore_row") if isinstance(row.get("hardcore_row"), dict) else {}
+        soft_prompt = _append_formatter_hints(soft_prompt, row, soft_row)
+        hard_prompt = _append_formatter_hints(hard_prompt, row, hard_row)
        if extra_positive.strip():
            soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
            hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
@@ -735,6 +760,7 @@ def format_krea2_prompt(

    if row:
        prompt, kind = _normal_row_to_krea(row, detail_level, style_mode)
+        prompt = _append_formatter_hints(prompt, row)
        extracted_negative = _clean(row.get("negative_prompt"))
        method = f"{method}:krea2({kind})"
    else:
@@ -5,11 +5,13 @@ from typing import Any

 try:
    from . import formatter_input as input_policy
+    from . import category_template_metadata as template_metadata_policy
    from . import sdxl_presets as sdxl_policy
    from .hardcore_action_metadata import normalize_hardcore_action_family
    from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
 except ImportError:  # Allows local smoke tests with `python -c`.
    import formatter_input as input_policy
+    import category_template_metadata as template_metadata_policy
    import sdxl_presets as sdxl_policy
    from hardcore_action_metadata import normalize_hardcore_action_family
    from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
@@ -137,6 +139,18 @@ def _metadata_family_tags(row: dict[str, Any]) -> list[str]:
    return tags


+def _formatter_hint_tags(*rows: dict[str, Any]) -> list[str]:
+    """Collect the SDXL-route formatter hints from ``rows`` as tag strings.
+
+    Every dict in ``rows`` is resolved with
+    ``template_metadata_policy.formatter_hints_for_route`` for the ``"sdxl"``
+    route; non-dict entries are ignored. Hints are cleaned with ``_clean``,
+    stripped of surrounding spaces, commas and periods, and kept once, in
+    first-seen order across all rows.
+    """
+    collected: list[str] = []
+    for candidate in rows:
+        if isinstance(candidate, dict):
+            for raw in template_metadata_policy.formatter_hints_for_route(candidate, "sdxl"):
+                cleaned = _clean(raw).strip(" ,.")
+                # Skip blanks and tags already contributed by an earlier row.
+                if not cleaned or cleaned in collected:
+                    continue
+                collected.append(cleaned)
+    return collected
+
+
 def _combine_tags(*parts: Any) -> str:
    tags: list[str] = []
    seen: set[str] = set()
@@ -288,6 +302,8 @@ def _row_core_tags(row: dict[str, Any], nude_weight: float) -> list[str]:

    for tag in _metadata_family_tags(row):
        _add_one(tags, seen, tag)
+    for tag in _formatter_hint_tags(row):
+        _add(tags, seen, tag)

    item = _row_value(row, "item", ("Sexual scene", "Sexual pose", "Erotic outfit", "Clothing")) or _clean(row.get("custom_item"))
    pose = _row_value(row, "pose", ("Sexual pose", "Pose"))
@@ -334,6 +350,8 @@ def _quality_tail(quality_preset: str, custom_quality: str) -> str:
 def _soft_tags(row: dict[str, Any], root: dict[str, Any], nude_weight: float) -> str:
    tags = _row_core_tags(row, nude_weight)
    seen = {_tag_key(tag) for tag in tags}
+    for tag in _formatter_hint_tags(root):
+        _add(tags, seen, tag)
    descriptor = _clean(root.get("shared_descriptor"))
    if descriptor and not any("woman" in _tag_key(tag) for tag in tags):
        for tag in _character_tags_from_descriptor(descriptor):
@@ -369,6 +387,8 @@ def _hard_tags(row: dict[str, Any], root: dict[str, Any], nude_weight: float) ->

    for tag in _metadata_family_tags(row):
        _add_one(tags, seen, tag)
+    for tag in _formatter_hint_tags(row, root):
+        _add(tags, seen, tag)

    hard_scene = _clean(row.get("scene_text"))
    hard_item = _clean(row.get("item"))
@@ -1225,6 +1225,11 @@ def smoke_hardcore_position_config_policy() -> None:
    _expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint")
    _expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints")
    _expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint")
+    route_hints = category_template_metadata.formatter_hints_for_route(
+        {"formatter_hints": {"all": ["shared formatter cue"], "krea2": ["krea formatter cue"]}},
+        "krea2",
+    )
+    _expect(route_hints == ["shared formatter cue", "krea formatter cue"], "Formatter hint route resolver changed")
    _expect(
        pb._template_action_family(template_metadata) == category_template_metadata.template_action_family(template_metadata),
        "Prompt builder template action policy should delegate",
@@ -2486,6 +2491,39 @@ def smoke_formatter_metadata_fixtures() -> None:
        for term in case["caption_terms"]:
            _expect(term in caption_text, f"{name}.caption missing {term!r}")

+    route_row = _fixture_hardcore_row(
+        formatter_hints={
+            "all": ["shared route anchor"],
+            "krea": ["krea readable anchor"],
+            "sdxl": ["sdxl route tag"],
+            "caption": ["caption route phrase"],
+        }
+    )
+    _expect_custom_row(route_row, "fixture_formatter_hints")
+    metadata = _json(route_row)
+
+    krea = krea_formatter.format_krea2_prompt("", metadata_json=metadata, target="single")
+    krea_prompt = _expect_text("fixture_formatter_hints.krea_prompt", krea.get("krea_prompt"), 40).lower()
+    _expect("shared route anchor" in krea_prompt, "Krea formatter missed shared formatter hint")
+    _expect("krea readable anchor" in krea_prompt, "Krea formatter missed Krea formatter hint")
+    _expect("sdxl route tag" not in krea_prompt, "Krea formatter leaked SDXL formatter hint")
+    _expect("caption route phrase" not in krea_prompt, "Krea formatter leaked caption formatter hint")
+
+    sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=metadata, target="single", trigger=SdxlTrigger, prepend_trigger=True)
+    sdxl_prompt = _expect_text("fixture_formatter_hints.sdxl_prompt", sdxl.get("sdxl_prompt"), 40).lower()
+    _expect("shared route anchor" in sdxl_prompt, "SDXL formatter missed shared formatter hint")
+    _expect("sdxl route tag" in sdxl_prompt, "SDXL formatter missed SDXL formatter hint")
+    _expect("krea readable anchor" not in sdxl_prompt, "SDXL formatter leaked Krea formatter hint")
+    _expect("caption route phrase" not in sdxl_prompt, "SDXL formatter leaked caption formatter hint")
+
+    caption, method = caption_naturalizer.naturalize_caption("", metadata_json=metadata, trigger=Trigger, include_trigger=True)
+    caption_text = _expect_text("fixture_formatter_hints.caption", caption, 40).lower()
+    _expect("metadata" in method, "Caption formatter hints fixture did not use metadata")
+    _expect("shared route anchor" in caption_text, "Caption naturalizer missed shared formatter hint")
+    _expect("caption route phrase" in caption_text, "Caption naturalizer missed caption formatter hint")
+    _expect("krea readable anchor" not in caption_text, "Caption naturalizer leaked Krea formatter hint")
+    _expect("sdxl route tag" not in caption_text, "Caption naturalizer leaked SDXL formatter hint")
+

 def smoke_node_utility_registration() -> None:
    required_nodes = [