Extract caption naturalizer policy

2026-06-27 01:38:00 +02:00
parent 5efa073bfb
commit 36ce394462
5 changed files with 163 additions and 61 deletions
@@ -4,54 +4,26 @@ import re
 from typing import Any
 try:
    from . import caption_policy
    from . import formatter_input as input_policy
    from .hardcore_action_metadata import normalize_hardcore_action_family
    from . import krea_cast as cast_policy
    from .prompt_hygiene import sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
    import caption_policy
    import formatter_input as input_policy
    from hardcore_action_metadata import normalize_hardcore_action_family
    import krea_cast as cast_policy
    from prompt_hygiene import sanitize_prose_text
-OLD_TRIGGER = "sxcpinup_coloredpencil"
+OLD_TRIGGER = caption_policy.OLD_TRIGGER
-DEFAULT_TRIGGER = "sxcppnl7"
+DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
-
+STYLE_TAILS = caption_policy.STYLE_TAILS
 STYLE_TAILS = [
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
 ]
 PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
-ITEM_LABELS = (
+ITEM_LABELS = caption_policy.ITEM_LABELS
-    "Sexual pose",
+ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
-    "Erotic outfit",
+POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
    "Clothing",
 )
 ACTION_FAMILY_CAPTION_LABELS = {
    "foreplay": "foreplay action",
    "outercourse": "non-penetrative action",
    "oral": "oral action",
    "penetration": "penetrative action",
    "toy_double": "toy-assisted double-contact action",
    "climax": "climax action",
 }
 POSITION_FAMILY_CAPTION_LABELS = {
    "penetrative": "penetrative action",
    "foreplay": "foreplay action",
    "interaction": "interaction beat",
    "manual": "manual action",
    "oral": "oral action",
    "outercourse": "non-penetrative action",
    "anal": "anal action",
    "climax": "climax action",
    "threesome": "three-person action",
    "group": "group action",
 }
 def _clean_text(value: Any) -> str:
@@ -105,13 +77,7 @@ def _human_join(parts: list[str]) -> str:
 def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
-    position_family = _clean_text(row.get("position_family")).lower()
+    return caption_policy.metadata_action_label(row, default)
    if position_family in POSITION_FAMILY_CAPTION_LABELS:
        return POSITION_FAMILY_CAPTION_LABELS[position_family]
    action_family = normalize_hardcore_action_family(row.get("action_family"))
    if action_family in ACTION_FAMILY_CAPTION_LABELS:
        return ACTION_FAMILY_CAPTION_LABELS[action_family]
    return default
 def _prompt_cast_descriptors(text: str) -> str:
@@ -135,11 +101,7 @@ def _natural_label_text(text: Any, labels: list[str]) -> str:
 def _strip_style_tail(text: str) -> str:
-    text = _clean_text(text)
+    return caption_policy.strip_style_tail(text)
    for tail in STYLE_TAILS:
        if text.endswith(tail):
            return text[: -len(tail)].strip(" ,")
    return text
 def _remove_trigger(text: str, trigger: str) -> str:
@@ -185,14 +147,11 @@ def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
 def _normalize_composition(text: str) -> str:
-    return re.sub(r"^vertical\s+", "", _clean_text(text), flags=re.IGNORECASE)
+    return caption_policy.normalize_composition(text)
 def _clean_clothing(text: str) -> str:
-    text = _clean_text(text)
+    return caption_policy.clean_clothing(text)
    text = re.sub(r",?\s*fashion editorial styling$", "", text, flags=re.IGNORECASE)
    text = re.sub(r",?\s*resort styling$", "", text, flags=re.IGNORECASE)
    return text.strip(" ,")
 def _body_phrase(body: Any, figure_note: Any = "") -> str:
@@ -300,10 +259,7 @@ def _verb_for_row(row: dict[str, Any]) -> str:
 def _detail_allows(level: str, dense_only: bool = False) -> bool:
-    level = (level or "balanced").strip().lower()
+    return caption_policy.detail_allows(level, dense_only=dense_only)
    if dense_only:
        return level == "dense"
    return level != "concise"
 def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
@@ -674,8 +630,8 @@ def naturalize_caption(
 ) -> tuple[str, str]:
    """Rewrite tag-style prompt/caption text into compact natural language."""
    input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
-    detail_level = detail_level if detail_level in ("concise", "balanced", "dense") else "balanced"
+    detail_level = caption_policy.normalize_detail_level(detail_level)
-    keep_style = style_policy == "keep_style_terms"
+    keep_style = caption_policy.keep_style_terms(style_policy)
    row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
    if row is not None:
        prose, method = _metadata_to_prose(row, detail_level, keep_style)
@@ -0,0 +1,99 @@
 from __future__ import annotations
 import re
 from typing import Any
 try:
    from . import formatter_input as input_policy
    from .hardcore_action_metadata import normalize_hardcore_action_family
 except ImportError:  # Allows local smoke tests with `python tools/prompt_smoke.py`.
    import formatter_input as input_policy
    from hardcore_action_metadata import normalize_hardcore_action_family
 # Trigger tokens. caption_naturalizer re-exports both names from this module.
 # NOTE(review): OLD_TRIGGER looks like the legacy trigger word and
 # DEFAULT_TRIGGER the current one; their consumers are not visible here.
 OLD_TRIGGER = "sxcpinup_coloredpencil"
 DEFAULT_TRIGGER = "sxcppnl7"
 # Values accepted unchanged by normalize_detail_level(); anything else
 # becomes "balanced".
 DETAIL_LEVELS = ("balanced", "concise", "dense")
 # Values accepted unchanged by normalize_style_policy(); anything else
 # becomes "drop_style_tail".
 STYLE_POLICIES = ("drop_style_tail", "keep_style_terms")
 # Exact, case-sensitive suffixes that strip_style_tail() removes from the end
 # of a cleaned caption. Each entry starts with the ", " separator.
 STYLE_TAILS = [
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
 ]
 # Item labels shared with caption_naturalizer (re-exported there as
 # ITEM_LABELS); nothing in this module reads them.
 ITEM_LABELS = (
    "Sexual pose",
    "Erotic outfit",
    "Clothing",
 )
 # Caption wording per action family. metadata_action_label() looks up the
 # value returned by normalize_hardcore_action_family() in this mapping.
 ACTION_FAMILY_CAPTION_LABELS = {
    "foreplay": "foreplay action",
    "outercourse": "non-penetrative action",
    "oral": "oral action",
    "penetration": "penetrative action",
    "toy_double": "toy-assisted double-contact action",
    "climax": "climax action",
 }
 # Caption wording per position family. metadata_action_label() checks this
 # mapping first, using the cleaned, lower-cased "position_family" row value.
 POSITION_FAMILY_CAPTION_LABELS = {
    "penetrative": "penetrative action",
    "foreplay": "foreplay action",
    "interaction": "interaction beat",
    "manual": "manual action",
    "oral": "oral action",
    "outercourse": "non-penetrative action",
    "anal": "anal action",
    "climax": "climax action",
    "threesome": "three-person action",
    "group": "group action",
 }
 def normalize_detail_level(value: str) -> str:
    """Return *value* if it is a known detail level, else ``"balanced"``.

    Membership is tested against ``DETAIL_LEVELS`` exactly as given; no
    trimming or case folding is applied here.
    """
    if value in DETAIL_LEVELS:
        return value
    return "balanced"
 def normalize_style_policy(value: str) -> str:
    """Return *value* if it is a known style policy, else ``"drop_style_tail"``.

    Membership is tested against ``STYLE_POLICIES`` exactly as given.
    """
    if value in STYLE_POLICIES:
        return value
    return "drop_style_tail"
 def keep_style_terms(style_policy: str) -> bool:
    """Tell whether *style_policy* resolves to ``"keep_style_terms"``.

    The policy is first passed through ``normalize_style_policy``, so any
    unrecognised value yields ``False``.
    """
    resolved_policy = normalize_style_policy(style_policy)
    return resolved_policy == "keep_style_terms"
 def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Decide whether a detail is emitted at the given detail *level*.

    A falsy *level* counts as ``"balanced"``. The level is trimmed,
    lower-cased and normalised before the check. With ``dense_only`` the
    detail is allowed only at ``"dense"``; otherwise it is allowed at every
    level except ``"concise"``.
    """
    requested = (level or "balanced").strip().lower()
    resolved = normalize_detail_level(requested)
    return (resolved == "dense") if dense_only else (resolved != "concise")
 def strip_style_tail(text: str) -> str:
    """Clean *text* and drop the first ``STYLE_TAILS`` suffix it ends with.

    After the suffix is cut off, leftover spaces and commas are trimmed from
    both ends. When no tail matches, the cleaned text is returned as is.
    """
    cleaned = input_policy.clean_text(text)
    # Tails are tried in list order; only the first match is removed.
    matched_tail = next((tail for tail in STYLE_TAILS if cleaned.endswith(tail)), None)
    if matched_tail is None:
        return cleaned
    return cleaned[: -len(matched_tail)].strip(" ,")
 def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Pick the caption label for a metadata *row*.

    The row's ``position_family`` (cleaned and lower-cased) takes priority.
    If it has no entry in ``POSITION_FAMILY_CAPTION_LABELS``, the normalised
    ``action_family`` is looked up in ``ACTION_FAMILY_CAPTION_LABELS``.
    *default* is returned when neither mapping has a match.
    """
    position_key = input_policy.clean_text(row.get("position_family")).lower()
    if position_key not in POSITION_FAMILY_CAPTION_LABELS:
        # Only fall back to the action family when the position is unknown.
        action_key = normalize_hardcore_action_family(row.get("action_family"))
        return ACTION_FAMILY_CAPTION_LABELS.get(action_key, default)
    return POSITION_FAMILY_CAPTION_LABELS[position_key]
 def normalize_composition(text: str) -> str:
    """Clean *text* and remove a leading ``vertical`` word, ignoring case.

    Only a ``vertical`` at the very start that is followed by whitespace is
    removed; the rest of the cleaned text is returned untouched.
    """
    cleaned = input_policy.clean_text(text)
    leading_vertical = re.compile(r"^vertical\s+", flags=re.IGNORECASE)
    return leading_vertical.sub("", cleaned)
 def clean_clothing(text: str) -> str:
    """Clean *text* and strip trailing styling boilerplate from a clothing phrase.

    A trailing ``fashion editorial styling`` is removed first, then a trailing
    ``resort styling`` (both case-insensitive, with an optional preceding
    comma). Leftover spaces and commas are trimmed from both ends.
    """
    cleaned = input_policy.clean_text(text)
    # Order matters: each pattern only matches at the end of the current text.
    for suffix_pattern in (
        r",?\s*fashion editorial styling$",
        r",?\s*resort styling$",
    ):
        cleaned = re.sub(suffix_pattern, "", cleaned, flags=re.IGNORECASE)
    return cleaned.strip(" ,")
@@ -302,10 +302,13 @@ Keep here:
 - natural sentence caption assembly;
 - training-caption trigger behavior;
- style-tail policy.
+- style-tail policy from `caption_policy.py`.
- metadata-family action labels from `action_family` and `position_family`.
+- metadata-family action labels from `action_family` and `position_family` via
  `caption_policy.py`.
 - shared formatter input parsing from `formatter_input.py`.
 - shared cast descriptor parsing and label replacement from `krea_cast.py`.
 - caption detail-level/style-policy normalization, clothing cleanup, and
  composition cleanup from `caption_policy.py`.
 Improve later:
@@ -97,6 +97,7 @@ Core helper ownership:
 | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
 | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
 | `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
 | `caption_policy.py` | Caption naturalizer policy data and helpers: style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
 ## Node IO Map
@@ -24,6 +24,7 @@ if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
 import caption_naturalizer  # noqa: E402
 import caption_policy  # noqa: E402
 import character_config  # noqa: E402
 import character_profile  # noqa: E402
 import category_cast_config  # noqa: E402
@@ -951,6 +952,47 @@ def smoke_formatter_cast_policy() -> None:
    )
 def smoke_caption_policy() -> None:
    """Smoke-check the caption policy module and its use by caption_naturalizer.

    Verifies that caption_naturalizer exposes the very same policy objects as
    caption_policy, then pins the behaviour of the policy helpers and of the
    naturalizer wrappers that forward to them.
    """
    # Shared policy data must be the same objects, not copies.
    delegated_constants = (
        ("STYLE_TAILS", "Caption naturalizer style tails should delegate to caption_policy"),
        ("ITEM_LABELS", "Caption naturalizer item labels should delegate to caption_policy"),
        ("ACTION_FAMILY_CAPTION_LABELS", "Caption naturalizer action labels should delegate to caption_policy"),
    )
    for attr_name, failure_message in delegated_constants:
        _expect(
            getattr(caption_naturalizer, attr_name) is getattr(caption_policy, attr_name),
            failure_message,
        )

    # Detail-level and style-policy normalisation.
    _expect(caption_policy.normalize_detail_level("bad") == "balanced", "Caption invalid detail fallback changed")
    _expect(caption_policy.keep_style_terms("keep_style_terms") is True, "Caption style policy keep flag changed")
    _expect(caption_policy.detail_allows("concise") is False, "Caption concise detail gate changed")
    _expect(caption_policy.detail_allows("dense", dense_only=True) is True, "Caption dense-only gate changed")

    # Style-tail stripping, directly and through the naturalizer wrapper.
    first_tail = caption_policy.STYLE_TAILS[0]
    tailed_caption = f"caption body{first_tail}"
    _expect(
        caption_policy.strip_style_tail(tailed_caption) == "caption body",
        "Caption style-tail stripping changed",
    )
    _expect(
        caption_naturalizer._strip_style_tail(tailed_caption) == "caption body",
        "Caption naturalizer style-tail wrapper should delegate",
    )

    # Composition and clothing cleanup.
    _expect(
        caption_policy.normalize_composition("vertical centered body frame") == "centered body frame",
        "Caption composition normalization changed",
    )
    _expect(
        caption_policy.clean_clothing("silk dress, fashion editorial styling") == "silk dress",
        "Caption clothing cleanup changed",
    )

    # Metadata labels: action family alone, then position family taking priority.
    action_only_row = {"action_family": "oral", "position_family": ""}
    _expect(
        caption_policy.metadata_action_label(action_only_row) == "oral action",
        "Caption action-family label changed",
    )
    position_priority_row = {"action_family": "oral", "position_family": "anal"}
    _expect(
        caption_naturalizer._metadata_action_label(position_priority_row) == "anal action",
        "Caption position-family label priority changed",
    )
 def smoke_sdxl_presets_policy() -> None:
    _expect(
        sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
@@ -2964,6 +3006,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
    ("row_normalization_policy", smoke_row_normalization_policy),
    ("formatter_input_policy", smoke_formatter_input_policy),
    ("formatter_cast_policy", smoke_formatter_cast_policy),
    ("caption_policy", smoke_caption_policy),
    ("sdxl_presets_policy", smoke_sdxl_presets_policy),
    ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
    ("category_library_route", smoke_category_library_route),