Extract caption naturalizer policy

2026-06-27 01:38:00 +02:00
parent 5efa073bfb
commit 36ce394462
5 changed files with 163 additions and 61 deletions
@@ -4,54 +4,26 @@ import re
 from typing import Any

 try:
+    from . import caption_policy
    from . import formatter_input as input_policy
-    from .hardcore_action_metadata import normalize_hardcore_action_family
    from . import krea_cast as cast_policy
    from .prompt_hygiene import sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
+    import caption_policy
    import formatter_input as input_policy
-    from hardcore_action_metadata import normalize_hardcore_action_family
    import krea_cast as cast_policy
    from prompt_hygiene import sanitize_prose_text


-OLD_TRIGGER = "sxcpinup_coloredpencil"
-DEFAULT_TRIGGER = "sxcppnl7"
-
-STYLE_TAILS = [
-    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
-    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
-]
+OLD_TRIGGER = caption_policy.OLD_TRIGGER
+DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
+STYLE_TAILS = caption_policy.STYLE_TAILS

 PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()

-ITEM_LABELS = (
-    "Sexual pose",
-    "Erotic outfit",
-    "Clothing",
-)
-
-ACTION_FAMILY_CAPTION_LABELS = {
-    "foreplay": "foreplay action",
-    "outercourse": "non-penetrative action",
-    "oral": "oral action",
-    "penetration": "penetrative action",
-    "toy_double": "toy-assisted double-contact action",
-    "climax": "climax action",
-}
-
-POSITION_FAMILY_CAPTION_LABELS = {
-    "penetrative": "penetrative action",
-    "foreplay": "foreplay action",
-    "interaction": "interaction beat",
-    "manual": "manual action",
-    "oral": "oral action",
-    "outercourse": "non-penetrative action",
-    "anal": "anal action",
-    "climax": "climax action",
-    "threesome": "three-person action",
-    "group": "group action",
-}
+ITEM_LABELS = caption_policy.ITEM_LABELS
+ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
+POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS


 def _clean_text(value: Any) -> str:
@@ -105,13 +77,7 @@ def _human_join(parts: list[str]) -> str:


 def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
-    position_family = _clean_text(row.get("position_family")).lower()
-    if position_family in POSITION_FAMILY_CAPTION_LABELS:
-        return POSITION_FAMILY_CAPTION_LABELS[position_family]
-    action_family = normalize_hardcore_action_family(row.get("action_family"))
-    if action_family in ACTION_FAMILY_CAPTION_LABELS:
-        return ACTION_FAMILY_CAPTION_LABELS[action_family]
-    return default
+    return caption_policy.metadata_action_label(row, default)


 def _prompt_cast_descriptors(text: str) -> str:
@@ -135,11 +101,7 @@ def _natural_label_text(text: Any, labels: list[str]) -> str:


 def _strip_style_tail(text: str) -> str:
-    text = _clean_text(text)
-    for tail in STYLE_TAILS:
-        if text.endswith(tail):
-            return text[: -len(tail)].strip(" ,")
-    return text
+    return caption_policy.strip_style_tail(text)


 def _remove_trigger(text: str, trigger: str) -> str:
@@ -185,14 +147,11 @@ def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:


 def _normalize_composition(text: str) -> str:
-    return re.sub(r"^vertical\s+", "", _clean_text(text), flags=re.IGNORECASE)
+    return caption_policy.normalize_composition(text)


 def _clean_clothing(text: str) -> str:
-    text = _clean_text(text)
-    text = re.sub(r",?\s*fashion editorial styling$", "", text, flags=re.IGNORECASE)
-    text = re.sub(r",?\s*resort styling$", "", text, flags=re.IGNORECASE)
-    return text.strip(" ,")
+    return caption_policy.clean_clothing(text)


 def _body_phrase(body: Any, figure_note: Any = "") -> str:
@@ -300,10 +259,7 @@ def _verb_for_row(row: dict[str, Any]) -> str:


 def _detail_allows(level: str, dense_only: bool = False) -> bool:
-    level = (level or "balanced").strip().lower()
-    if dense_only:
-        return level == "dense"
-    return level != "concise"
+    return caption_policy.detail_allows(level, dense_only=dense_only)


 def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
@@ -674,8 +630,8 @@ def naturalize_caption(
 ) -> tuple[str, str]:
    """Rewrite tag-style prompt/caption text into compact natural language."""
    input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
-    detail_level = detail_level if detail_level in ("concise", "balanced", "dense") else "balanced"
-    keep_style = style_policy == "keep_style_terms"
+    detail_level = caption_policy.normalize_detail_level(detail_level)
+    keep_style = caption_policy.keep_style_terms(style_policy)
    row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
    if row is not None:
        prose, method = _metadata_to_prose(row, detail_level, keep_style)