Extract caption naturalizer policy

This commit is contained in:
2026-06-27 01:38:00 +02:00
parent 5efa073bfb
commit 36ce394462
5 changed files with 163 additions and 61 deletions
+15 -59
View File
@@ -4,54 +4,26 @@ import re
from typing import Any
try:
from . import caption_policy
from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from . import krea_cast as cast_policy
from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
import caption_policy
import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family
import krea_cast as cast_policy
from prompt_hygiene import sanitize_prose_text
# NOTE(review): this span comes from a rendered diff whose +/- gutter and
# indentation were stripped, so each name below appears twice. The literal
# definitions look like the removed (pre-commit) side — TODO confirm against
# the repository.
OLD_TRIGGER = "sxcpinup_coloredpencil"
DEFAULT_TRIGGER = "sxcppnl7"
STYLE_TAILS = [
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
]
# Presumably the added (post-commit) side: the same three names re-exported
# from caption_policy so existing references to them keep resolving.
OLD_TRIGGER = caption_policy.OLD_TRIGGER
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
STYLE_TAILS = caption_policy.STYLE_TAILS
# Appears to be an unchanged context line; the labels are obtained by calling
# input_policy.prompt_field_labels() at import time.
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
# NOTE(review): this span comes from a rendered diff whose +/- gutter and
# indentation were stripped. The three literal tables below look like the
# removed (pre-commit) definitions — TODO confirm against the repository.
ITEM_LABELS = (
"Sexual pose",
"Erotic outfit",
"Clothing",
)
# Maps an action-family key to the phrase used for it in captions.
ACTION_FAMILY_CAPTION_LABELS = {
"foreplay": "foreplay action",
"outercourse": "non-penetrative action",
"oral": "oral action",
"penetration": "penetrative action",
"toy_double": "toy-assisted double-contact action",
"climax": "climax action",
}
# Maps a position-family key to the phrase used for it in captions.
POSITION_FAMILY_CAPTION_LABELS = {
"penetrative": "penetrative action",
"foreplay": "foreplay action",
"interaction": "interaction beat",
"manual": "manual action",
"oral": "oral action",
"outercourse": "non-penetrative action",
"anal": "anal action",
"climax": "climax action",
"threesome": "three-person action",
"group": "group action",
}
# Presumably the added (post-commit) side: the same three names re-exported
# from caption_policy instead of being defined inline.
ITEM_LABELS = caption_policy.ITEM_LABELS
ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
def _clean_text(value: Any) -> str:
@@ -105,13 +77,7 @@ def _human_join(parts: list[str]) -> str:
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
# NOTE(review): rendered diff with the +/- gutter and indentation stripped.
# The hunk header (13 old lines -> 7 new lines) suggests the seven lines up to
# `return default` are the removed body and the last `return` is its
# replacement — TODO confirm against the repository.
# Removed body: the row's "position_family" (passed through _clean_text and
# lowercased) is looked up first; the normalized "action_family" is the
# fallback; `default` is returned when neither key has a caption label.
position_family = _clean_text(row.get("position_family")).lower()
if position_family in POSITION_FAMILY_CAPTION_LABELS:
return POSITION_FAMILY_CAPTION_LABELS[position_family]
action_family = normalize_hardcore_action_family(row.get("action_family"))
if action_family in ACTION_FAMILY_CAPTION_LABELS:
return ACTION_FAMILY_CAPTION_LABELS[action_family]
return default
# Replacement: forwards row and default to caption_policy.metadata_action_label.
return caption_policy.metadata_action_label(row, default)
def _prompt_cast_descriptors(text: str) -> str:
@@ -135,11 +101,7 @@ def _natural_label_text(text: Any, labels: list[str]) -> str:
def _strip_style_tail(text: str) -> str:
# NOTE(review): rendered diff with the +/- gutter and indentation stripped.
# The hunk header (11 old lines -> 7 new lines) suggests the five lines up to
# `return text` are the removed body and the last `return` is its
# replacement — TODO confirm against the repository.
# Removed body: after _clean_text, the first STYLE_TAILS entry that the text
# ends with is cut off and surrounding spaces/commas are stripped; if no entry
# matches, the cleaned text is returned as is.
text = _clean_text(text)
for tail in STYLE_TAILS:
if text.endswith(tail):
return text[: -len(tail)].strip(" ,")
return text
# Replacement: forwards the text to caption_policy.strip_style_tail.
return caption_policy.strip_style_tail(text)
def _remove_trigger(text: str, trigger: str) -> str:
@@ -185,14 +147,11 @@ def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
def _normalize_composition(text: str) -> str:
# NOTE(review): rendered diff with the +/- gutter and indentation stripped; the
# first `return` looks like the removed line and the second like its
# replacement — TODO confirm against the repository.
# Removed line: drops a leading "vertical" plus following whitespace
# (case-insensitive) from the _clean_text result.
return re.sub(r"^vertical\s+", "", _clean_text(text), flags=re.IGNORECASE)
# Replacement: forwards the text to caption_policy.normalize_composition.
return caption_policy.normalize_composition(text)
def _clean_clothing(text: str) -> str:
# NOTE(review): rendered diff with the +/- gutter and indentation stripped; the
# four lines up to `return text.strip(" ,")` look like the removed body and
# the last `return` like its replacement — TODO confirm against the repository.
# Removed body: after _clean_text, a trailing "fashion editorial styling" and
# then a trailing "resort styling" (each with an optional preceding comma and
# whitespace, case-insensitive) are removed before stripping spaces/commas.
text = _clean_text(text)
text = re.sub(r",?\s*fashion editorial styling$", "", text, flags=re.IGNORECASE)
text = re.sub(r",?\s*resort styling$", "", text, flags=re.IGNORECASE)
return text.strip(" ,")
# Replacement: forwards the text to caption_policy.clean_clothing.
return caption_policy.clean_clothing(text)
def _body_phrase(body: Any, figure_note: Any = "") -> str:
@@ -300,10 +259,7 @@ def _verb_for_row(row: dict[str, Any]) -> str:
def _detail_allows(level: str, dense_only: bool = False) -> bool:
# NOTE(review): rendered diff with the +/- gutter and indentation stripped.
# The hunk header (10 old lines -> 7 new lines) suggests the four lines up to
# `return level != "concise"` are the removed body and the last `return` is
# its replacement — TODO confirm against the repository.
# Removed body: a falsy level falls back to "balanced"; the level is trimmed
# and lowercased. With dense_only only "dense" passes; otherwise every level
# except "concise" passes.
level = (level or "balanced").strip().lower()
if dense_only:
return level == "dense"
return level != "concise"
# Replacement: forwards level to caption_policy.detail_allows, passing
# dense_only by keyword.
return caption_policy.detail_allows(level, dense_only=dense_only)
def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
@@ -674,8 +630,8 @@ def naturalize_caption(
) -> tuple[str, str]:
"""Rewrite tag-style prompt/caption text into compact natural language."""
input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
detail_level = detail_level if detail_level in ("concise", "balanced", "dense") else "balanced"
keep_style = style_policy == "keep_style_terms"
detail_level = caption_policy.normalize_detail_level(detail_level)
keep_style = caption_policy.keep_style_terms(style_policy)
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
if row is not None:
prose, method = _metadata_to_prose(row, detail_level, keep_style)