Extract caption naturalizer policy

This commit is contained in:
2026-06-27 01:38:00 +02:00
parent 5efa073bfb
commit 36ce394462
5 changed files with 163 additions and 61 deletions
+99
View File
@@ -0,0 +1,99 @@
from __future__ import annotations
import re
from typing import Any
try:
from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family
# Trigger strings. Neither constant is read by the code visible in this part
# of the module.
# NOTE(review): presumably OLD_TRIGGER is a legacy token that DEFAULT_TRIGGER
# replaces -- confirm against the callers.
OLD_TRIGGER = "sxcpinup_coloredpencil"
DEFAULT_TRIGGER = "sxcppnl7"
# Values accepted by normalize_detail_level(); anything else falls back to
# "balanced".
DETAIL_LEVELS = ("balanced", "concise", "dense")
# Values accepted by normalize_style_policy(); anything else falls back to
# "drop_style_tail".
STYLE_POLICIES = ("drop_style_tail", "keep_style_terms")
# Exact trailing substrings removed by strip_style_tail(). They are tried in
# list order and the first one the text ends with is cut off. The two entries
# differ only in their final term ("textured parchment paper" vs
# "textured paper"); the comparison is a plain, case-sensitive endswith().
STYLE_TAILS = [
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
]
# Not referenced by the code visible in this part of the module.
# NOTE(review): presumably the label prefixes recognised on caption items --
# confirm against the code that consumes this tuple.
ITEM_LABELS = (
"Sexual pose",
"Erotic outfit",
"Clothing",
)
# Caption label per action family. metadata_action_label() consults this table
# with the key returned by normalize_hardcore_action_family(), and only after
# the row's position family produced no label.
ACTION_FAMILY_CAPTION_LABELS = {
"foreplay": "foreplay action",
"outercourse": "non-penetrative action",
"oral": "oral action",
"penetration": "penetrative action",
"toy_double": "toy-assisted double-contact action",
"climax": "climax action",
}
# Caption label per position family. metadata_action_label() looks up the
# cleaned, lower-cased row["position_family"] here first; a hit takes
# precedence over the action-family table.
POSITION_FAMILY_CAPTION_LABELS = {
"penetrative": "penetrative action",
"foreplay": "foreplay action",
"interaction": "interaction beat",
"manual": "manual action",
"oral": "oral action",
"outercourse": "non-penetrative action",
"anal": "anal action",
"climax": "climax action",
"threesome": "three-person action",
"group": "group action",
}
def normalize_detail_level(value: str) -> str:
    """Return ``value`` if it is a known detail level, otherwise ``"balanced"``.

    The value is compared against ``DETAIL_LEVELS`` exactly as passed in; no
    trimming or case folding is applied here.
    """
    if value in DETAIL_LEVELS:
        return value
    return "balanced"
def normalize_style_policy(value: str) -> str:
    """Return ``value`` if it is a known style policy, else ``"drop_style_tail"``.

    The value is compared against ``STYLE_POLICIES`` exactly as passed in; no
    trimming or case folding is applied here.
    """
    if value in STYLE_POLICIES:
        return value
    return "drop_style_tail"
def keep_style_terms(style_policy: str) -> bool:
    """Report whether ``style_policy`` resolves to ``"keep_style_terms"``.

    The policy is first passed through ``normalize_style_policy``, so any
    unrecognised value resolves to the default policy and yields ``False``.
    """
    resolved_policy = normalize_style_policy(style_policy)
    return resolved_policy == "keep_style_terms"
def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Decide whether a caption detail is permitted at the given detail level.

    Args:
        level: Requested detail level. A falsy value is treated as
            ``"balanced"``; otherwise it is stripped and lower-cased before
            being normalised with ``normalize_detail_level``.
        dense_only: When true, the detail is permitted only at ``"dense"``.

    Returns:
        With ``dense_only`` set, ``True`` only for the ``"dense"`` level;
        otherwise ``True`` for every level except ``"concise"``.
    """
    requested = (level or "balanced").strip().lower()
    effective = normalize_detail_level(requested)
    return effective == "dense" if dense_only else effective != "concise"
def strip_style_tail(text: str) -> str:
    """Remove a known style tail from the end of ``text``.

    The text is first passed through ``input_policy.clean_text``. The entries
    of ``STYLE_TAILS`` are then tried in order; the first one the cleaned text
    ends with is cut off, and leftover spaces and commas are trimmed from both
    ends of the remainder. If no tail matches, the cleaned text is returned
    as is.
    """
    cleaned = input_policy.clean_text(text)
    # First matching tail in declaration order, or None when nothing matches.
    matched_tail = next(
        (tail for tail in STYLE_TAILS if cleaned.endswith(tail)),
        None,
    )
    if matched_tail is None:
        return cleaned
    return cleaned[: -len(matched_tail)].strip(" ,")
def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Pick the caption label for a row from its family metadata.

    Args:
        row: Mapping that may carry ``"position_family"`` and
            ``"action_family"`` entries.
        default: Label returned when neither family maps to a label.

    Returns:
        The label for the cleaned, lower-cased position family if one exists
        in ``POSITION_FAMILY_CAPTION_LABELS``; otherwise the label for the
        normalised action family from ``ACTION_FAMILY_CAPTION_LABELS``;
        otherwise ``default``.
    """
    position_key = input_policy.clean_text(row.get("position_family")).lower()
    position_label = POSITION_FAMILY_CAPTION_LABELS.get(position_key)
    if position_label is not None:
        # The position family wins; the action family is not consulted.
        return position_label
    action_key = normalize_hardcore_action_family(row.get("action_family"))
    return ACTION_FAMILY_CAPTION_LABELS.get(action_key, default)
def normalize_composition(text: str) -> str:
    """Drop a leading ``vertical`` word from a composition description.

    The text is passed through ``input_policy.clean_text`` first. A
    case-insensitive ``vertical`` at the very start is removed together with
    the whitespace after it; text that does not start that way is returned
    cleaned but otherwise unchanged.
    """
    cleaned = input_policy.clean_text(text)
    return re.sub(r"^vertical\s+", "", cleaned, flags=re.IGNORECASE)
def clean_clothing(text: str) -> str:
    """Strip trailing styling phrases from a clothing description.

    The text is passed through ``input_policy.clean_text`` first. A trailing
    ``fashion editorial styling`` is removed, then a trailing
    ``resort styling``. Both matches are case-insensitive and take an optional
    preceding comma and whitespace with them. Each phrase is removed only
    where it ends the text at the moment its pattern runs, so the order of
    the patterns matters. Leftover spaces and commas are finally trimmed from
    both ends.
    """
    result = input_policy.clean_text(text)
    # Applied sequentially, in this order, each against the previous result.
    for tail_pattern in (
        r",?\s*fashion editorial styling$",
        r",?\s*resort styling$",
    ):
        result = re.sub(tail_pattern, "", result, flags=re.IGNORECASE)
    return result.strip(" ,")