Extract caption naturalizer policy

This commit is contained in:
2026-06-27 01:38:00 +02:00
parent 5efa073bfb
commit 36ce394462
5 changed files with 163 additions and 61 deletions
+15 -59
View File
@@ -4,54 +4,26 @@ import re
from typing import Any from typing import Any
try: try:
from . import caption_policy
from . import formatter_input as input_policy from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from . import krea_cast as cast_policy from . import krea_cast as cast_policy
from .prompt_hygiene import sanitize_prose_text from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
import caption_policy
import formatter_input as input_policy import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family
import krea_cast as cast_policy import krea_cast as cast_policy
from prompt_hygiene import sanitize_prose_text from prompt_hygiene import sanitize_prose_text
OLD_TRIGGER = "sxcpinup_coloredpencil" OLD_TRIGGER = caption_policy.OLD_TRIGGER
DEFAULT_TRIGGER = "sxcppnl7" DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
STYLE_TAILS = caption_policy.STYLE_TAILS
STYLE_TAILS = [
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
]
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels() PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
ITEM_LABELS = ( ITEM_LABELS = caption_policy.ITEM_LABELS
"Sexual pose", ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
"Erotic outfit", POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
"Clothing",
)
ACTION_FAMILY_CAPTION_LABELS = {
"foreplay": "foreplay action",
"outercourse": "non-penetrative action",
"oral": "oral action",
"penetration": "penetrative action",
"toy_double": "toy-assisted double-contact action",
"climax": "climax action",
}
POSITION_FAMILY_CAPTION_LABELS = {
"penetrative": "penetrative action",
"foreplay": "foreplay action",
"interaction": "interaction beat",
"manual": "manual action",
"oral": "oral action",
"outercourse": "non-penetrative action",
"anal": "anal action",
"climax": "climax action",
"threesome": "three-person action",
"group": "group action",
}
def _clean_text(value: Any) -> str: def _clean_text(value: Any) -> str:
@@ -105,13 +77,7 @@ def _human_join(parts: list[str]) -> str:
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str: def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
position_family = _clean_text(row.get("position_family")).lower() return caption_policy.metadata_action_label(row, default)
if position_family in POSITION_FAMILY_CAPTION_LABELS:
return POSITION_FAMILY_CAPTION_LABELS[position_family]
action_family = normalize_hardcore_action_family(row.get("action_family"))
if action_family in ACTION_FAMILY_CAPTION_LABELS:
return ACTION_FAMILY_CAPTION_LABELS[action_family]
return default
def _prompt_cast_descriptors(text: str) -> str: def _prompt_cast_descriptors(text: str) -> str:
@@ -135,11 +101,7 @@ def _natural_label_text(text: Any, labels: list[str]) -> str:
def _strip_style_tail(text: str) -> str: def _strip_style_tail(text: str) -> str:
text = _clean_text(text) return caption_policy.strip_style_tail(text)
for tail in STYLE_TAILS:
if text.endswith(tail):
return text[: -len(tail)].strip(" ,")
return text
def _remove_trigger(text: str, trigger: str) -> str: def _remove_trigger(text: str, trigger: str) -> str:
@@ -185,14 +147,11 @@ def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
def _normalize_composition(text: str) -> str: def _normalize_composition(text: str) -> str:
return re.sub(r"^vertical\s+", "", _clean_text(text), flags=re.IGNORECASE) return caption_policy.normalize_composition(text)
def _clean_clothing(text: str) -> str: def _clean_clothing(text: str) -> str:
text = _clean_text(text) return caption_policy.clean_clothing(text)
text = re.sub(r",?\s*fashion editorial styling$", "", text, flags=re.IGNORECASE)
text = re.sub(r",?\s*resort styling$", "", text, flags=re.IGNORECASE)
return text.strip(" ,")
def _body_phrase(body: Any, figure_note: Any = "") -> str: def _body_phrase(body: Any, figure_note: Any = "") -> str:
@@ -300,10 +259,7 @@ def _verb_for_row(row: dict[str, Any]) -> str:
def _detail_allows(level: str, dense_only: bool = False) -> bool: def _detail_allows(level: str, dense_only: bool = False) -> bool:
level = (level or "balanced").strip().lower() return caption_policy.detail_allows(level, dense_only=dense_only)
if dense_only:
return level == "dense"
return level != "concise"
def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
@@ -674,8 +630,8 @@ def naturalize_caption(
) -> tuple[str, str]: ) -> tuple[str, str]:
"""Rewrite tag-style prompt/caption text into compact natural language.""" """Rewrite tag-style prompt/caption text into compact natural language."""
input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto" input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
detail_level = detail_level if detail_level in ("concise", "balanced", "dense") else "balanced" detail_level = caption_policy.normalize_detail_level(detail_level)
keep_style = style_policy == "keep_style_terms" keep_style = caption_policy.keep_style_terms(style_policy)
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint) row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
if row is not None: if row is not None:
prose, method = _metadata_to_prose(row, detail_level, keep_style) prose, method = _metadata_to_prose(row, detail_level, keep_style)
+99
View File
@@ -0,0 +1,99 @@
from __future__ import annotations
import re
from typing import Any
try:
from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family
# Trigger tokens re-exported by caption_naturalizer.
OLD_TRIGGER = "sxcpinup_coloredpencil"
DEFAULT_TRIGGER = "sxcppnl7"

# Accepted option values; normalize_detail_level / normalize_style_policy
# fall back to "balanced" / "drop_style_tail" for anything else.
DETAIL_LEVELS = ("balanced", "concise", "dense")
STYLE_POLICIES = ("drop_style_tail", "keep_style_terms")

# Style suffixes that strip_style_tail removes from the end of a caption.
# Each entry is one string, split over adjacent literals for readability.
STYLE_TAILS = [
    (
        ", coloured pencil comic illustration, crisp linework, hatching,"
        " soft pastel palette, warm sensual lighting,"
        " textured parchment paper"
    ),
    (
        ", coloured pencil comic illustration, crisp linework, hatching,"
        " soft pastel palette, warm sensual lighting,"
        " textured paper"
    ),
]

# Item labels re-exported by caption_naturalizer.
ITEM_LABELS = (
    "Sexual pose",
    "Erotic outfit",
    "Clothing",
)

# Caption label for each normalized `action_family` value
# (used by metadata_action_label when no position-family label applies).
ACTION_FAMILY_CAPTION_LABELS = {
    "foreplay": "foreplay action",
    "outercourse": "non-penetrative action",
    "oral": "oral action",
    "penetration": "penetrative action",
    "toy_double": "toy-assisted double-contact action",
    "climax": "climax action",
}

# Caption label for each lower-cased `position_family` value
# (metadata_action_label consults this table first).
POSITION_FAMILY_CAPTION_LABELS = {
    "penetrative": "penetrative action",
    "foreplay": "foreplay action",
    "interaction": "interaction beat",
    "manual": "manual action",
    "oral": "oral action",
    "outercourse": "non-penetrative action",
    "anal": "anal action",
    "climax": "climax action",
    "threesome": "three-person action",
    "group": "group action",
}
def normalize_detail_level(value: str) -> str:
    """Return ``value`` if it is one of ``DETAIL_LEVELS``, else ``"balanced"``.

    The comparison is exact: no case folding or whitespace trimming is done.
    """
    if value in DETAIL_LEVELS:
        return value
    return "balanced"
def normalize_style_policy(value: str) -> str:
    """Return ``value`` if it is one of ``STYLE_POLICIES``, else ``"drop_style_tail"``.

    The comparison is exact: no case folding or whitespace trimming is done.
    """
    if value in STYLE_POLICIES:
        return value
    return "drop_style_tail"
def keep_style_terms(style_policy: str) -> bool:
    """Return True only when the normalized policy is ``"keep_style_terms"``.

    Unrecognized policies normalize to ``"drop_style_tail"`` and so yield False.
    """
    policy = normalize_style_policy(style_policy)
    return policy == "keep_style_terms"
def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Report whether a detail gated by ``level`` should be included.

    ``level`` is lower-cased, stripped and normalized first (an empty or
    unknown value counts as ``"balanced"``).

    Args:
        level: Requested detail level.
        dense_only: When True, only the ``"dense"`` level passes the gate.

    Returns:
        With ``dense_only``: True for ``"dense"`` only. Otherwise: True for
        every level except ``"concise"``.
    """
    normalized = normalize_detail_level((level or "balanced").strip().lower())
    return normalized == "dense" if dense_only else normalized != "concise"
def strip_style_tail(text: str) -> str:
    """Remove a known style tail from the end of ``text``.

    The text is first passed through ``input_policy.clean_text``. The first
    entry of ``STYLE_TAILS`` that the cleaned text ends with is cut off, and
    the remainder is stripped of surrounding spaces and commas. When no tail
    matches, the cleaned text is returned as-is.
    """
    cleaned = input_policy.clean_text(text)
    matched_tail = next((tail for tail in STYLE_TAILS if cleaned.endswith(tail)), None)
    if matched_tail is None:
        return cleaned
    return cleaned[: -len(matched_tail)].strip(" ,")
def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Pick a caption label for a metadata row from its family fields.

    Args:
        row: Metadata row; ``position_family`` and ``action_family`` are read.
        default: Label returned when neither family maps to a caption label.

    Returns:
        The ``POSITION_FAMILY_CAPTION_LABELS`` entry for the cleaned,
        lower-cased ``position_family`` if present; otherwise the
        ``ACTION_FAMILY_CAPTION_LABELS`` entry for the normalized
        ``action_family``; otherwise ``default``.
    """
    position_key = input_policy.clean_text(row.get("position_family")).lower()
    if position_key not in POSITION_FAMILY_CAPTION_LABELS:
        # Action family is only normalized when the position family gave no label.
        action_key = normalize_hardcore_action_family(row.get("action_family"))
        return ACTION_FAMILY_CAPTION_LABELS.get(action_key, default)
    return POSITION_FAMILY_CAPTION_LABELS[position_key]
def normalize_composition(text: str) -> str:
    """Clean ``text`` and drop a leading ``vertical`` word (any letter case).

    Only a prefix followed by whitespace is removed; other occurrences of the
    word are left untouched.
    """
    cleaned = input_policy.clean_text(text)
    return re.sub(r"^vertical\s+", "", cleaned, flags=re.IGNORECASE)
def clean_clothing(text: str) -> str:
    """Clean ``text`` and remove trailing styling phrases.

    A trailing "fashion editorial styling" is removed first, then a trailing
    "resort styling" (both case-insensitive, with an optional preceding
    comma). The result is stripped of surrounding spaces and commas.
    """
    cleaned = input_policy.clean_text(text)
    # Order matters: each pattern is anchored at the end of the current text.
    for suffix_pattern in (
        r",?\s*fashion editorial styling$",
        r",?\s*resort styling$",
    ):
        cleaned = re.sub(suffix_pattern, "", cleaned, flags=re.IGNORECASE)
    return cleaned.strip(" ,")
+5 -2
View File
@@ -302,10 +302,13 @@ Keep here:
- natural sentence caption assembly; - natural sentence caption assembly;
- training-caption trigger behavior; - training-caption trigger behavior;
- style-tail policy. - style-tail policy from `caption_policy.py`.
- metadata-family action labels from `action_family` and `position_family`. - metadata-family action labels from `action_family` and `position_family` via
`caption_policy.py`.
- shared formatter input parsing from `formatter_input.py`. - shared formatter input parsing from `formatter_input.py`.
- shared cast descriptor parsing and label replacement from `krea_cast.py`. - shared cast descriptor parsing and label replacement from `krea_cast.py`.
- caption detail-level/style-policy normalization, clothing cleanup, and
composition cleanup from `caption_policy.py`.
Improve later: Improve later:
+1
View File
@@ -97,6 +97,7 @@ Core helper ownership:
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. | | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
| `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. | | `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
| `caption_policy.py` | Caption naturalizer policy data and helpers: style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
## Node IO Map ## Node IO Map
+43
View File
@@ -24,6 +24,7 @@ if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT)) sys.path.insert(0, str(ROOT))
import caption_naturalizer # noqa: E402 import caption_naturalizer # noqa: E402
import caption_policy # noqa: E402
import character_config # noqa: E402 import character_config # noqa: E402
import character_profile # noqa: E402 import character_profile # noqa: E402
import category_cast_config # noqa: E402 import category_cast_config # noqa: E402
@@ -951,6 +952,47 @@ def smoke_formatter_cast_policy() -> None:
) )
def smoke_caption_policy() -> None:
    """Smoke-check caption_policy and caption_naturalizer's delegation to it.

    Covers: identity of the re-exported policy tables, detail-level and
    style-policy normalization fallbacks, the detail gates, style-tail
    stripping for every known tail, composition and clothing cleanup, and
    the position-family-over-action-family label priority.
    """
    # The naturalizer must re-export the very same objects, not copies.
    _expect(
        caption_naturalizer.STYLE_TAILS is caption_policy.STYLE_TAILS,
        "Caption naturalizer style tails should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.ITEM_LABELS is caption_policy.ITEM_LABELS,
        "Caption naturalizer item labels should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.ACTION_FAMILY_CAPTION_LABELS is caption_policy.ACTION_FAMILY_CAPTION_LABELS,
        "Caption naturalizer action labels should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.POSITION_FAMILY_CAPTION_LABELS is caption_policy.POSITION_FAMILY_CAPTION_LABELS,
        "Caption naturalizer position labels should delegate to caption_policy",
    )

    # Option normalization and detail gates.
    _expect(caption_policy.normalize_detail_level("bad") == "balanced", "Caption invalid detail fallback changed")
    _expect(caption_policy.keep_style_terms("keep_style_terms") is True, "Caption style policy keep flag changed")
    _expect(caption_policy.keep_style_terms("bad") is False, "Caption invalid style policy fallback changed")
    _expect(caption_policy.detail_allows("concise") is False, "Caption concise detail gate changed")
    _expect(caption_policy.detail_allows("balanced") is True, "Caption balanced detail gate changed")
    _expect(caption_policy.detail_allows("dense", dense_only=True) is True, "Caption dense-only gate changed")
    _expect(
        caption_policy.detail_allows("balanced", dense_only=True) is False,
        "Caption dense-only gate should reject balanced detail",
    )

    # Every known style tail must be stripped, not just the first one.
    for style_tail in caption_policy.STYLE_TAILS:
        _expect(
            caption_policy.strip_style_tail(f"caption body{style_tail}") == "caption body",
            "Caption style-tail stripping changed",
        )
    style_tail = caption_policy.STYLE_TAILS[0]
    _expect(
        caption_naturalizer._strip_style_tail(f"caption body{style_tail}") == "caption body",
        "Caption naturalizer style-tail wrapper should delegate",
    )

    _expect(
        caption_policy.normalize_composition("vertical centered body frame") == "centered body frame",
        "Caption composition normalization changed",
    )
    _expect(
        caption_policy.clean_clothing("silk dress, fashion editorial styling") == "silk dress",
        "Caption clothing cleanup changed",
    )

    # An empty position family falls through to the action-family label ...
    row = {"action_family": "oral", "position_family": ""}
    _expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed")
    # ... while a known position family takes priority over the action family.
    row = {"action_family": "oral", "position_family": "anal"}
    _expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed")
def smoke_sdxl_presets_policy() -> None: def smoke_sdxl_presets_policy() -> None:
_expect( _expect(
sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS, sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
@@ -2964,6 +3006,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
("row_normalization_policy", smoke_row_normalization_policy), ("row_normalization_policy", smoke_row_normalization_policy),
("formatter_input_policy", smoke_formatter_input_policy), ("formatter_input_policy", smoke_formatter_input_policy),
("formatter_cast_policy", smoke_formatter_cast_policy), ("formatter_cast_policy", smoke_formatter_cast_policy),
("caption_policy", smoke_caption_policy),
("sdxl_presets_policy", smoke_sdxl_presets_policy), ("sdxl_presets_policy", smoke_sdxl_presets_policy),
("hardcore_position_config_policy", smoke_hardcore_position_config_policy), ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
("category_library_route", smoke_category_library_route), ("category_library_route", smoke_category_library_route),