Extract caption naturalizer policy
This commit is contained in:
+15
-59
@@ -4,54 +4,26 @@ import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from . import caption_policy
|
||||
from . import formatter_input as input_policy
|
||||
from .hardcore_action_metadata import normalize_hardcore_action_family
|
||||
from . import krea_cast as cast_policy
|
||||
from .prompt_hygiene import sanitize_prose_text
|
||||
except ImportError: # Allows local smoke tests with `python -c`.
|
||||
import caption_policy
|
||||
import formatter_input as input_policy
|
||||
from hardcore_action_metadata import normalize_hardcore_action_family
|
||||
import krea_cast as cast_policy
|
||||
from prompt_hygiene import sanitize_prose_text
|
||||
|
||||
|
||||
# Caption policy data is owned by caption_policy; these module-level aliases
# keep the existing `caption_naturalizer.<NAME>` attributes available and bound
# to the very same objects (no copies), so identity checks against
# caption_policy keep holding.
OLD_TRIGGER = caption_policy.OLD_TRIGGER
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
STYLE_TAILS = caption_policy.STYLE_TAILS

# Prompt field-label inventory is provided by the shared formatter input module.
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()

ITEM_LABELS = caption_policy.ITEM_LABELS
ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
|
||||
|
||||
|
||||
def _clean_text(value: Any) -> str:
|
||||
@@ -105,13 +77,7 @@ def _human_join(parts: list[str]) -> str:
|
||||
|
||||
|
||||
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Return the caption label for a metadata row's position/action family.

    Thin wrapper kept so existing callers in this module do not change; the
    lookup itself is performed by ``caption_policy.metadata_action_label``,
    which receives ``row`` and ``default`` unchanged.
    """
    return caption_policy.metadata_action_label(row, default)
|
||||
|
||||
|
||||
def _prompt_cast_descriptors(text: str) -> str:
|
||||
@@ -135,11 +101,7 @@ def _natural_label_text(text: Any, labels: list[str]) -> str:
|
||||
|
||||
|
||||
def _strip_style_tail(text: str) -> str:
    """Return ``text`` with a known trailing style tail removed.

    Thin wrapper around ``caption_policy.strip_style_tail``; the list of
    recognised tails lives in ``caption_policy.STYLE_TAILS``.
    """
    return caption_policy.strip_style_tail(text)
|
||||
|
||||
|
||||
def _remove_trigger(text: str, trigger: str) -> str:
|
||||
@@ -185,14 +147,11 @@ def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
|
||||
|
||||
|
||||
def _normalize_composition(text: str) -> str:
    """Return the composition text as normalized by ``caption_policy``.

    Thin wrapper around ``caption_policy.normalize_composition``.
    """
    return caption_policy.normalize_composition(text)
|
||||
|
||||
|
||||
def _clean_clothing(text: str) -> str:
    """Return the clothing text as cleaned by ``caption_policy``.

    Thin wrapper around ``caption_policy.clean_clothing``.
    """
    return caption_policy.clean_clothing(text)
|
||||
|
||||
|
||||
def _body_phrase(body: Any, figure_note: Any = "") -> str:
|
||||
@@ -300,10 +259,7 @@ def _verb_for_row(row: dict[str, Any]) -> str:
|
||||
|
||||
|
||||
def _detail_allows(level: str, dense_only: bool = False) -> bool:
    """Report whether ``level`` permits an optional caption detail.

    Thin wrapper around ``caption_policy.detail_allows``; ``dense_only`` is
    forwarded by keyword.
    """
    return caption_policy.detail_allows(level, dense_only=dense_only)
|
||||
|
||||
|
||||
def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
|
||||
@@ -674,8 +630,8 @@ def naturalize_caption(
|
||||
) -> tuple[str, str]:
|
||||
"""Rewrite tag-style prompt/caption text into compact natural language."""
|
||||
input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
|
||||
detail_level = detail_level if detail_level in ("concise", "balanced", "dense") else "balanced"
|
||||
keep_style = style_policy == "keep_style_terms"
|
||||
detail_level = caption_policy.normalize_detail_level(detail_level)
|
||||
keep_style = caption_policy.keep_style_terms(style_policy)
|
||||
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
|
||||
if row is not None:
|
||||
prose, method = _metadata_to_prose(row, detail_level, keep_style)
|
||||
|
||||
@@ -0,0 +1,99 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from . import formatter_input as input_policy
|
||||
from .hardcore_action_metadata import normalize_hardcore_action_family
|
||||
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
|
||||
import formatter_input as input_policy
|
||||
from hardcore_action_metadata import normalize_hardcore_action_family
|
||||
|
||||
|
||||
# Trigger tokens re-exported by caption_naturalizer.
OLD_TRIGGER = "sxcpinup_coloredpencil"
DEFAULT_TRIGGER = "sxcppnl7"

# Values accepted by normalize_detail_level(); anything else becomes "balanced".
DETAIL_LEVELS = ("balanced", "concise", "dense")
# Values accepted by normalize_style_policy(); anything else becomes
# "drop_style_tail".
STYLE_POLICIES = ("drop_style_tail", "keep_style_terms")

# Trailing style suffixes that strip_style_tail() removes from caption text.
# Kept as a list: caption_naturalizer re-exports this exact object.
STYLE_TAILS = [
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
]

# Item labels re-exported by caption_naturalizer.
ITEM_LABELS = ("Sexual pose", "Erotic outfit", "Clothing")

# Caption wording keyed by normalized `action_family`; consulted by
# metadata_action_label() only when `position_family` has no entry below.
ACTION_FAMILY_CAPTION_LABELS = {
    "foreplay": "foreplay action",
    "outercourse": "non-penetrative action",
    "oral": "oral action",
    "penetration": "penetrative action",
    "toy_double": "toy-assisted double-contact action",
    "climax": "climax action",
}

# Caption wording keyed by lower-cased `position_family`; takes priority over
# the action-family table in metadata_action_label().
POSITION_FAMILY_CAPTION_LABELS = {
    "penetrative": "penetrative action",
    "foreplay": "foreplay action",
    "interaction": "interaction beat",
    "manual": "manual action",
    "oral": "oral action",
    "outercourse": "non-penetrative action",
    "anal": "anal action",
    "climax": "climax action",
    "threesome": "three-person action",
    "group": "group action",
}
|
||||
|
||||
|
||||
def normalize_detail_level(value: str) -> str:
    """Return ``value`` when it is a known detail level, else ``"balanced"``.

    The match against ``DETAIL_LEVELS`` is exact: no case folding or
    whitespace trimming is applied here.
    """
    if value in DETAIL_LEVELS:
        return value
    return "balanced"
|
||||
|
||||
|
||||
def normalize_style_policy(value: str) -> str:
    """Return ``value`` when it is a known style policy, else ``"drop_style_tail"``.

    The match against ``STYLE_POLICIES`` is exact.
    """
    if value in STYLE_POLICIES:
        return value
    return "drop_style_tail"
|
||||
|
||||
|
||||
def keep_style_terms(style_policy: str) -> bool:
    """Return True only when the normalized policy is ``"keep_style_terms"``.

    Unknown policies normalize to ``"drop_style_tail"`` and therefore yield
    False.
    """
    effective_policy = normalize_style_policy(style_policy)
    return effective_policy == "keep_style_terms"
|
||||
|
||||
|
||||
def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Report whether a detail level permits an optional caption detail.

    ``level`` is trimmed and lower-cased (a falsy value counts as
    ``"balanced"``) and then normalized, so unknown levels behave like
    ``"balanced"``.

    With ``dense_only`` set, only ``"dense"`` allows the detail; otherwise
    every level except ``"concise"`` does.
    """
    folded = (level or "balanced").strip().lower()
    effective = normalize_detail_level(folded)
    return (effective == "dense") if dense_only else (effective != "concise")
|
||||
|
||||
|
||||
def strip_style_tail(text: str) -> str:
    """Remove the first matching entry of ``STYLE_TAILS`` from the end of ``text``.

    The text is first passed through ``input_policy.clean_text``. When a tail
    matches, the remainder is returned with surrounding spaces and commas
    stripped; otherwise the cleaned text is returned as is.
    """
    cleaned = input_policy.clean_text(text)
    # Tails are tried in list order; only the first hit is removed.
    matched_tail = next((tail for tail in STYLE_TAILS if cleaned.endswith(tail)), None)
    if matched_tail is None:
        return cleaned
    return cleaned[: -len(matched_tail)].strip(" ,")
|
||||
|
||||
|
||||
def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Pick the caption label for a metadata row.

    ``position_family`` (cleaned and lower-cased) wins when it has an entry in
    ``POSITION_FAMILY_CAPTION_LABELS``. Otherwise the normalized
    ``action_family`` is looked up in ``ACTION_FAMILY_CAPTION_LABELS``, and
    ``default`` is returned when neither table matches.
    """
    position_key = input_policy.clean_text(row.get("position_family")).lower()
    if position_key not in POSITION_FAMILY_CAPTION_LABELS:
        # Only fall back to the action family when the position family is unknown.
        action_key = normalize_hardcore_action_family(row.get("action_family"))
        return ACTION_FAMILY_CAPTION_LABELS.get(action_key, default)
    return POSITION_FAMILY_CAPTION_LABELS[position_key]
|
||||
|
||||
|
||||
def normalize_composition(text: str) -> str:
    """Drop a leading ``vertical`` word (any letter case) from composition text.

    The text is cleaned with ``input_policy.clean_text`` before the prefix and
    the whitespace that follows it are removed.
    """
    cleaned = input_policy.clean_text(text)
    return re.sub(r"^vertical\s+", "", cleaned, flags=re.IGNORECASE)
|
||||
|
||||
|
||||
# One anchored pattern for every trailing styling suffix, so removal does not
# depend on the order in which the suffixes appear.
_STYLING_SUFFIX_RE = re.compile(
    r"(?:,?\s*(?:fashion editorial|resort) styling)+$",
    flags=re.IGNORECASE,
)


def clean_clothing(text: str) -> str:
    """Strip trailing styling boilerplate from a clothing description.

    The text is cleaned with ``input_policy.clean_text``; then any run of
    trailing ``fashion editorial styling`` / ``resort styling`` suffixes
    (case-insensitive, each optionally preceded by a comma and whitespace) is
    removed, in whichever order they occur. Leftover surrounding spaces and
    commas are stripped from the result.
    """
    cleaned = input_policy.clean_text(text)
    return _STYLING_SUFFIX_RE.sub("", cleaned).strip(" ,")
|
||||
@@ -302,10 +302,13 @@ Keep here:
|
||||
|
||||
- natural sentence caption assembly;
|
||||
- training-caption trigger behavior;
|
||||
- style-tail policy.
|
||||
- metadata-family action labels from `action_family` and `position_family`.
|
||||
- style-tail policy from `caption_policy.py`.
|
||||
- metadata-family action labels from `action_family` and `position_family` via
|
||||
`caption_policy.py`.
|
||||
- shared formatter input parsing from `formatter_input.py`.
|
||||
- shared cast descriptor parsing and label replacement from `krea_cast.py`.
|
||||
- caption detail-level/style-policy normalization, clothing cleanup, and
|
||||
composition cleanup from `caption_policy.py`.
|
||||
|
||||
Improve later:
|
||||
|
||||
|
||||
@@ -97,6 +97,7 @@ Core helper ownership:
|
||||
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
|
||||
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
|
||||
| `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
|
||||
| `caption_policy.py` | Caption naturalizer policy data and helpers: style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
|
||||
|
||||
## Node IO Map
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ if str(ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(ROOT))
|
||||
|
||||
import caption_naturalizer # noqa: E402
|
||||
import caption_policy # noqa: E402
|
||||
import character_config # noqa: E402
|
||||
import character_profile # noqa: E402
|
||||
import category_cast_config # noqa: E402
|
||||
@@ -951,6 +952,47 @@ def smoke_formatter_cast_policy() -> None:
|
||||
)
|
||||
|
||||
|
||||
def smoke_caption_policy() -> None:
    """Smoke-check caption_policy and caption_naturalizer's delegation to it.

    Covers three things: the re-exported policy collections are the same
    objects in both modules, the normalization and gating helpers keep their
    fallbacks, and the thin wrappers in caption_naturalizer return what
    caption_policy returns.
    """
    # Re-exported collections must be the same objects, not copies.
    _expect(
        caption_naturalizer.STYLE_TAILS is caption_policy.STYLE_TAILS,
        "Caption naturalizer style tails should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.ITEM_LABELS is caption_policy.ITEM_LABELS,
        "Caption naturalizer item labels should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.ACTION_FAMILY_CAPTION_LABELS is caption_policy.ACTION_FAMILY_CAPTION_LABELS,
        "Caption naturalizer action labels should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.POSITION_FAMILY_CAPTION_LABELS is caption_policy.POSITION_FAMILY_CAPTION_LABELS,
        "Caption naturalizer position labels should delegate to caption_policy",
    )

    # Detail-level and style-policy normalization, including invalid input.
    _expect(caption_policy.normalize_detail_level("bad") == "balanced", "Caption invalid detail fallback changed")
    _expect(
        caption_policy.normalize_style_policy("bad") == "drop_style_tail",
        "Caption invalid style policy fallback changed",
    )
    _expect(caption_policy.keep_style_terms("keep_style_terms") is True, "Caption style policy keep flag changed")
    _expect(caption_policy.keep_style_terms("bad") is False, "Caption invalid style policy keep flag changed")
    _expect(caption_policy.detail_allows("concise") is False, "Caption concise detail gate changed")
    _expect(caption_policy.detail_allows("dense", dense_only=True) is True, "Caption dense-only gate changed")
    _expect(
        caption_policy.detail_allows("balanced", dense_only=True) is False,
        "Caption dense-only gate should reject balanced",
    )
    _expect(
        caption_naturalizer._detail_allows("concise") is False,
        "Caption naturalizer detail gate wrapper should delegate",
    )

    # Style-tail stripping, directly and through the naturalizer wrapper.
    style_tail = caption_policy.STYLE_TAILS[0]
    _expect(
        caption_policy.strip_style_tail(f"caption body{style_tail}") == "caption body",
        "Caption style-tail stripping changed",
    )
    _expect(
        caption_naturalizer._strip_style_tail(f"caption body{style_tail}") == "caption body",
        "Caption naturalizer style-tail wrapper should delegate",
    )

    # Composition and clothing cleanup, directly and through the wrappers.
    _expect(
        caption_policy.normalize_composition("vertical centered body frame") == "centered body frame",
        "Caption composition normalization changed",
    )
    _expect(
        caption_naturalizer._normalize_composition("vertical centered body frame") == "centered body frame",
        "Caption naturalizer composition wrapper should delegate",
    )
    _expect(
        caption_policy.clean_clothing("silk dress, fashion editorial styling") == "silk dress",
        "Caption clothing cleanup changed",
    )
    _expect(
        caption_naturalizer._clean_clothing("silk dress, fashion editorial styling") == "silk dress",
        "Caption naturalizer clothing wrapper should delegate",
    )

    # Action-family label is used when the position family is empty ...
    row = {"action_family": "oral", "position_family": ""}
    _expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed")
    # ... and a known position family takes priority over the action family.
    row = {"action_family": "oral", "position_family": "anal"}
    _expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed")
|
||||
|
||||
|
||||
def smoke_sdxl_presets_policy() -> None:
|
||||
_expect(
|
||||
sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
|
||||
@@ -2964,6 +3006,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
|
||||
("row_normalization_policy", smoke_row_normalization_policy),
|
||||
("formatter_input_policy", smoke_formatter_input_policy),
|
||||
("formatter_cast_policy", smoke_formatter_cast_policy),
|
||||
("caption_policy", smoke_caption_policy),
|
||||
("sdxl_presets_policy", smoke_sdxl_presets_policy),
|
||||
("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
|
||||
("category_library_route", smoke_category_library_route),
|
||||
|
||||
Reference in New Issue
Block a user