Extract caption naturalizer policy
This commit is contained in:
+15
-59
@@ -4,54 +4,26 @@ import re
|
|||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
from . import caption_policy
|
||||||
from . import formatter_input as input_policy
|
from . import formatter_input as input_policy
|
||||||
from .hardcore_action_metadata import normalize_hardcore_action_family
|
|
||||||
from . import krea_cast as cast_policy
|
from . import krea_cast as cast_policy
|
||||||
from .prompt_hygiene import sanitize_prose_text
|
from .prompt_hygiene import sanitize_prose_text
|
||||||
except ImportError: # Allows local smoke tests with `python -c`.
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
|
import caption_policy
|
||||||
import formatter_input as input_policy
|
import formatter_input as input_policy
|
||||||
from hardcore_action_metadata import normalize_hardcore_action_family
|
|
||||||
import krea_cast as cast_policy
|
import krea_cast as cast_policy
|
||||||
from prompt_hygiene import sanitize_prose_text
|
from prompt_hygiene import sanitize_prose_text
|
||||||
|
|
||||||
|
|
||||||
OLD_TRIGGER = "sxcpinup_coloredpencil"
|
OLD_TRIGGER = caption_policy.OLD_TRIGGER
|
||||||
DEFAULT_TRIGGER = "sxcppnl7"
|
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
|
||||||
|
STYLE_TAILS = caption_policy.STYLE_TAILS
|
||||||
STYLE_TAILS = [
|
|
||||||
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
|
|
||||||
", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
|
|
||||||
]
|
|
||||||
|
|
||||||
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
|
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
|
||||||
|
|
||||||
ITEM_LABELS = (
|
ITEM_LABELS = caption_policy.ITEM_LABELS
|
||||||
"Sexual pose",
|
ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
|
||||||
"Erotic outfit",
|
POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
|
||||||
"Clothing",
|
|
||||||
)
|
|
||||||
|
|
||||||
ACTION_FAMILY_CAPTION_LABELS = {
|
|
||||||
"foreplay": "foreplay action",
|
|
||||||
"outercourse": "non-penetrative action",
|
|
||||||
"oral": "oral action",
|
|
||||||
"penetration": "penetrative action",
|
|
||||||
"toy_double": "toy-assisted double-contact action",
|
|
||||||
"climax": "climax action",
|
|
||||||
}
|
|
||||||
|
|
||||||
POSITION_FAMILY_CAPTION_LABELS = {
|
|
||||||
"penetrative": "penetrative action",
|
|
||||||
"foreplay": "foreplay action",
|
|
||||||
"interaction": "interaction beat",
|
|
||||||
"manual": "manual action",
|
|
||||||
"oral": "oral action",
|
|
||||||
"outercourse": "non-penetrative action",
|
|
||||||
"anal": "anal action",
|
|
||||||
"climax": "climax action",
|
|
||||||
"threesome": "three-person action",
|
|
||||||
"group": "group action",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _clean_text(value: Any) -> str:
|
def _clean_text(value: Any) -> str:
|
||||||
@@ -105,13 +77,7 @@ def _human_join(parts: list[str]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
|
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
|
||||||
position_family = _clean_text(row.get("position_family")).lower()
|
return caption_policy.metadata_action_label(row, default)
|
||||||
if position_family in POSITION_FAMILY_CAPTION_LABELS:
|
|
||||||
return POSITION_FAMILY_CAPTION_LABELS[position_family]
|
|
||||||
action_family = normalize_hardcore_action_family(row.get("action_family"))
|
|
||||||
if action_family in ACTION_FAMILY_CAPTION_LABELS:
|
|
||||||
return ACTION_FAMILY_CAPTION_LABELS[action_family]
|
|
||||||
return default
|
|
||||||
|
|
||||||
|
|
||||||
def _prompt_cast_descriptors(text: str) -> str:
|
def _prompt_cast_descriptors(text: str) -> str:
|
||||||
@@ -135,11 +101,7 @@ def _natural_label_text(text: Any, labels: list[str]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _strip_style_tail(text: str) -> str:
|
def _strip_style_tail(text: str) -> str:
|
||||||
text = _clean_text(text)
|
return caption_policy.strip_style_tail(text)
|
||||||
for tail in STYLE_TAILS:
|
|
||||||
if text.endswith(tail):
|
|
||||||
return text[: -len(tail)].strip(" ,")
|
|
||||||
return text
|
|
||||||
|
|
||||||
|
|
||||||
def _remove_trigger(text: str, trigger: str) -> str:
|
def _remove_trigger(text: str, trigger: str) -> str:
|
||||||
@@ -185,14 +147,11 @@ def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _normalize_composition(text: str) -> str:
|
def _normalize_composition(text: str) -> str:
|
||||||
return re.sub(r"^vertical\s+", "", _clean_text(text), flags=re.IGNORECASE)
|
return caption_policy.normalize_composition(text)
|
||||||
|
|
||||||
|
|
||||||
def _clean_clothing(text: str) -> str:
|
def _clean_clothing(text: str) -> str:
|
||||||
text = _clean_text(text)
|
return caption_policy.clean_clothing(text)
|
||||||
text = re.sub(r",?\s*fashion editorial styling$", "", text, flags=re.IGNORECASE)
|
|
||||||
text = re.sub(r",?\s*resort styling$", "", text, flags=re.IGNORECASE)
|
|
||||||
return text.strip(" ,")
|
|
||||||
|
|
||||||
|
|
||||||
def _body_phrase(body: Any, figure_note: Any = "") -> str:
|
def _body_phrase(body: Any, figure_note: Any = "") -> str:
|
||||||
@@ -300,10 +259,7 @@ def _verb_for_row(row: dict[str, Any]) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _detail_allows(level: str, dense_only: bool = False) -> bool:
|
def _detail_allows(level: str, dense_only: bool = False) -> bool:
|
||||||
level = (level or "balanced").strip().lower()
|
return caption_policy.detail_allows(level, dense_only=dense_only)
|
||||||
if dense_only:
|
|
||||||
return level == "dense"
|
|
||||||
return level != "concise"
|
|
||||||
|
|
||||||
|
|
||||||
def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
|
def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
|
||||||
@@ -674,8 +630,8 @@ def naturalize_caption(
|
|||||||
) -> tuple[str, str]:
|
) -> tuple[str, str]:
|
||||||
"""Rewrite tag-style prompt/caption text into compact natural language."""
|
"""Rewrite tag-style prompt/caption text into compact natural language."""
|
||||||
input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
|
input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto"
|
||||||
detail_level = detail_level if detail_level in ("concise", "balanced", "dense") else "balanced"
|
detail_level = caption_policy.normalize_detail_level(detail_level)
|
||||||
keep_style = style_policy == "keep_style_terms"
|
keep_style = caption_policy.keep_style_terms(style_policy)
|
||||||
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
|
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
|
||||||
if row is not None:
|
if row is not None:
|
||||||
prose, method = _metadata_to_prose(row, detail_level, keep_style)
|
prose, method = _metadata_to_prose(row, detail_level, keep_style)
|
||||||
|
|||||||
@@ -0,0 +1,99 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
from . import formatter_input as input_policy
|
||||||
|
from .hardcore_action_metadata import normalize_hardcore_action_family
|
||||||
|
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
|
||||||
|
import formatter_input as input_policy
|
||||||
|
from hardcore_action_metadata import normalize_hardcore_action_family
|
||||||
|
|
||||||
|
|
||||||
|
# NOTE(review): judging by the names only, these look like the superseded and
# the current caption trigger tokens — confirm against callers.
OLD_TRIGGER = "sxcpinup_coloredpencil"
DEFAULT_TRIGGER = "sxcppnl7"

# Values accepted by normalize_detail_level(); anything else becomes "balanced".
DETAIL_LEVELS = ("balanced", "concise", "dense")
# Values accepted by normalize_style_policy(); anything else becomes
# "drop_style_tail".
STYLE_POLICIES = ("drop_style_tail", "keep_style_terms")

# Exact trailing style suffixes removed by strip_style_tail(). A suffix is only
# removed when the cleaned caption ends with it verbatim.
STYLE_TAILS = [
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper",
    ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured paper",
]

# Item labels; caption_naturalizer re-exports this tuple as ITEM_LABELS.
ITEM_LABELS = (
    "Sexual pose",
    "Erotic outfit",
    "Clothing",
)

# Caption wording keyed by normalized `action_family`. metadata_action_label()
# consults this table only when `position_family` has no entry below.
ACTION_FAMILY_CAPTION_LABELS = {
    "foreplay": "foreplay action",
    "outercourse": "non-penetrative action",
    "oral": "oral action",
    "penetration": "penetrative action",
    "toy_double": "toy-assisted double-contact action",
    "climax": "climax action",
}

# Caption wording keyed by lower-cased `position_family`. This table takes
# priority over ACTION_FAMILY_CAPTION_LABELS in metadata_action_label().
POSITION_FAMILY_CAPTION_LABELS = {
    "penetrative": "penetrative action",
    "foreplay": "foreplay action",
    "interaction": "interaction beat",
    "manual": "manual action",
    "oral": "oral action",
    "outercourse": "non-penetrative action",
    "anal": "anal action",
    "climax": "climax action",
    "threesome": "three-person action",
    "group": "group action",
}
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_detail_level(value: str) -> str:
    """Return ``value`` if it is a known detail level, otherwise ``"balanced"``.

    The membership test against ``DETAIL_LEVELS`` is exact: no case folding or
    whitespace trimming happens here.
    """
    if value in DETAIL_LEVELS:
        return value
    return "balanced"
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_style_policy(value: str) -> str:
    """Return ``value`` if it is a known style policy, otherwise ``"drop_style_tail"``.

    The membership test against ``STYLE_POLICIES`` is exact.
    """
    if value in STYLE_POLICIES:
        return value
    return "drop_style_tail"
|
||||||
|
|
||||||
|
|
||||||
|
def keep_style_terms(style_policy: str) -> bool:
    """Report whether ``style_policy`` resolves to ``"keep_style_terms"``.

    Unrecognized policies normalize to ``"drop_style_tail"`` and therefore
    yield ``False``.
    """
    resolved_policy = normalize_style_policy(style_policy)
    return resolved_policy == "keep_style_terms"
|
||||||
|
|
||||||
|
|
||||||
|
def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Decide whether optional caption detail is permitted at ``level``.

    ``level`` is trimmed, lower-cased and normalized first; a falsy or
    unrecognized level is treated as ``"balanced"``.

    Args:
        level: Requested detail level.
        dense_only: When true, only the ``"dense"`` level passes.

    Returns:
        With ``dense_only`` set, ``True`` only for ``"dense"``; otherwise
        ``True`` for every level except ``"concise"``.
    """
    requested = (level or "balanced").strip().lower()
    resolved = normalize_detail_level(requested)
    return (resolved == "dense") if dense_only else (resolved != "concise")
|
||||||
|
|
||||||
|
|
||||||
|
def strip_style_tail(text: str) -> str:
    """Remove one known style suffix from the end of a caption.

    The text is first passed through ``input_policy.clean_text``. The first
    entry of ``STYLE_TAILS`` that the cleaned text ends with is cut off, and
    leftover spaces/commas are trimmed from both ends. When no entry matches,
    the cleaned text is returned as is.
    """
    cleaned = input_policy.clean_text(text)
    # Only the first matching tail (in STYLE_TAILS order) is considered.
    matched_tail = next((tail for tail in STYLE_TAILS if cleaned.endswith(tail)), None)
    if matched_tail is None:
        return cleaned
    return cleaned[: -len(matched_tail)].strip(" ,")
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Pick the caption wording for a metadata row's action.

    Lookup order:
      1. the cleaned, lower-cased ``position_family`` in
         ``POSITION_FAMILY_CAPTION_LABELS``;
      2. the ``action_family`` (normalized by
         ``normalize_hardcore_action_family``) in
         ``ACTION_FAMILY_CAPTION_LABELS``;
      3. ``default``.

    Args:
        row: Metadata row; ``position_family`` and ``action_family`` are read
            with ``dict.get`` so either key may be absent.
        default: Label returned when neither family is recognized.
    """
    position_key = input_policy.clean_text(row.get("position_family")).lower()
    try:
        # Position family wins over action family when both are known.
        return POSITION_FAMILY_CAPTION_LABELS[position_key]
    except KeyError:
        pass
    action_key = normalize_hardcore_action_family(row.get("action_family"))
    return ACTION_FAMILY_CAPTION_LABELS.get(action_key, default)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_composition(text: str) -> str:
    """Drop a leading ``vertical`` word (any case) from a composition phrase.

    The text is cleaned with ``input_policy.clean_text`` first; only a
    ``vertical`` at the very start, followed by whitespace, is removed.
    """
    cleaned = input_policy.clean_text(text)
    return re.sub(r"^vertical\s+", "", cleaned, flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_clothing(text: str) -> str:
    """Strip trailing styling boilerplate from a clothing description.

    The text is cleaned with ``input_policy.clean_text``, then every trailing
    ``fashion editorial styling`` / ``resort styling`` suffix (any case, with
    an optional leading comma) is removed, and leftover spaces/commas are
    trimmed from both ends.

    A single anchored pattern removes the whole trailing run at once. Two
    separate end-anchored substitutions applied in a fixed order would leave
    ``fashion editorial styling`` behind whenever ``resort styling`` follows
    it, and would remove only one of a repeated suffix.
    """
    cleaned = input_policy.clean_text(text)
    cleaned = re.sub(
        r"(?:,?\s*(?:fashion editorial|resort) styling)+$",
        "",
        cleaned,
        flags=re.IGNORECASE,
    )
    return cleaned.strip(" ,")
|
||||||
@@ -302,10 +302,13 @@ Keep here:
|
|||||||
|
|
||||||
- natural sentence caption assembly;
|
- natural sentence caption assembly;
|
||||||
- training-caption trigger behavior;
|
- training-caption trigger behavior;
|
||||||
- style-tail policy.
|
- style-tail policy from `caption_policy.py`.
|
||||||
- metadata-family action labels from `action_family` and `position_family`.
|
- metadata-family action labels from `action_family` and `position_family` via
|
||||||
|
`caption_policy.py`.
|
||||||
- shared formatter input parsing from `formatter_input.py`.
|
- shared formatter input parsing from `formatter_input.py`.
|
||||||
- shared cast descriptor parsing and label replacement from `krea_cast.py`.
|
- shared cast descriptor parsing and label replacement from `krea_cast.py`.
|
||||||
|
- caption detail-level/style-policy normalization, clothing cleanup, and
|
||||||
|
composition cleanup from `caption_policy.py`.
|
||||||
|
|
||||||
Improve later:
|
Improve later:
|
||||||
|
|
||||||
|
|||||||
@@ -97,6 +97,7 @@ Core helper ownership:
|
|||||||
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
|
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
|
||||||
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
|
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
|
||||||
| `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
|
| `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
|
||||||
|
| `caption_policy.py` | Caption naturalizer policy data and helpers: style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
|
||||||
|
|
||||||
## Node IO Map
|
## Node IO Map
|
||||||
|
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ if str(ROOT) not in sys.path:
|
|||||||
sys.path.insert(0, str(ROOT))
|
sys.path.insert(0, str(ROOT))
|
||||||
|
|
||||||
import caption_naturalizer # noqa: E402
|
import caption_naturalizer # noqa: E402
|
||||||
|
import caption_policy # noqa: E402
|
||||||
import character_config # noqa: E402
|
import character_config # noqa: E402
|
||||||
import character_profile # noqa: E402
|
import character_profile # noqa: E402
|
||||||
import category_cast_config # noqa: E402
|
import category_cast_config # noqa: E402
|
||||||
@@ -951,6 +952,47 @@ def smoke_formatter_cast_policy() -> None:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def smoke_caption_policy() -> None:
    """Smoke-check ``caption_policy`` and the naturalizer's delegation to it.

    Covers three things: the naturalizer's module-level tables are the very
    same objects as the policy module's, the normalization/gating helpers
    fall back as expected, and the text-cleanup helpers produce the expected
    output both directly and through the naturalizer's private wrappers.

    NOTE(review): ``_expect`` is defined elsewhere in this script; it is
    presumably what fails the smoke case when the condition is false.
    """
    # Shared tables must be aliases, not copies, so edits land in one place.
    _expect(
        caption_naturalizer.STYLE_TAILS is caption_policy.STYLE_TAILS,
        "Caption naturalizer style tails should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.ITEM_LABELS is caption_policy.ITEM_LABELS,
        "Caption naturalizer item labels should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.ACTION_FAMILY_CAPTION_LABELS is caption_policy.ACTION_FAMILY_CAPTION_LABELS,
        "Caption naturalizer action labels should delegate to caption_policy",
    )
    _expect(
        caption_naturalizer.POSITION_FAMILY_CAPTION_LABELS is caption_policy.POSITION_FAMILY_CAPTION_LABELS,
        "Caption naturalizer position labels should delegate to caption_policy",
    )

    # Normalization fallbacks and detail gates.
    _expect(caption_policy.normalize_detail_level("bad") == "balanced", "Caption invalid detail fallback changed")
    _expect(
        caption_policy.normalize_style_policy("bad") == "drop_style_tail",
        "Caption invalid style policy fallback changed",
    )
    _expect(caption_policy.keep_style_terms("keep_style_terms") is True, "Caption style policy keep flag changed")
    _expect(caption_policy.keep_style_terms("drop_style_tail") is False, "Caption style policy drop flag changed")
    _expect(caption_policy.detail_allows("concise") is False, "Caption concise detail gate changed")
    _expect(caption_policy.detail_allows("dense", dense_only=True) is True, "Caption dense-only gate changed")

    # Every registered style tail must be stripped, directly and via the wrapper.
    for style_tail in caption_policy.STYLE_TAILS:
        _expect(
            caption_policy.strip_style_tail(f"caption body{style_tail}") == "caption body",
            "Caption style-tail stripping changed",
        )
        _expect(
            caption_naturalizer._strip_style_tail(f"caption body{style_tail}") == "caption body",
            "Caption naturalizer style-tail wrapper should delegate",
        )

    _expect(
        caption_policy.normalize_composition("vertical centered body frame") == "centered body frame",
        "Caption composition normalization changed",
    )
    _expect(
        caption_policy.clean_clothing("silk dress, fashion editorial styling") == "silk dress",
        "Caption clothing cleanup changed",
    )

    # Action family is used when position family is empty ...
    row = {"action_family": "oral", "position_family": ""}
    _expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed")
    # ... and position family wins when both are recognized.
    row = {"action_family": "oral", "position_family": "anal"}
    _expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed")
|
||||||
|
|
||||||
|
|
||||||
def smoke_sdxl_presets_policy() -> None:
|
def smoke_sdxl_presets_policy() -> None:
|
||||||
_expect(
|
_expect(
|
||||||
sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
|
sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
|
||||||
@@ -2964,6 +3006,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
|
|||||||
("row_normalization_policy", smoke_row_normalization_policy),
|
("row_normalization_policy", smoke_row_normalization_policy),
|
||||||
("formatter_input_policy", smoke_formatter_input_policy),
|
("formatter_input_policy", smoke_formatter_input_policy),
|
||||||
("formatter_cast_policy", smoke_formatter_cast_policy),
|
("formatter_cast_policy", smoke_formatter_cast_policy),
|
||||||
|
("caption_policy", smoke_caption_policy),
|
||||||
("sdxl_presets_policy", smoke_sdxl_presets_policy),
|
("sdxl_presets_policy", smoke_sdxl_presets_policy),
|
||||||
("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
|
("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
|
||||||
("category_library_route", smoke_category_library_route),
|
("category_library_route", smoke_category_library_route),
|
||||||
|
|||||||
Reference in New Issue
Block a user