Share formatter cast descriptor policy

This commit is contained in:
2026-06-27 01:30:00 +02:00
parent a128b2dc9a
commit 64887a2750
5 changed files with 74 additions and 46 deletions
+7 -31
View File
@@ -6,10 +6,12 @@ from typing import Any
try: try:
from . import formatter_input as input_policy from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family from .hardcore_action_metadata import normalize_hardcore_action_family
from . import krea_cast as cast_policy
from .prompt_hygiene import sanitize_prose_text from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family from hardcore_action_metadata import normalize_hardcore_action_family
import krea_cast as cast_policy
from prompt_hygiene import sanitize_prose_text from prompt_hygiene import sanitize_prose_text
@@ -132,49 +134,23 @@ def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
def _prompt_cast_descriptors(text: str) -> str: def _prompt_cast_descriptors(text: str) -> str:
return _clean_text(text).replace("Woman A / primary creator:", "Woman A:") return cast_policy.prompt_cast_descriptors(text)
def _cast_entries(text: str) -> list[tuple[str, str]]: def _cast_entries(text: str) -> list[tuple[str, str]]:
text = _prompt_cast_descriptors(text) return cast_policy.cast_entries(text)
entries: list[tuple[str, str]] = []
for part in text.split(";"):
part = _clean_text(part)
match = re.match(r"^((?:Woman|Man) [A-Z]):\s*(.+)$", part)
if match:
entries.append((match.group(1), _clean_text(match.group(2))))
return entries
def _natural_cast_descriptor_text(text: str) -> str: def _natural_cast_descriptor_text(text: str) -> str:
entries = _cast_entries(text) return cast_policy.natural_cast_descriptor_text(text)
if not entries:
return _clean_text(text)
labels = [label for label, _descriptor in entries]
if labels == ["Woman A"] or labels == ["Man A"]:
return f"A {entries[0][1]}"
if set(labels) == {"Woman A", "Man A"} and len(labels) == 2:
by_label = {label: descriptor for label, descriptor in entries}
return f"A {by_label['Woman A']} alongside a {by_label['Man A']}"
return " ".join(f"{label} is {descriptor}." for label, descriptor in entries)
def _cast_labels(text: str) -> list[str]: def _cast_labels(text: str) -> list[str]:
return [label for label, _descriptor in _cast_entries(text)] return cast_policy.cast_labels(text)
def _natural_label_text(text: Any, labels: list[str]) -> str: def _natural_label_text(text: Any, labels: list[str]) -> str:
text = _clean_text(text) return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False)
if not text:
return ""
if set(labels) == {"Woman A", "Man A"}:
text = re.sub(r"\bWoman A\b", "the woman", text)
text = re.sub(r"\bMan A\b", "the man", text)
elif labels == ["Woman A"]:
text = re.sub(r"\bWoman A\b", "the woman", text)
elif labels == ["Man A"]:
text = re.sub(r"\bMan A\b", "the man", text)
return text
def _strip_style_tail(text: str) -> str: def _strip_style_tail(text: str) -> str:
+4 -3
View File
@@ -234,8 +234,9 @@ Keep here:
Already isolated: Already isolated:
- `krea_cast.py` owns cast descriptor parsing, cast prose, label joining, and - `krea_cast.py` owns cast descriptor parsing, cast labels, cast prose, label
natural label replacement for formatter routes. joining, natural cast descriptor text, and label replacement for formatter
routes, including the caption naturalizer's cast metadata path.
- `krea_clothing.py` owns clothing-state cleanup and action-aware body-access - `krea_clothing.py` owns clothing-state cleanup and action-aware body-access
wording for formatter routes. wording for formatter routes.
- `krea_action_context.py` owns shared action-family predicates, axis context - `krea_action_context.py` owns shared action-family predicates, axis context
@@ -302,10 +303,10 @@ Keep here:
- style-tail policy. - style-tail policy.
- metadata-family action labels from `action_family` and `position_family`. - metadata-family action labels from `action_family` and `position_family`.
- shared formatter input parsing from `formatter_input.py`. - shared formatter input parsing from `formatter_input.py`.
- shared cast descriptor parsing and label replacement from `krea_cast.py`.
Improve later: Improve later:
- share more metadata readers with Krea without sharing Krea prose;
- add a `caption_profile` option for concise/dense LoRA caption styles. - add a `caption_profile` option for concise/dense LoRA caption styles.
### Category JSON Path ### Category JSON Path
+1
View File
@@ -92,6 +92,7 @@ Core helper ownership:
| `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. | | `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. |
| `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. | | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
| `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. | | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. | | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
+31 -12
View File
@@ -3,13 +3,14 @@ from __future__ import annotations
import re import re
from typing import Any from typing import Any
try:
from . import formatter_input as input_policy
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
import formatter_input as input_policy
def _clean(value: Any) -> str: def _clean(value: Any) -> str:
text = "" if value is None else str(value) return input_policy.clean_text(value)
text = text.replace("\n", " ")
text = re.sub(r"\s+", " ", text).strip()
text = re.sub(r"\s+([,.;:])", r"\1", text)
return text
def _with_indefinite_article(text: str) -> str: def _with_indefinite_article(text: str) -> str:
@@ -35,6 +36,23 @@ def cast_entries(text: str) -> list[tuple[str, str]]:
return entries return entries
def cast_labels(text: str) -> list[str]:
    """Return the cast labels found in *text*, in the order they were parsed.

    Each item yielded by ``cast_entries`` is unpacked as a
    ``(label, descriptor)`` pair; only the label is kept.
    """
    found: list[str] = []
    for name, _detail in cast_entries(text):
        found.append(name)
    return found
def natural_cast_descriptor_text(text: str) -> str:
    """Turn a labelled cast descriptor string into natural prose.

    Behavior by number and kind of parsed entries:

    * no entries: the cleaned input text is returned as-is;
    * exactly one entry labelled ``Woman A`` or ``Man A``:
      ``"A <descriptor>"``;
    * exactly two entries, one ``Woman A`` and one ``Man A`` (any order):
      ``"A <woman descriptor> alongside a <man descriptor>"``;
    * anything else: one ``"<label> is <descriptor>."`` sentence per entry,
      joined with single spaces.
    """
    parsed = cast_entries(text)
    if not parsed:
        return _clean(text)

    names = [name for name, _detail in parsed]

    # NOTE(review): the article is always "A"/"a", whatever sound the
    # descriptor starts with (e.g. "A 18-year-old ...").
    if names in (["Woman A"], ["Man A"]):
        return f"A {parsed[0][1]}"

    if len(names) == 2 and set(names) == {"Woman A", "Man A"}:
        details = dict(parsed)
        return f"A {details['Woman A']} alongside a {details['Man A']}"

    return " ".join(f"{name} is {detail}." for name, detail in parsed)
def label_join(labels: list[str]) -> str: def label_join(labels: list[str]) -> str:
labels = [_clean(label) for label in labels if _clean(label)] labels = [_clean(label) for label in labels if _clean(label)]
if not labels: if not labels:
@@ -52,7 +70,7 @@ def label_join(labels: list[str]) -> str:
return f"{', '.join(labels[:-1])}, and {labels[-1]}" return f"{', '.join(labels[:-1])}, and {labels[-1]}"
def natural_label_text(text: Any, labels: list[str]) -> str: def natural_label_text(text: Any, labels: list[str], *, capitalize_sentence_starts: bool = True) -> str:
text = _clean(text) text = _clean(text)
if not text: if not text:
return "" return ""
@@ -63,12 +81,13 @@ def natural_label_text(text: Any, labels: list[str]) -> str:
text = re.sub(r"\bWoman A\b", "the woman", text) text = re.sub(r"\bWoman A\b", "the woman", text)
elif labels == ["Man A"]: elif labels == ["Man A"]:
text = re.sub(r"\bMan A\b", "the man", text) text = re.sub(r"\bMan A\b", "the man", text)
text = re.sub( if capitalize_sentence_starts:
r"(^|[.!?]\s+)(the woman|the man)\b", text = re.sub(
lambda match: match.group(1) + match.group(2).capitalize(), r"(^|[.!?]\s+)(the woman|the man)\b",
text, lambda match: match.group(1) + match.group(2).capitalize(),
flags=re.IGNORECASE, text,
) flags=re.IGNORECASE,
)
return text return text
+31
View File
@@ -33,6 +33,7 @@ import formatter_input # noqa: E402
import hardcore_position_config # noqa: E402 import hardcore_position_config # noqa: E402
import __init__ as sxcp_nodes # noqa: E402 import __init__ as sxcp_nodes # noqa: E402
import generation_profile_config # noqa: E402 import generation_profile_config # noqa: E402
import krea_cast # noqa: E402
import krea_formatter # noqa: E402 import krea_formatter # noqa: E402
import location_config # noqa: E402 import location_config # noqa: E402
import prompt_builder as pb # noqa: E402 import prompt_builder as pb # noqa: E402
@@ -900,6 +901,35 @@ def smoke_formatter_input_policy() -> None:
_expect_text("formatter_input.caption", caption, 20) _expect_text("formatter_input.caption", caption, 20)
def smoke_formatter_cast_policy() -> None:
    """Smoke-check that caption cast helpers agree with the shared cast policy.

    Covers entry parsing, label extraction, and natural descriptor text for a
    two-person descriptor, then pins the one intended difference: the shared
    ``natural_label_text`` capitalizes the sentence start, while the caption
    wrapper keeps the replacement lowercase.
    """
    woman_traits = "25-year-old adult woman, average figure, warm skin, dark hair"
    man_traits = "40-year-old adult man, average figure, tan skin, short dark hair"
    raw_cast = f"Woman A / primary creator: {woman_traits}; Man A: {man_traits}"
    expected_entries = [("Woman A", woman_traits), ("Man A", man_traits)]

    # Entry parsing: shared parser first, then the caption wrapper.
    shared_entries = krea_cast.cast_entries(raw_cast)
    _expect(shared_entries == expected_entries, "Shared cast entry parser changed")
    caption_entries = caption_naturalizer._cast_entries(raw_cast)
    _expect(caption_entries == expected_entries, "Caption cast parser should delegate to shared cast policy")

    # Label extraction.
    shared_labels = krea_cast.cast_labels(raw_cast)
    _expect(shared_labels == ["Woman A", "Man A"], "Shared cast label parser changed")
    _expect(
        caption_naturalizer._cast_labels(raw_cast) == krea_cast.cast_labels(raw_cast),
        "Caption cast labels should delegate to shared cast policy",
    )

    # Natural descriptor prose.
    natural = krea_cast.natural_cast_descriptor_text(raw_cast)
    _expect(natural.startswith("A 25-year-old adult woman"), "Shared natural cast descriptor text changed")
    caption_natural = caption_naturalizer._natural_cast_descriptor_text(raw_cast)
    _expect(caption_natural == natural, "Caption cast descriptor text should delegate")

    # Label replacement: same sentence, two capitalization policies.
    sentence = "Woman A faces Man A."
    pair = ["Woman A", "Man A"]
    _expect(
        krea_cast.natural_label_text(sentence, pair) == "The woman faces the man.",
        "Krea natural label text should keep sentence capitalization",
    )
    _expect(
        caption_naturalizer._natural_label_text(sentence, pair) == "the woman faces the man.",
        "Caption natural label text should preserve previous lowercase inline behavior",
    )
def smoke_sdxl_presets_policy() -> None: def smoke_sdxl_presets_policy() -> None:
_expect( _expect(
sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS, sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
@@ -2912,6 +2942,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
("character_profile_policy", smoke_character_profile_policy), ("character_profile_policy", smoke_character_profile_policy),
("row_normalization_policy", smoke_row_normalization_policy), ("row_normalization_policy", smoke_row_normalization_policy),
("formatter_input_policy", smoke_formatter_input_policy), ("formatter_input_policy", smoke_formatter_input_policy),
("formatter_cast_policy", smoke_formatter_cast_policy),
("sdxl_presets_policy", smoke_sdxl_presets_policy), ("sdxl_presets_policy", smoke_sdxl_presets_policy),
("hardcore_position_config_policy", smoke_hardcore_position_config_policy), ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
("category_library_route", smoke_category_library_route), ("category_library_route", smoke_category_library_route),