Share formatter cast descriptor policy

This commit is contained in:
2026-06-27 01:30:00 +02:00
parent a128b2dc9a
commit 64887a2750
5 changed files with 74 additions and 46 deletions
+7 -31
View File
@@ -6,10 +6,12 @@ from typing import Any
try:
from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from . import krea_cast as cast_policy
from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family
import krea_cast as cast_policy
from prompt_hygiene import sanitize_prose_text
@@ -132,49 +134,23 @@ def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
def _prompt_cast_descriptors(text: str) -> str:
    """Return cast descriptor text normalized by the shared cast policy.

    Thin private wrapper: callers in this module keep using the local name
    while the normalization itself is delegated to ``cast_policy``.
    """
    # NOTE(review): the inline version this replaces rewrote
    # "Woman A / primary creator:" to "Woman A:" after text cleanup; confirm
    # the shared helper keeps that rewrite.
    return cast_policy.prompt_cast_descriptors(text)
def _cast_entries(text: str) -> list[tuple[str, str]]:
    """Parse cast descriptor text into ``(label, descriptor)`` pairs.

    Delegates to the shared cast policy so this module and the other
    formatter routes use a single parser instead of a local copy.
    """
    return cast_policy.cast_entries(text)
def _natural_cast_descriptor_text(text: str) -> str:
    """Return prose describing the cast found in ``text``.

    Delegates to the shared cast policy rather than keeping a second copy of
    the phrasing rules in this module.
    """
    return cast_policy.natural_cast_descriptor_text(text)
def _cast_labels(text: str) -> list[str]:
    """Return the cast labels found in ``text``, as reported by the shared cast policy."""
    return cast_policy.cast_labels(text)
def _natural_label_text(text: Any, labels: list[str]) -> str:
    """Replace cast labels in ``text`` with natural wording via the shared cast policy.

    Args:
        text: Value to rewrite; passed through to the shared helper unchanged.
        labels: Cast labels present in the scene.

    Returns:
        The rewritten text produced by ``cast_policy.natural_label_text``.
    """
    # Sentence-start capitalization is switched off so replacements stay
    # lowercase inline, matching what this module produced before delegating.
    return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False)
def _strip_style_tail(text: str) -> str:
+4 -3
View File
@@ -234,8 +234,9 @@ Keep here:
Already isolated:
- `krea_cast.py` owns cast descriptor parsing, cast labels, cast prose, label
joining, natural cast descriptor text, and natural label replacement for
formatter routes, including the caption naturalizer's cast metadata path.
- `krea_clothing.py` owns clothing-state cleanup and action-aware body-access
wording for formatter routes.
- `krea_action_context.py` owns shared action-family predicates, axis context
@@ -302,10 +303,10 @@ Keep here:
- style-tail policy.
- metadata-family action labels from `action_family` and `position_family`.
- shared formatter input parsing from `formatter_input.py`.
- shared cast descriptor parsing and label replacement from `krea_cast.py`.
Improve later:
- share more metadata readers with Krea without sharing Krea prose;
- add a `caption_profile` option for concise/dense LoRA caption styles.
### Category JSON Path
+1
View File
@@ -92,6 +92,7 @@ Core helper ownership:
| `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. |
| `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
| `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
+31 -12
View File
@@ -3,13 +3,14 @@ from __future__ import annotations
import re
from typing import Any
try:
from . import formatter_input as input_policy
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
import formatter_input as input_policy
def _clean(value: Any) -> str:
    """Return ``value`` as cleaned text using the shared formatter input policy.

    Delegates to ``input_policy.clean_text`` so this module does not maintain
    its own whitespace/punctuation cleanup.
    """
    # NOTE(review): the inline version this replaces mapped None to "",
    # collapsed whitespace runs (including newlines) to single spaces, stripped
    # the ends, and removed whitespace before ",", ".", ";" and ":"; confirm
    # the shared helper matches.
    return input_policy.clean_text(value)
def _with_indefinite_article(text: str) -> str:
@@ -35,6 +36,23 @@ def cast_entries(text: str) -> list[tuple[str, str]]:
return entries
def cast_labels(text: str) -> list[str]:
    """Return the label of every cast entry parsed from ``text``, in parse order."""
    parsed = cast_entries(text)
    return [name for name, _ in parsed]
def natural_cast_descriptor_text(text: str) -> str:
    """Turn labelled cast descriptor text into natural prose.

    - No parsable entries: the cleaned input is returned as-is.
    - Exactly one entry labelled "Woman A" or "Man A": ``"A <descriptor>"``.
    - Exactly one "Woman A" and one "Man A": the two descriptors joined with
      "alongside a", woman first.
    - Anything else: one ``"<label> is <descriptor>."`` sentence per entry.
    """
    parsed = cast_entries(text)
    if not parsed:
        return _clean(text)

    names = [name for name, _ in parsed]
    if names in (["Woman A"], ["Man A"]):
        return f"A {parsed[0][1]}"

    if len(names) == 2 and set(names) == {"Woman A", "Man A"}:
        descriptors = dict(parsed)
        return f"A {descriptors['Woman A']} alongside a {descriptors['Man A']}"

    sentences = [f"{name} is {descriptor}." for name, descriptor in parsed]
    return " ".join(sentences)
def label_join(labels: list[str]) -> str:
labels = [_clean(label) for label in labels if _clean(label)]
if not labels:
@@ -52,7 +70,7 @@ def label_join(labels: list[str]) -> str:
return f"{', '.join(labels[:-1])}, and {labels[-1]}"
def natural_label_text(text: Any, labels: list[str]) -> str:
def natural_label_text(text: Any, labels: list[str], *, capitalize_sentence_starts: bool = True) -> str:
text = _clean(text)
if not text:
return ""
@@ -63,12 +81,13 @@ def natural_label_text(text: Any, labels: list[str]) -> str:
text = re.sub(r"\bWoman A\b", "the woman", text)
elif labels == ["Man A"]:
text = re.sub(r"\bMan A\b", "the man", text)
text = re.sub(
r"(^|[.!?]\s+)(the woman|the man)\b",
lambda match: match.group(1) + match.group(2).capitalize(),
text,
flags=re.IGNORECASE,
)
if capitalize_sentence_starts:
text = re.sub(
r"(^|[.!?]\s+)(the woman|the man)\b",
lambda match: match.group(1) + match.group(2).capitalize(),
text,
flags=re.IGNORECASE,
)
return text
+31
View File
@@ -33,6 +33,7 @@ import formatter_input # noqa: E402
import hardcore_position_config # noqa: E402
import __init__ as sxcp_nodes # noqa: E402
import generation_profile_config # noqa: E402
import krea_cast # noqa: E402
import krea_formatter # noqa: E402
import location_config # noqa: E402
import prompt_builder as pb # noqa: E402
@@ -900,6 +901,35 @@ def smoke_formatter_input_policy() -> None:
_expect_text("formatter_input.caption", caption, 20)
def smoke_formatter_cast_policy() -> None:
    """Smoke-check the shared cast policy and the caption wrappers built on it."""
    woman_descriptor = "25-year-old adult woman, average figure, warm skin, dark hair"
    man_descriptor = "40-year-old adult man, average figure, tan skin, short dark hair"
    descriptor = f"Woman A / primary creator: {woman_descriptor}; Man A: {man_descriptor}"
    expected_entries = [("Woman A", woman_descriptor), ("Man A", man_descriptor)]

    # Entry parsing: shared parser first, then the caption wrapper.
    shared_entries = krea_cast.cast_entries(descriptor)
    _expect(shared_entries == expected_entries, "Shared cast entry parser changed")
    caption_entries = caption_naturalizer._cast_entries(descriptor)
    _expect(caption_entries == expected_entries, "Caption cast parser should delegate to shared cast policy")

    # Label extraction.
    shared_labels = krea_cast.cast_labels(descriptor)
    _expect(shared_labels == ["Woman A", "Man A"], "Shared cast label parser changed")
    caption_labels = caption_naturalizer._cast_labels(descriptor)
    _expect(
        caption_labels == krea_cast.cast_labels(descriptor),
        "Caption cast labels should delegate to shared cast policy",
    )

    # Natural cast prose.
    natural = krea_cast.natural_cast_descriptor_text(descriptor)
    _expect(natural.startswith("A 25-year-old adult woman"), "Shared natural cast descriptor text changed")
    caption_natural = caption_naturalizer._natural_cast_descriptor_text(descriptor)
    _expect(caption_natural == natural, "Caption cast descriptor text should delegate")

    # Label replacement: Krea capitalizes sentence starts, captions stay lowercase.
    sentence = "Woman A faces Man A."
    krea_sentence = krea_cast.natural_label_text(sentence, ["Woman A", "Man A"])
    _expect(
        krea_sentence == "The woman faces the man.",
        "Krea natural label text should keep sentence capitalization",
    )
    caption_sentence = caption_naturalizer._natural_label_text(sentence, ["Woman A", "Man A"])
    _expect(
        caption_sentence == "the woman faces the man.",
        "Caption natural label text should preserve previous lowercase inline behavior",
    )
def smoke_sdxl_presets_policy() -> None:
_expect(
sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
@@ -2912,6 +2942,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
("character_profile_policy", smoke_character_profile_policy),
("row_normalization_policy", smoke_row_normalization_policy),
("formatter_input_policy", smoke_formatter_input_policy),
("formatter_cast_policy", smoke_formatter_cast_policy),
("sdxl_presets_policy", smoke_sdxl_presets_policy),
("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
("category_library_route", smoke_category_library_route),