Share formatter cast descriptor policy

2026-06-27 01:30:00 +02:00
parent a128b2dc9a
commit 64887a2750
5 changed files with 74 additions and 46 deletions
@@ -6,10 +6,12 @@ from typing import Any
 try:
    from . import formatter_input as input_policy
    from .hardcore_action_metadata import normalize_hardcore_action_family
    from . import krea_cast as cast_policy
    from .prompt_hygiene import sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
    import formatter_input as input_policy
    from hardcore_action_metadata import normalize_hardcore_action_family
    import krea_cast as cast_policy
    from prompt_hygiene import sanitize_prose_text
@@ -132,49 +134,23 @@ def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
 def _prompt_cast_descriptors(text: str) -> str:
-    return _clean_text(text).replace("Woman A / primary creator:", "Woman A:")
+    return cast_policy.prompt_cast_descriptors(text)
 def _cast_entries(text: str) -> list[tuple[str, str]]:
-    text = _prompt_cast_descriptors(text)
+    return cast_policy.cast_entries(text)
    entries: list[tuple[str, str]] = []
    for part in text.split(";"):
        part = _clean_text(part)
        match = re.match(r"^((?:Woman|Man) [A-Z]):\s*(.+)$", part)
        if match:
            entries.append((match.group(1), _clean_text(match.group(2))))
    return entries
 def _natural_cast_descriptor_text(text: str) -> str:
-    entries = _cast_entries(text)
+    return cast_policy.natural_cast_descriptor_text(text)
    if not entries:
        return _clean_text(text)
    labels = [label for label, _descriptor in entries]
    if labels == ["Woman A"] or labels == ["Man A"]:
        return f"A {entries[0][1]}"
    if set(labels) == {"Woman A", "Man A"} and len(labels) == 2:
        by_label = {label: descriptor for label, descriptor in entries}
        return f"A {by_label['Woman A']} alongside a {by_label['Man A']}"
    return " ".join(f"{label} is {descriptor}." for label, descriptor in entries)
 def _cast_labels(text: str) -> list[str]:
-    return [label for label, _descriptor in _cast_entries(text)]
+    return cast_policy.cast_labels(text)
 def _natural_label_text(text: Any, labels: list[str]) -> str:
-    text = _clean_text(text)
+    return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False)
    if not text:
        return ""
    if set(labels) == {"Woman A", "Man A"}:
        text = re.sub(r"\bWoman A\b", "the woman", text)
        text = re.sub(r"\bMan A\b", "the man", text)
    elif labels == ["Woman A"]:
        text = re.sub(r"\bWoman A\b", "the woman", text)
    elif labels == ["Man A"]:
        text = re.sub(r"\bMan A\b", "the man", text)
    return text
 def _strip_style_tail(text: str) -> str:
@@ -234,8 +234,9 @@ Keep here:
 Already isolated:
- `krea_cast.py` owns cast descriptor parsing, cast prose, label joining, and
+- `krea_cast.py` owns cast descriptor parsing, cast labels, cast prose, label
-  natural label replacement for formatter routes.
+  joining, natural cast descriptor text, and label replacement for formatter
  routes, including the caption naturalizer's cast metadata path.
 - `krea_clothing.py` owns clothing-state cleanup and action-aware body-access
  wording for formatter routes.
 - `krea_action_context.py` owns shared action-family predicates, axis context
@@ -302,10 +303,10 @@ Keep here:
 - style-tail policy.
 - metadata-family action labels from `action_family` and `position_family`.
 - shared formatter input parsing from `formatter_input.py`.
 - shared cast descriptor parsing and label replacement from `krea_cast.py`.
 Improve later:
 - share more metadata readers with Krea without sharing Krea prose;
 - add a `caption_profile` option for concise/dense LoRA caption styles.
 ### Category JSON Path
@@ -92,6 +92,7 @@ Core helper ownership:
 | `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. |
 | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
 | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
 | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
 | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
 | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
 | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
@@ -3,13 +3,14 @@ from __future__ import annotations
 import re
 from typing import Any
 try:
    from . import formatter_input as input_policy
 except ImportError:  # Allows local smoke tests with `python tools/prompt_smoke.py`.
    import formatter_input as input_policy
 def _clean(value: Any) -> str:
-    text = "" if value is None else str(value)
+    return input_policy.clean_text(value)
    text = text.replace("\n", " ")
    text = re.sub(r"\s+", " ", text).strip()
    text = re.sub(r"\s+([,.;:])", r"\1", text)
    return text
 def _with_indefinite_article(text: str) -> str:
@@ -35,6 +36,23 @@ def cast_entries(text: str) -> list[tuple[str, str]]:
    return entries
 def cast_labels(text: str) -> list[str]:
    """Return the cast labels found in *text*, in parsed order.

    Takes the label half of every ``(label, descriptor)`` pair produced by
    ``cast_entries`` and discards the descriptors.
    """
    return [label for label, _descriptor in cast_entries(text)]
 def natural_cast_descriptor_text(text: str) -> str:
    """Rewrite a labelled cast descriptor string as prose.

    The result depends on which labels ``cast_entries`` parses out of *text*:

    * no entries: the input passed through ``_clean``;
    * exactly ``Woman A`` or exactly ``Man A``: ``"A <descriptor>"``;
    * exactly one ``Woman A`` and one ``Man A``:
      ``"A <woman descriptor> alongside a <man descriptor>"``;
    * anything else: one ``"<label> is <descriptor>."`` sentence per entry,
      joined with single spaces.

    NOTE(review): the article is hard-coded as "A"/"a", so a descriptor that
    starts with a vowel sound would still be prefixed with "A" -- confirm
    whether ``_with_indefinite_article`` is meant to be used here.
    """
    entries = cast_entries(text)
    if not entries:
        # Nothing parsed as a labelled entry: return the cleaned input as-is.
        return _clean(text)
    labels = [label for label, _descriptor in entries]
    if labels == ["Woman A"] or labels == ["Man A"]:
        return f"A {entries[0][1]}"
    # The length check rejects inputs that repeat one of the two labels.
    if set(labels) == {"Woman A", "Man A"} and len(labels) == 2:
        # Look up by label so the woman is named first whatever the input order.
        by_label = {label: descriptor for label, descriptor in entries}
        return f"A {by_label['Woman A']} alongside a {by_label['Man A']}"
    return " ".join(f"{label} is {descriptor}." for label, descriptor in entries)
 def label_join(labels: list[str]) -> str:
    labels = [_clean(label) for label in labels if _clean(label)]
    if not labels:
@@ -52,7 +70,7 @@ def label_join(labels: list[str]) -> str:
    return f"{', '.join(labels[:-1])}, and {labels[-1]}"
-def natural_label_text(text: Any, labels: list[str]) -> str:
+def natural_label_text(text: Any, labels: list[str], *, capitalize_sentence_starts: bool = True) -> str:
    text = _clean(text)
    if not text:
        return ""
@@ -63,12 +81,13 @@ def natural_label_text(text: Any, labels: list[str]) -> str:
        text = re.sub(r"\bWoman A\b", "the woman", text)
    elif labels == ["Man A"]:
        text = re.sub(r"\bMan A\b", "the man", text)
-    text = re.sub(
+    if capitalize_sentence_starts:
-        r"(^|[.!?]\s+)(the woman|the man)\b",
+        text = re.sub(
-        lambda match: match.group(1) + match.group(2).capitalize(),
+            r"(^|[.!?]\s+)(the woman|the man)\b",
-        text,
+            lambda match: match.group(1) + match.group(2).capitalize(),
-        flags=re.IGNORECASE,
+            text,
-    )
+            flags=re.IGNORECASE,
        )
    return text
@@ -33,6 +33,7 @@ import formatter_input  # noqa: E402
 import hardcore_position_config  # noqa: E402
 import __init__ as sxcp_nodes  # noqa: E402
 import generation_profile_config  # noqa: E402
 import krea_cast  # noqa: E402
 import krea_formatter  # noqa: E402
 import location_config  # noqa: E402
 import prompt_builder as pb  # noqa: E402
@@ -900,6 +901,35 @@ def smoke_formatter_input_policy() -> None:
    _expect_text("formatter_input.caption", caption, 20)
 def smoke_formatter_cast_policy() -> None:
    """Smoke-check that ``krea_cast`` and the caption naturalizer agree on cast handling.

    Uses one two-person descriptor (``Woman A / primary creator`` plus
    ``Man A``) and checks, for entries, labels, and natural descriptor text,
    that the ``caption_naturalizer`` private helpers return the same values as
    the ``krea_cast`` functions. For label replacement it checks the one
    intended difference: ``krea_cast.natural_label_text`` capitalizes the
    sentence start, while the caption helper keeps it lowercase.

    NOTE(review): ``_expect`` is defined outside this view; presumably it
    reports or raises with the given message when the condition is false --
    confirm against its definition.
    """
    descriptor = (
        "Woman A / primary creator: 25-year-old adult woman, average figure, warm skin, dark hair; "
        "Man A: 40-year-old adult man, average figure, tan skin, short dark hair"
    )
    # Expected parse: the "/ primary creator" suffix is dropped from the label.
    entries = [
        ("Woman A", "25-year-old adult woman, average figure, warm skin, dark hair"),
        ("Man A", "40-year-old adult man, average figure, tan skin, short dark hair"),
    ]
    _expect(krea_cast.cast_entries(descriptor) == entries, "Shared cast entry parser changed")
    _expect(caption_naturalizer._cast_entries(descriptor) == entries, "Caption cast parser should delegate to shared cast policy")
    _expect(krea_cast.cast_labels(descriptor) == ["Woman A", "Man A"], "Shared cast label parser changed")
    _expect(
        caption_naturalizer._cast_labels(descriptor) == krea_cast.cast_labels(descriptor),
        "Caption cast labels should delegate to shared cast policy",
    )
    natural = krea_cast.natural_cast_descriptor_text(descriptor)
    _expect(natural.startswith("A 25-year-old adult woman"), "Shared natural cast descriptor text changed")
    _expect(caption_naturalizer._natural_cast_descriptor_text(descriptor) == natural, "Caption cast descriptor text should delegate")
    # Same input, two expected outputs: only the sentence-start casing differs.
    _expect(
        krea_cast.natural_label_text("Woman A faces Man A.", ["Woman A", "Man A"]) == "The woman faces the man.",
        "Krea natural label text should keep sentence capitalization",
    )
    _expect(
        caption_naturalizer._natural_label_text("Woman A faces Man A.", ["Woman A", "Man A"]) == "the woman faces the man.",
        "Caption natural label text should preserve previous lowercase inline behavior",
    )
 def smoke_sdxl_presets_policy() -> None:
    _expect(
        sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
@@ -2912,6 +2942,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
    ("character_profile_policy", smoke_character_profile_policy),
    ("row_normalization_policy", smoke_row_normalization_policy),
    ("formatter_input_policy", smoke_formatter_input_policy),
    ("formatter_cast_policy", smoke_formatter_cast_policy),
    ("sdxl_presets_policy", smoke_sdxl_presets_policy),
    ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
    ("category_library_route", smoke_category_library_route),