From 64887a27506093e993c9d61691b514ff40f867dc Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Sat, 27 Jun 2026 01:30:00 +0200
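Subject: [PATCH] Share formatter cast descriptor policy

Route the caption naturalizer's cast helpers (_prompt_cast_descriptors,
_cast_entries, _natural_cast_descriptor_text, _cast_labels and
_natural_label_text) through krea_cast instead of keeping private copies.

krea_cast gains cast_labels() and natural_cast_descriptor_text(), and
natural_label_text() takes a keyword-only capitalize_sentence_starts flag
(default True). The caption route passes False so its output keeps the
previous lowercase "the woman" / "the man" replacements.

krea_cast._clean() now delegates to formatter_input.clean_text() rather
than carrying its own whitespace/punctuation regexes.

Add the formatter_cast_policy smoke case and update the architecture and
routing docs to record krea_cast.py as the owner of this policy.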
---
 caption_naturalizer.py                       | 38 ++++-------------
 docs/prompt-architecture-improvement-plan.md |  7 ++--
 docs/prompt-pool-routing-map.md              |  1 +
 krea_cast.py                                 | 43 ++++++++++++++------
 tools/prompt_smoke.py                        | 31 ++++++++++++++
 5 files changed, 74 insertions(+), 46 deletions(-)

diff --git a/caption_naturalizer.py b/caption_naturalizer.py
index 3bcb1f0..f06bb37 100644
--- a/caption_naturalizer.py
+++ b/caption_naturalizer.py
@@ -6,10 +6,12 @@ from typing import Any
 try:
     from . import formatter_input as input_policy
     from .hardcore_action_metadata import normalize_hardcore_action_family
+    from . import krea_cast as cast_policy
     from .prompt_hygiene import sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
     import formatter_input as input_policy
     from hardcore_action_metadata import normalize_hardcore_action_family
+    import krea_cast as cast_policy
     from prompt_hygiene import sanitize_prose_text
 
 
@@ -132,49 +134,23 @@ def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
 
 
 def _prompt_cast_descriptors(text: str) -> str:
-    return _clean_text(text).replace("Woman A / primary creator:", "Woman A:")
+    return cast_policy.prompt_cast_descriptors(text)
 
 
 def _cast_entries(text: str) -> list[tuple[str, str]]:
-    text = _prompt_cast_descriptors(text)
-    entries: list[tuple[str, str]] = []
-    for part in text.split(";"):
-        part = _clean_text(part)
-        match = re.match(r"^((?:Woman|Man) [A-Z]):\s*(.+)$", part)
-        if match:
-            entries.append((match.group(1), _clean_text(match.group(2))))
-    return entries
+    return cast_policy.cast_entries(text)
 
 
 def _natural_cast_descriptor_text(text: str) -> str:
-    entries = _cast_entries(text)
-    if not entries:
-        return _clean_text(text)
-    labels = [label for label, _descriptor in entries]
-    if labels == ["Woman A"] or labels == ["Man A"]:
-        return f"A {entries[0][1]}"
-    if set(labels) == {"Woman A", "Man A"} and len(labels) == 2:
-        by_label = {label: descriptor for label, descriptor in entries}
-        return f"A {by_label['Woman A']} alongside a {by_label['Man A']}"
-    return " ".join(f"{label} is {descriptor}." for label, descriptor in entries)
+    return cast_policy.natural_cast_descriptor_text(text)
 
 
 def _cast_labels(text: str) -> list[str]:
-    return [label for label, _descriptor in _cast_entries(text)]
+    return cast_policy.cast_labels(text)
 
 
 def _natural_label_text(text: Any, labels: list[str]) -> str:
-    text = _clean_text(text)
-    if not text:
-        return ""
-    if set(labels) == {"Woman A", "Man A"}:
-        text = re.sub(r"\bWoman A\b", "the woman", text)
-        text = re.sub(r"\bMan A\b", "the man", text)
-    elif labels == ["Woman A"]:
-        text = re.sub(r"\bWoman A\b", "the woman", text)
-    elif labels == ["Man A"]:
-        text = re.sub(r"\bMan A\b", "the man", text)
-    return text
+    return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False)
 
 
 def _strip_style_tail(text: str) -> str:
diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md
index 70c3c64..525d3a9 100644
--- a/docs/prompt-architecture-improvement-plan.md
+++ b/docs/prompt-architecture-improvement-plan.md
@@ -234,8 +234,9 @@ Keep here:
 
 Already isolated:
 
-- `krea_cast.py` owns cast descriptor parsing, cast prose, label joining, and
-  natural label replacement for formatter routes.
+- `krea_cast.py` owns cast descriptor parsing, cast labels, cast prose, label
+  joining, natural cast descriptor text, and label replacement for formatter
+  routes, including the caption naturalizer's cast metadata path.
 - `krea_clothing.py` owns clothing-state cleanup and action-aware body-access
   wording for formatter routes.
 - `krea_action_context.py` owns shared action-family predicates, axis context
@@ -302,10 +303,10 @@ Keep here:
 - style-tail policy.
 - metadata-family action labels from `action_family` and `position_family`.
 - shared formatter input parsing from `formatter_input.py`.
+- shared cast descriptor parsing and label replacement from `krea_cast.py`.
 
 Improve later:
 
-- share more metadata readers with Krea without sharing Krea prose;
 - add a `caption_profile` option for concise/dense LoRA caption styles.
 
 ### Category JSON Path
diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 4fee002..bf93ab7 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -92,6 +92,7 @@ Core helper ownership:
 | `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. |
 | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
 | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
+| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
 | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
 | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
 | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
diff --git a/krea_cast.py b/krea_cast.py
index 3463d2d..50a4172 100644
--- a/krea_cast.py
+++ b/krea_cast.py
@@ -3,13 +3,14 @@ from __future__ import annotations
 import re
 from typing import Any
 
+try:
+    from . import formatter_input as input_policy
+except ImportError:  # Allows local smoke tests with `python tools/prompt_smoke.py`.
+    import formatter_input as input_policy
+
 
 def _clean(value: Any) -> str:
-    text = "" if value is None else str(value)
-    text = text.replace("\n", " ")
-    text = re.sub(r"\s+", " ", text).strip()
-    text = re.sub(r"\s+([,.;:])", r"\1", text)
-    return text
+    return input_policy.clean_text(value)
 
 
 def _with_indefinite_article(text: str) -> str:
@@ -35,6 +36,23 @@ def cast_entries(text: str) -> list[tuple[str, str]]:
     return entries
 
 
+def cast_labels(text: str) -> list[str]:
+    return [label for label, _descriptor in cast_entries(text)]
+
+
+def natural_cast_descriptor_text(text: str) -> str:
+    entries = cast_entries(text)
+    if not entries:
+        return _clean(text)
+    labels = [label for label, _descriptor in entries]
+    if labels == ["Woman A"] or labels == ["Man A"]:
+        return f"A {entries[0][1]}"
+    if set(labels) == {"Woman A", "Man A"} and len(labels) == 2:
+        by_label = {label: descriptor for label, descriptor in entries}
+        return f"A {by_label['Woman A']} alongside a {by_label['Man A']}"
+    return " ".join(f"{label} is {descriptor}." for label, descriptor in entries)
+
+
 def label_join(labels: list[str]) -> str:
     labels = [_clean(label) for label in labels if _clean(label)]
     if not labels:
@@ -52,7 +70,7 @@ def label_join(labels: list[str]) -> str:
     return f"{', '.join(labels[:-1])}, and {labels[-1]}"
 
 
-def natural_label_text(text: Any, labels: list[str]) -> str:
+def natural_label_text(text: Any, labels: list[str], *, capitalize_sentence_starts: bool = True) -> str:
     text = _clean(text)
     if not text:
         return ""
@@ -63,12 +81,13 @@ def natural_label_text(text: Any, labels: list[str]) -> str:
         text = re.sub(r"\bWoman A\b", "the woman", text)
     elif labels == ["Man A"]:
         text = re.sub(r"\bMan A\b", "the man", text)
-    text = re.sub(
-        r"(^|[.!?]\s+)(the woman|the man)\b",
-        lambda match: match.group(1) + match.group(2).capitalize(),
-        text,
-        flags=re.IGNORECASE,
-    )
+    if capitalize_sentence_starts:
+        text = re.sub(
+            r"(^|[.!?]\s+)(the woman|the man)\b",
+            lambda match: match.group(1) + match.group(2).capitalize(),
+            text,
+            flags=re.IGNORECASE,
+        )
     return text
 
 
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index b529d6f..9607bbe 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -33,6 +33,7 @@ import formatter_input  # noqa: E402
 import hardcore_position_config  # noqa: E402
 import __init__ as sxcp_nodes  # noqa: E402
 import generation_profile_config  # noqa: E402
+import krea_cast  # noqa: E402
 import krea_formatter  # noqa: E402
 import location_config  # noqa: E402
 import prompt_builder as pb  # noqa: E402
@@ -900,6 +901,35 @@ def smoke_formatter_input_policy() -> None:
     _expect_text("formatter_input.caption", caption, 20)
 
 
+def smoke_formatter_cast_policy() -> None:
+    descriptor = (
+        "Woman A / primary creator: 25-year-old adult woman, average figure, warm skin, dark hair; "
+        "Man A: 40-year-old adult man, average figure, tan skin, short dark hair"
+    )
+    entries = [
+        ("Woman A", "25-year-old adult woman, average figure, warm skin, dark hair"),
+        ("Man A", "40-year-old adult man, average figure, tan skin, short dark hair"),
+    ]
+    _expect(krea_cast.cast_entries(descriptor) == entries, "Shared cast entry parser changed")
+    _expect(caption_naturalizer._cast_entries(descriptor) == entries, "Caption cast parser should delegate to shared cast policy")
+    _expect(krea_cast.cast_labels(descriptor) == ["Woman A", "Man A"], "Shared cast label parser changed")
+    _expect(
+        caption_naturalizer._cast_labels(descriptor) == krea_cast.cast_labels(descriptor),
+        "Caption cast labels should delegate to shared cast policy",
+    )
+    natural = krea_cast.natural_cast_descriptor_text(descriptor)
+    _expect(natural.startswith("A 25-year-old adult woman"), "Shared natural cast descriptor text changed")
+    _expect(caption_naturalizer._natural_cast_descriptor_text(descriptor) == natural, "Caption cast descriptor text should delegate")
+    _expect(
+        krea_cast.natural_label_text("Woman A faces Man A.", ["Woman A", "Man A"]) == "The woman faces the man.",
+        "Krea natural label text should keep sentence capitalization",
+    )
+    _expect(
+        caption_naturalizer._natural_label_text("Woman A faces Man A.", ["Woman A", "Man A"]) == "the woman faces the man.",
+        "Caption natural label text should preserve previous lowercase inline behavior",
+    )
+
+
 def smoke_sdxl_presets_policy() -> None:
     _expect(
         sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS,
@@ -2912,6 +2942,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
     ("character_profile_policy", smoke_character_profile_policy),
     ("row_normalization_policy", smoke_row_normalization_policy),
     ("formatter_input_policy", smoke_formatter_input_policy),
+    ("formatter_cast_policy", smoke_formatter_cast_policy),
     ("sdxl_presets_policy", smoke_sdxl_presets_policy),
     ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
     ("category_library_route", smoke_category_library_route),