From 64887a27506093e993c9d61691b514ff40f867dc Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 27 Jun 2026 01:30:00 +0200 Subject: [PATCH] Share formatter cast descriptor policy --- caption_naturalizer.py | 38 ++++------------- docs/prompt-architecture-improvement-plan.md | 7 ++-- docs/prompt-pool-routing-map.md | 1 + krea_cast.py | 43 ++++++++++++++------ tools/prompt_smoke.py | 31 ++++++++++++++ 5 files changed, 74 insertions(+), 46 deletions(-) diff --git a/caption_naturalizer.py b/caption_naturalizer.py index 3bcb1f0..f06bb37 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -6,10 +6,12 @@ from typing import Any try: from . import formatter_input as input_policy from .hardcore_action_metadata import normalize_hardcore_action_family + from . import krea_cast as cast_policy from .prompt_hygiene import sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. import formatter_input as input_policy from hardcore_action_metadata import normalize_hardcore_action_family + import krea_cast as cast_policy from prompt_hygiene import sanitize_prose_text @@ -132,49 +134,23 @@ def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> def _prompt_cast_descriptors(text: str) -> str: - return _clean_text(text).replace("Woman A / primary creator:", "Woman A:") + return cast_policy.prompt_cast_descriptors(text) def _cast_entries(text: str) -> list[tuple[str, str]]: - text = _prompt_cast_descriptors(text) - entries: list[tuple[str, str]] = [] - for part in text.split(";"): - part = _clean_text(part) - match = re.match(r"^((?:Woman|Man) [A-Z]):\s*(.+)$", part) - if match: - entries.append((match.group(1), _clean_text(match.group(2)))) - return entries + return cast_policy.cast_entries(text) def _natural_cast_descriptor_text(text: str) -> str: - entries = _cast_entries(text) - if not entries: - return _clean_text(text) - labels = [label for label, _descriptor in entries] - if labels == ["Woman A"] or labels == ["Man A"]: - return f"A {entries[0][1]}" - if set(labels) == {"Woman A", "Man A"} and len(labels) == 2: - by_label = {label: descriptor for label, descriptor in entries} - return f"A {by_label['Woman A']} alongside a {by_label['Man A']}" - return " ".join(f"{label} is {descriptor}." for label, descriptor in entries) + return cast_policy.natural_cast_descriptor_text(text) def _cast_labels(text: str) -> list[str]: - return [label for label, _descriptor in _cast_entries(text)] + return cast_policy.cast_labels(text) def _natural_label_text(text: Any, labels: list[str]) -> str: - text = _clean_text(text) - if not text: - return "" - if set(labels) == {"Woman A", "Man A"}: - text = re.sub(r"\bWoman A\b", "the woman", text) - text = re.sub(r"\bMan A\b", "the man", text) - elif labels == ["Woman A"]: - text = re.sub(r"\bWoman A\b", "the woman", text) - elif labels == ["Man A"]: - text = re.sub(r"\bMan A\b", "the man", text) - return text + return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False) def _strip_style_tail(text: str) -> str: diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 70c3c64..525d3a9 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -234,8 +234,9 @@ Keep here: Already isolated: -- `krea_cast.py` owns cast descriptor parsing, cast prose, label joining, and - natural label replacement for formatter routes. +- `krea_cast.py` owns cast descriptor parsing, cast labels, cast prose, label + joining, natural cast descriptor text, and label replacement for formatter + routes, including the caption naturalizer's cast metadata path. - `krea_clothing.py` owns clothing-state cleanup and action-aware body-access wording for formatter routes. - `krea_action_context.py` owns shared action-family predicates, axis context @@ -302,10 +303,10 @@ Keep here: - style-tail policy. - metadata-family action labels from `action_family` and `position_family`. - shared formatter input parsing from `formatter_input.py`. +- shared cast descriptor parsing and label replacement from `krea_cast.py`. Improve later: -- share more metadata readers with Krea without sharing Krea prose; - add a `caption_profile` option for concise/dense LoRA caption styles. ### Category JSON Path diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 4fee002..bf93ab7 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -92,6 +92,7 @@ Core helper ownership: | `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. | | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. | | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. | +| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. | | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | diff --git a/krea_cast.py b/krea_cast.py index 3463d2d..50a4172 100644 --- a/krea_cast.py +++ b/krea_cast.py @@ -3,13 +3,14 @@ from __future__ import annotations import re from typing import Any +try: + from . import formatter_input as input_policy +except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`. + import formatter_input as input_policy + def _clean(value: Any) -> str: - text = "" if value is None else str(value) - text = text.replace("\n", " ") - text = re.sub(r"\s+", " ", text).strip() - text = re.sub(r"\s+([,.;:])", r"\1", text) - return text + return input_policy.clean_text(value) def _with_indefinite_article(text: str) -> str: @@ -35,6 +36,23 @@ def cast_entries(text: str) -> list[tuple[str, str]]: return entries +def cast_labels(text: str) -> list[str]: + return [label for label, _descriptor in cast_entries(text)] + + +def natural_cast_descriptor_text(text: str) -> str: + entries = cast_entries(text) + if not entries: + return _clean(text) + labels = [label for label, _descriptor in entries] + if labels == ["Woman A"] or labels == ["Man A"]: + return f"A {entries[0][1]}" + if set(labels) == {"Woman A", "Man A"} and len(labels) == 2: + by_label = {label: descriptor for label, descriptor in entries} + return f"A {by_label['Woman A']} alongside a {by_label['Man A']}" + return " ".join(f"{label} is {descriptor}." for label, descriptor in entries) + + def label_join(labels: list[str]) -> str: labels = [_clean(label) for label in labels if _clean(label)] if not labels: @@ -52,7 +70,7 @@ def label_join(labels: list[str]) -> str: return f"{', '.join(labels[:-1])}, and {labels[-1]}" -def natural_label_text(text: Any, labels: list[str]) -> str: +def natural_label_text(text: Any, labels: list[str], *, capitalize_sentence_starts: bool = True) -> str: text = _clean(text) if not text: return "" @@ -63,12 +81,13 @@ def natural_label_text(text: Any, labels: list[str]) -> str: text = re.sub(r"\bWoman A\b", "the woman", text) elif labels == ["Man A"]: text = re.sub(r"\bMan A\b", "the man", text) - text = re.sub( - r"(^|[.!?]\s+)(the woman|the man)\b", - lambda match: match.group(1) + match.group(2).capitalize(), - text, - flags=re.IGNORECASE, - ) + if capitalize_sentence_starts: + text = re.sub( + r"(^|[.!?]\s+)(the woman|the man)\b", + lambda match: match.group(1) + match.group(2).capitalize(), + text, + flags=re.IGNORECASE, + ) return text diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index b529d6f..9607bbe 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -33,6 +33,7 @@ import formatter_input # noqa: E402 import hardcore_position_config # noqa: E402 import __init__ as sxcp_nodes # noqa: E402 import generation_profile_config # noqa: E402 +import krea_cast # noqa: E402 import krea_formatter # noqa: E402 import location_config # noqa: E402 import prompt_builder as pb # noqa: E402 @@ -900,6 +901,35 @@ def smoke_formatter_input_policy() -> None: _expect_text("formatter_input.caption", caption, 20) +def smoke_formatter_cast_policy() -> None: + descriptor = ( + "Woman A / primary creator: 25-year-old adult woman, average figure, warm skin, dark hair; " + "Man A: 40-year-old adult man, average figure, tan skin, short dark hair" + ) + entries = [ + ("Woman A", "25-year-old adult woman, average figure, warm skin, dark hair"), + ("Man A", "40-year-old adult man, average figure, tan skin, short dark hair"), + ] + _expect(krea_cast.cast_entries(descriptor) == entries, "Shared cast entry parser changed") + _expect(caption_naturalizer._cast_entries(descriptor) == entries, "Caption cast parser should delegate to shared cast policy") + _expect(krea_cast.cast_labels(descriptor) == ["Woman A", "Man A"], "Shared cast label parser changed") + _expect( + caption_naturalizer._cast_labels(descriptor) == krea_cast.cast_labels(descriptor), + "Caption cast labels should delegate to shared cast policy", + ) + natural = krea_cast.natural_cast_descriptor_text(descriptor) + _expect(natural.startswith("A 25-year-old adult woman"), "Shared natural cast descriptor text changed") + _expect(caption_naturalizer._natural_cast_descriptor_text(descriptor) == natural, "Caption cast descriptor text should delegate") + _expect( + krea_cast.natural_label_text("Woman A faces Man A.", ["Woman A", "Man A"]) == "The woman faces the man.", + "Krea natural label text should keep sentence capitalization", + ) + _expect( + caption_naturalizer._natural_label_text("Woman A faces Man A.", ["Woman A", "Man A"]) == "the woman faces the man.", + "Caption natural label text should preserve previous lowercase inline behavior", + ) + + def smoke_sdxl_presets_policy() -> None: _expect( sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS, @@ -2912,6 +2942,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [ ("character_profile_policy", smoke_character_profile_policy), ("row_normalization_policy", smoke_row_normalization_policy), ("formatter_input_policy", smoke_formatter_input_policy), + ("formatter_cast_policy", smoke_formatter_cast_policy), ("sdxl_presets_policy", smoke_sdxl_presets_policy), ("hardcore_position_config_policy", smoke_hardcore_position_config_policy), ("category_library_route", smoke_category_library_route),