Extract Krea cast and hardcore cleanup helpers

This commit is contained in:
2026-06-26 15:24:19 +02:00
parent a4a8a7a28e
commit 92469daf03
6 changed files with 242 additions and 225 deletions
+22 -155
View File
@@ -5,8 +5,30 @@ import re
from typing import Any
try:
from .hardcore_text_cleanup import (
sanitize_hardcore_axis_values as _sanitize_hardcore_axis_values,
sanitize_hardcore_environment_anchors as _sanitize_hardcore_environment_anchors,
)
from .krea_cast import (
cast_prose as _cast_prose,
label_join as _label_join,
lowercase_for_inline_join as _lowercase_for_inline_join,
natural_label_text as _natural_label_text,
prompt_cast_descriptors as _prompt_cast_descriptors,
)
from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
from hardcore_text_cleanup import (
sanitize_hardcore_axis_values as _sanitize_hardcore_axis_values,
sanitize_hardcore_environment_anchors as _sanitize_hardcore_environment_anchors,
)
from krea_cast import (
cast_prose as _cast_prose,
label_join as _label_join,
lowercase_for_inline_join as _lowercase_for_inline_join,
natural_label_text as _natural_label_text,
prompt_cast_descriptors as _prompt_cast_descriptors,
)
from prompt_hygiene import sanitize_negative_text, sanitize_prose_text
@@ -46,72 +68,6 @@ def _clean(value: Any) -> str:
return text
# Ordered (regex pattern, replacement) pairs that swap furniture/bedding
# anchors for neutral phrasing. The pairs are applied sequentially, each one
# seeing the output of the previous one, so ORDER MATTERS: a specific phrase
# must be listed before any generic rule that would rewrite part of it.
HARDCORE_ENVIRONMENT_ANCHOR_REPLACEMENTS = (
    (r"\bon against a wall\b", "against a wall"),
    (r"\bstacked bodies on the bed\b", "close body alignment"),
    (r"\bstacked bodies with close body alignment\b", "close body alignment"),
    (r"\boverhead tangled-body anal frame\b", "overhead rear-entry anal frame"),
    (r"\btangled-body\b", "close-body"),
    (r"\bthree bodies tangled on the bed\b", "three bodies tangled in close contact"),
    (r"\ba triangle of bodies on the mattress\b", "a triangle of bodies in close contact"),
    (r"\bbodies tangled on the sheets\b", "bodies tangled in close contact"),
    (r"\bwet bodies tangled on sheets\b", "wet bodies tangled in close contact"),
    (r"\bbody arched on rumpled sheets\b", "body arched with clear skin contact"),
    (r"\bface-down ass-up on the mattress\b", "face-down ass-up position"),
    (r"\bsitting on the edge of the bed\b", "sitting on a raised edge"),
    (r"\blying at the bed edge with thighs open\b", "lying near a raised edge with thighs open"),
    (r"\bedge[- ]of[- ]bed\b", "edge-supported"),
    (r"\bbed[- ]edge\b", "raised edge"),
    (r"\bedge of (?:the )?bed\b", "raised edge"),
    (r"\bbed edge\b", "raised edge"),
    (r"\bhands? braced on the bed\b", "hands braced beside the body"),
    (r"\bone hand pressing into the mattress\b", "one hand braced beside the body"),
    (r"\bone foot planted on the bed\b", "one foot planted for leverage"),
    # The longer "sheets and skin" phrase must precede the shorter one,
    # otherwise the result would read "gripping skin and skin".
    (r"\bfingers gripping sheets and skin\b", "fingers gripping skin"),
    (r"\bfingers gripping sheets\b", "fingers gripping skin"),
    (r"\bhands gripping sheets\b", "hands gripping skin"),
    (r"\bone hand gripping the sheets\b", "one hand gripping skin"),
    (r"\brumpled bed sheets\b", "wrinkled body-contact fabric"),
    (r"\bwet sheets beneath the bodies\b", "visible wetness beneath the bodies"),
    (r"\bsexual fluids on sheets\b", "sexual fluids visible on skin"),
    (r"\bcum dripping onto sheets\b", "cum visible on skin"),
    (r"\bfluid dripping onto sheets\b", "fluid visible on skin"),
    (r"\bsquirting fluid on the sheets\b", "squirting fluid visible on skin"),
    (r"\bsoft sheets\b", "soft fabric"),
    (r"\bsilk sheets\b", "silk fabric"),
    (r"\bsheets\b", "fabric"),
    # Must run before the generic "mattress" rule below; otherwise
    # "a low mattress" turns into "a low low support surface" and this
    # rule can never match.
    (r"\ba low mattress\b", "a low body support"),
    (r"\bmattress\b", "low support surface"),
    (r"\ba low support surface\b", "a low body support"),
    (r"\ba wide couch\b", "a wide body support"),
    (r"\bwide couch\b", "wide body support"),
    (r"\bcouch\b", "body support"),
    (r"\bsofa\b", "body support"),
    (r"\bon the bed\b", "on a body support"),
    (r"\bon a bed\b", "on a body support"),
    (r"\bbedroom-floor\b", "floor-level"),
    (r"\bbedroom floor\b", "floor-level"),
)
def _sanitize_hardcore_environment_anchors(value: Any) -> str:
    """Rewrite environment anchors in *value* and tidy the punctuation.

    The value is first normalized with ``_clean``. Every
    ``(pattern, replacement)`` pair from
    ``HARDCORE_ENVIRONMENT_ANCHOR_REPLACEMENTS`` is then applied in order,
    case-insensitively, followed by a comma/whitespace cleanup pass.

    Returns an empty string when the cleaned value is empty.
    """
    cleaned = _clean(value)
    if cleaned:
        for anchor_pattern, neutral_phrase in HARDCORE_ENVIRONMENT_ANCHOR_REPLACEMENTS:
            cleaned = re.sub(anchor_pattern, neutral_phrase, cleaned, flags=re.IGNORECASE)
        # Replacements can leave stray spaces before commas, doubled commas,
        # or runs of whitespace; collapse them in this fixed order.
        for tidy_pattern, tidy_value in (
            (r"\s+,", ","),
            (r",\s*,", ","),
            (r"\s{2,}", " "),
        ):
            cleaned = re.sub(tidy_pattern, tidy_value, cleaned)
        return cleaned.strip()
    return ""
def _sanitize_hardcore_axis_values(values: Any) -> dict[str, str]:
    """Sanitize every value of a mapping, keyed by the stringified key.

    Anything that is not a ``dict`` yields an empty dict. Otherwise each
    value is passed through ``_sanitize_hardcore_environment_anchors`` and
    stored under ``str(key)``.
    """
    sanitized: dict[str, str] = {}
    if isinstance(values, dict):
        for axis_name, axis_text in values.items():
            sanitized[str(axis_name)] = _sanitize_hardcore_environment_anchors(axis_text)
    return sanitized
def _is_false(value: Any) -> bool:
if isinstance(value, bool):
return value is False
@@ -255,95 +211,6 @@ def _combine_negative(*parts: str) -> str:
return ", ".join(cleaned)
def _prompt_cast_descriptors(text: str) -> str:
    """Return the cleaned text with the verbose "Woman A" label shortened.

    Every occurrence of ``"Woman A / primary creator:"`` in the
    ``_clean``-normalized text becomes ``"Woman A:"``.
    """
    verbose_label = "Woman A / primary creator:"
    short_label = "Woman A:"
    # Splitting on the verbose label and re-joining with the short one is
    # equivalent to replacing every occurrence.
    return short_label.join(_clean(text).split(verbose_label))
def _cast_entries(text: str) -> list[tuple[str, str]]:
    """Parse a ``;``-separated cast string into ``(label, descriptor)`` pairs.

    Each segment is cleaned and must look like ``"Woman X: ..."`` or
    ``"Man X: ..."`` (``X`` being a single uppercase letter); segments that
    do not match are dropped. Descriptors are cleaned before being returned.
    """
    label_pattern = r"^((?:Woman|Man) [A-Z]):\s*(.+)$"
    segments = (_clean(chunk) for chunk in _prompt_cast_descriptors(text).split(";"))
    return [
        (found.group(1), _clean(found.group(2)))
        for segment in segments
        if (found := re.match(label_pattern, segment))
    ]
def _label_join(labels: list[str]) -> str:
    """Join cast labels into a natural-language phrase.

    Labels are cleaned and empty ones discarded. Special cases:

    * no labels -> ``"the named adults"``
    * exactly the set {Woman A, Man A} -> ``"the woman and man"``
    * a lone ``Woman A`` / ``Man A`` -> ``"the woman"`` / ``"the man"``

    Any other single label is returned as-is, two labels are joined with
    "and", and three or more use a serial comma.
    """
    names = [name for raw in labels if (name := _clean(raw))]
    if not names:
        return "the named adults"
    if set(names) == {"Woman A", "Man A"}:
        return "the woman and man"
    if len(names) == 1:
        solo_phrases = {"Woman A": "the woman", "Man A": "the man"}
        return solo_phrases.get(names[0], names[0])
    *leading, final = names
    if len(leading) == 1:
        return f"{leading[0]} and {final}"
    return f"{', '.join(leading)}, and {final}"
def _natural_label_text(text: Any, labels: list[str]) -> str:
    """Replace "Woman A" / "Man A" labels with natural phrases.

    Substitution only happens when *labels* is exactly the Woman A + Man A
    pair (as a set), or the single-item list ``["Woman A"]`` or
    ``["Man A"]``. Afterwards, any "the woman" / "the man" found at the
    start of the text or right after sentence-ending punctuation is
    re-capitalized.

    Returns an empty string when the cleaned text is empty.
    """
    prose = _clean(text)
    if not prose:
        return ""

    woman_swap = (r"\bWoman A\b", "the woman")
    man_swap = (r"\bMan A\b", "the man")
    if set(labels) == {"Woman A", "Man A"}:
        swaps = [woman_swap, man_swap]
    elif labels == ["Woman A"]:
        swaps = [woman_swap]
    elif labels == ["Man A"]:
        swaps = [man_swap]
    else:
        swaps = []
    for label_pattern, phrase in swaps:
        prose = re.sub(label_pattern, phrase, prose)

    def _capitalize_subject(found):
        # Keep the sentence boundary (group 1) and capitalize the subject.
        return found.group(1) + found.group(2).capitalize()

    return re.sub(
        r"(^|[.!?]\s+)(the woman|the man)\b",
        _capitalize_subject,
        prose,
        flags=re.IGNORECASE,
    )
def _lowercase_for_inline_join(text: str) -> str:
    """Lowercase a leading subject phrase so the text can follow a clause.

    When the cleaned text starts (case-insensitively) with "The woman",
    "The man", "The viewer" or "The named adults", that phrase is
    lowercased; the rest of the text is left untouched.
    """
    leading_subject = re.compile(
        r"^(The woman|The man|The viewer|The named adults)\b",
        flags=re.IGNORECASE,
    )

    def _lower_subject(found):
        return found.group(1).lower()

    return leading_subject.sub(_lower_subject, _clean(text))
def _cast_prose(
    text: str,
    central_label: str = "Woman A",
    omit_labels: list[str] | set[str] | tuple[str, ...] = (),
) -> tuple[str, list[str]]:
    """Turn a cast descriptor string into prose plus the labels it used.

    Args:
        text: Cast string parsed by ``_cast_entries``.
        central_label: Label named as the central subject in the
            multi-sentence form, and used as the subject of the fallback
            sentence when no entries can be parsed.
        omit_labels: Labels whose entries are dropped before rendering.

    Returns:
        ``(prose, labels)``. ``("", [])`` when every parsed entry was
        omitted; a ``"<central_label> is <text>"`` fallback (with an empty
        label list) when nothing could be parsed.
    """
    parsed = _cast_entries(text)
    skipped = set(omit_labels or [])
    kept = [pair for pair in parsed if pair[0] not in skipped]

    if not kept:
        if parsed:
            # Entries existed but all of them were filtered out.
            return "", []
        fallback = _clean(text)
        return (f"{central_label} is {fallback}" if fallback else "", [])

    labels = [pair[0] for pair in kept]

    # A lone Woman A or Man A is rendered as just the descriptor.
    if labels in (["Woman A"], ["Man A"]):
        return _with_indefinite_article(kept[0][1]), labels

    # The Woman A + Man A pair is always rendered woman-first.
    if len(labels) == 2 and set(labels) == {"Woman A", "Man A"}:
        descriptors = dict(kept)
        woman = _with_indefinite_article(descriptors["Woman A"])
        man = _with_indefinite_article(descriptors["Man A"])
        return f"{woman} alongside {man}", labels

    sentences = [f"{label} is {descriptor}." for label, descriptor in kept]
    if central_label in labels:
        sentences.append(f"{central_label} is the central subject.")
    return " ".join(sentences), labels
def _pov_labels_from_value(value: Any) -> list[str]:
labels: list[str] = []
if isinstance(value, list):