from __future__ import annotations

import re
from typing import Any

# Helpers that normalise "Woman A: ...; Man A: ..." cast descriptor strings
# and rewrite them as natural-language prose fragments.

# Compiled once at import time; every helper below funnels through _clean().
_WHITESPACE_RUN = re.compile(r"\s+")
_SPACE_BEFORE_PUNCT = re.compile(r"\s+([,.;:])")
_CAST_ENTRY = re.compile(r"^((?:Woman|Man) [A-Z]):\s*(.+)$")
_WOMAN_A = re.compile(r"\bWoman A\b")
_MAN_A = re.compile(r"\bMan A\b")
_SENTENCE_INITIAL_SUBJECT = re.compile(
    r"(^|[.!?]\s+)(the woman|the man)\b",
    flags=re.IGNORECASE,
)
_LEADING_SUBJECT = re.compile(
    r"^(The woman|The man|The viewer|The named adults)\b",
    flags=re.IGNORECASE,
)


def _clean(value: Any) -> str:
    """Return *value* as single-spaced, stripped text.

    ``None`` becomes the empty string and any other value is passed through
    ``str()``. Runs of whitespace (newlines included) collapse to one space,
    and whitespace directly before ``,``, ``.``, ``;`` or ``:`` is removed.
    """
    text = "" if value is None else str(value)
    text = _WHITESPACE_RUN.sub(" ", text).strip()
    return _SPACE_BEFORE_PUNCT.sub(r"\1", text)


def _with_indefinite_article(text: str) -> str:
    """Prefix cleaned *text* with "a" or "an" unless it already has one.

    Empty text is returned unchanged. The choice between "a" and "an" looks
    only at whether the first letter is one of ``aeiou``; it is a spelling
    heuristic, not a pronunciation check.
    """
    text = _clean(text)
    if not text or text.lower().startswith(("a ", "an ")):
        return text
    article = "an" if text[:1].lower() in "aeiou" else "a"
    return f"{article} {text}"


def prompt_cast_descriptors(text: str) -> str:
    """Clean *text* and shorten "Woman A / primary creator:" to "Woman A:"."""
    return _clean(text).replace("Woman A / primary creator:", "Woman A:")


def cast_entries(text: str) -> list[tuple[str, str]]:
    """Parse a ``;``-separated cast string into ``(label, descriptor)`` pairs.

    Only segments shaped like ``"Woman X: ..."`` or ``"Man X: ..."`` (with X a
    single uppercase ASCII letter) are kept; every other segment is silently
    dropped. Pairs are returned in input order.
    """
    entries: list[tuple[str, str]] = []
    for segment in prompt_cast_descriptors(text).split(";"):
        match = _CAST_ENTRY.match(_clean(segment))
        if match:
            entries.append((match.group(1), _clean(match.group(2))))
    return entries


def label_join(labels: list[str]) -> str:
    """Join cast labels into a short noun phrase.

    Blank labels are discarded after cleaning. With nothing left the result
    is "the named adults"; "Woman A" / "Man A" on their own or together are
    replaced by "the woman" / "the man" / "the woman and man". Any other
    combination is joined with "and", using a serial comma for three or more.
    """
    # Clean each label exactly once, then drop the ones that end up empty.
    cleaned = [label for label in map(_clean, labels) if label]
    if not cleaned:
        return "the named adults"
    if set(cleaned) == {"Woman A", "Man A"}:
        return "the woman and man"
    if len(cleaned) == 1:
        only = cleaned[0]
        if only == "Woman A":
            return "the woman"
        if only == "Man A":
            return "the man"
        return only
    if len(cleaned) == 2:
        return f"{cleaned[0]} and {cleaned[1]}"
    return f"{', '.join(cleaned[:-1])}, and {cleaned[-1]}"


def natural_label_text(text: Any, labels: list[str]) -> str:
    """Replace "Woman A" / "Man A" in *text* with "the woman" / "the man".

    The replacement is applied only when *labels* is exactly ``["Woman A"]``,
    exactly ``["Man A"]``, or contains just those two labels; otherwise the
    labels in the text are left alone. In every case, "the woman" / "the man"
    at the start of the text or after sentence-ending punctuation is then
    capitalised. Empty input yields the empty string.
    """
    text = _clean(text)
    if not text:
        return ""
    if set(labels) == {"Woman A", "Man A"}:
        text = _WOMAN_A.sub("the woman", text)
        text = _MAN_A.sub("the man", text)
    elif labels == ["Woman A"]:
        text = _WOMAN_A.sub("the woman", text)
    elif labels == ["Man A"]:
        text = _MAN_A.sub("the man", text)
    return _SENTENCE_INITIAL_SUBJECT.sub(
        lambda match: match.group(1) + match.group(2).capitalize(),
        text,
    )


def lowercase_for_inline_join(text: str) -> str:
    """Lowercase a leading generic subject so *text* can follow other prose.

    Only a leading "The woman", "The man", "The viewer" or "The named adults"
    (matched case-insensitively, as whole words) is affected; the rest of the
    cleaned text is returned untouched.
    """
    return _LEADING_SUBJECT.sub(
        lambda match: match.group(1).lower(),
        _clean(text),
    )


def cast_prose(
    text: str,
    central_label: str = "Woman A",
    omit_labels: list[str] | set[str] | tuple[str, ...] = (),
) -> tuple[str, list[str]]:
    """Turn a cast descriptor string into prose plus the labels it covers.

    Args:
        text: Cast descriptors, e.g. ``"Woman A: ...; Man A: ..."``.
        central_label: Label used when *text* has no parseable entries, and
            the one called out as the central subject in the multi-person
            form.
        omit_labels: Labels whose entries are dropped before rendering.

    Returns:
        ``(prose, labels)``. The shapes are:

        * every parsed entry omitted -> ``("", [])``;
        * nothing parseable -> ``("<central_label> is <text>", [])``, or
          ``("", [])`` when *text* is blank;
        * only "Woman A" or only "Man A" -> the descriptor with an indefinite
          article;
        * exactly "Woman A" and "Man A" -> both descriptors joined with
          "alongside", woman first;
        * anything else -> one "<label> is <descriptor>." sentence per entry,
          followed by a central-subject sentence when *central_label* is
          among the labels.
    """
    raw_entries = cast_entries(text)
    omitted = set(omit_labels or [])
    entries = [
        (label, descriptor)
        for label, descriptor in raw_entries
        if label not in omitted
    ]

    # Entries existed but were all filtered out: nothing to describe.
    if raw_entries and not entries:
        return "", []

    # No structured entries at all: treat the whole text as one descriptor.
    if not entries:
        fallback = _clean(text)
        return (f"{central_label} is {fallback}" if fallback else "", [])

    labels = [label for label, _descriptor in entries]

    if labels in (["Woman A"], ["Man A"]):
        return _with_indefinite_article(entries[0][1]), labels

    if len(labels) == 2 and set(labels) == {"Woman A", "Man A"}:
        by_label = dict(entries)
        woman = _with_indefinite_article(by_label["Woman A"])
        man = _with_indefinite_article(by_label["Man A"])
        return f"{woman} alongside {man}", labels

    sentences = [f"{label} is {descriptor}." for label, descriptor in entries]
    if central_label in labels:
        sentences.append(f"{central_label} is the central subject.")
    return " ".join(sentences), labels