"""Text helpers used when turning metadata rows into caption prose.

Many functions here are thin adapters over the sibling policy modules
(``caption_policy``, ``formatter_input``, ``krea_cast``, ...) so callers can
depend on one flat namespace.
"""

from __future__ import annotations

import re
from typing import Any, Callable

try:
    from . import caption_metadata_routes
    from . import caption_policy
    from . import formatter_input as input_policy
    from . import item_axis_policy
    from . import krea_cast as cast_policy
    from . import route_metadata as route_metadata_policy
    from . import softcore_text_policy
except ImportError:  # Allows local smoke tests with `python -c`.
    import caption_metadata_routes
    import caption_policy
    import formatter_input as input_policy
    import item_axis_policy
    import krea_cast as cast_policy
    import route_metadata as route_metadata_policy
    import softcore_text_policy


OLD_TRIGGER = caption_policy.OLD_TRIGGER
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
ITEM_LABELS = caption_policy.ITEM_LABELS

# Leading numerals that are read with a vowel sound ("eight", "eighty-one",
# "eleven", "eighteen") and therefore take "an" rather than "a".
_AN_AGE_PATTERN = re.compile(r"(?:8\d?|11|18)(?!\d)")


def clean_text(value: Any) -> str:
    """Delegate to ``input_policy.clean_text``."""
    return input_policy.clean_text(value)


def is_false(value: Any) -> bool:
    """Return True when *value* is ``False`` or a false-like string.

    Recognised strings are ``"false"``, ``"0"``, ``"no"`` and ``"off"``
    (case-insensitive, surrounding whitespace ignored). Every other type,
    including ``None`` and the integer ``0``, yields ``False``.
    """
    if isinstance(value, bool):
        return value is False
    if isinstance(value, str):
        return value.strip().lower() in ("false", "0", "no", "off")
    return False


def expression_disabled(row: dict[str, Any]) -> bool:
    """Return True when the row switches expressions off.

    Either ``expression_disabled`` is truthy or ``expression_enabled`` holds a
    false-like value (see :func:`is_false`). A missing ``expression_enabled``
    key counts as enabled.
    """
    return bool(row.get("expression_disabled")) or is_false(row.get("expression_enabled", True))


def cap_first(text: str) -> str:
    """Clean *text*, trim edge spaces/commas and upper-case the first character."""
    text = clean_text(text).strip(" ,")
    return text[:1].upper() + text[1:] if text else ""


def article(noun_phrase: str) -> str:
    """Return the indefinite article (``"a"`` or ``"an"``) for *noun_phrase*.

    Phrases starting with a vowel letter or with ``"hour"`` get ``"an"``;
    everything else, including an empty or whitespace-only phrase, gets
    ``"a"``.
    """
    word = noun_phrase.lstrip().lower()
    # A tuple-based startswith is used instead of `word[:1] in "aeiou"`:
    # the empty string is a substring of every string, so the membership
    # test wrongly reported "an" for an empty phrase.
    if word.startswith(("hour", "a", "e", "i", "o", "u")):
        return "an"
    return "a"


def sentence(text: str) -> str:
    """Turn *text* into a capitalised sentence ending in ``.``, ``!`` or ``?``.

    Returns ``""`` when nothing is left after cleaning and trimming spaces,
    commas and semicolons from the ends.
    """
    text = clean_text(text).strip(" ,;")
    if not text:
        return ""
    if text[-1] not in ".!?":
        text += "."
    return cap_first(text)


def join_sentences(parts: list[str]) -> str:
    """Format each part with :func:`sentence` and join the non-empty ones with spaces."""
    return " ".join(part for part in (sentence(part) for part in parts) if part)


def formatter_hint_parts(row: dict[str, Any]) -> list[str]:
    """Collect the distinct, non-empty ``"caption"`` formatter hints for *row*.

    Hints come from ``route_metadata_policy.row_formatter_hints``; each one is
    cleaned and stripped of edge spaces and periods. Order of first
    appearance is kept. A non-dict *row* yields an empty list.
    """
    hints: list[str] = []
    if not isinstance(row, dict):
        return hints
    for hint in route_metadata_policy.row_formatter_hints(row, "caption"):
        hint = clean_text(hint).strip(" .")
        if hint and hint not in hints:
            hints.append(hint)
    return hints


def append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
    """Append the row's formatter hints to *prose* as extra sentences.

    *prose* is returned untouched when the row has no hints.
    """
    hints = formatter_hint_parts(row)
    if not hints:
        return prose
    return join_sentences([prose, *hints])


def human_join(parts: list[str]) -> str:
    """Join cleaned, non-empty parts as an English list.

    Two items are joined with ``" and "``; three or more use commas with a
    serial comma before the final ``"and"``.
    """
    parts = [part for part in (clean_text(part) for part in parts) if part]
    if len(parts) <= 1:
        return "".join(parts)
    if len(parts) == 2:
        return f"{parts[0]} and {parts[1]}"
    return f"{', '.join(parts[:-1])}, and {parts[-1]}"


def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Delegate to ``caption_policy.metadata_action_label``."""
    return caption_policy.metadata_action_label(row, default)


def item_axis_detail_text(row: dict[str, Any], existing_text: str = "") -> str:
    """Return the row's item-axis value texts joined with :func:`human_join`.

    The texts come from ``item_axis_policy.row_axis_value_texts`` with the
    policy's ``METADATA_AXIS_KEYS`` skipped; *existing_text* is forwarded to
    that helper unchanged.
    """
    details = item_axis_policy.row_axis_value_texts(
        row,
        skip_keys=item_axis_policy.METADATA_AXIS_KEYS,
        existing_text=existing_text,
    )
    return human_join(details)


def prompt_cast_descriptors(text: str) -> str:
    """Delegate to ``cast_policy.prompt_cast_descriptors``."""
    return cast_policy.prompt_cast_descriptors(text)


def cast_entries(text: str) -> list[tuple[str, str]]:
    """Delegate to ``cast_policy.cast_entries``."""
    return cast_policy.cast_entries(text)


def natural_cast_descriptor_text(text: str) -> str:
    """Delegate to ``cast_policy.natural_cast_descriptor_text``."""
    return cast_policy.natural_cast_descriptor_text(text)


def cast_labels(text: str) -> list[str]:
    """Delegate to ``cast_policy.cast_labels``."""
    return cast_policy.cast_labels(text)


def natural_label_text(text: Any, labels: list[str]) -> str:
    """Delegate to ``cast_policy.natural_label_text`` with sentence-start capitalisation off."""
    return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False)


def strip_style_tail(text: str) -> str:
    """Delegate to ``caption_policy.strip_style_tail``."""
    return caption_policy.strip_style_tail(text)


def remove_trigger(text: str, trigger: str) -> str:
    """Strip a leading trigger from *text*.

    The given *trigger*, ``OLD_TRIGGER`` and ``DEFAULT_TRIGGER`` are all passed
    as candidates to ``input_policy.strip_trigger_prefix`` with
    ``remove_exact=True``.
    """
    return input_policy.strip_trigger_prefix(
        text,
        (trigger, OLD_TRIGGER, DEFAULT_TRIGGER),
        remove_exact=True,
    )


def with_trigger(text: str, trigger: str, include_trigger: bool) -> str:
    """Normalise *text* and optionally prefix it with ``"<trigger>. "``.

    Text without any period is formatted as a sentence; text that already
    contains one is only cleaned. An empty *trigger* falls back to
    ``DEFAULT_TRIGGER``. The prefix is skipped when *include_trigger* is
    falsy, when the trigger cleans to nothing, or when the text already
    starts with ``"<trigger>."`` (case-insensitive).
    """
    text = join_sentences([text]) if "." not in text else clean_text(text)
    trigger = clean_text(trigger or DEFAULT_TRIGGER)
    if not include_trigger or not trigger:
        return text
    if text.lower().startswith(trigger.lower() + "."):
        return text
    return f"{trigger}. {text}"


def prompt_field(text: str, label: str) -> str:
    """Delegate to ``input_policy.prompt_field`` using ``PROMPT_FIELD_LABELS``."""
    return input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS)


def row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str:
    """Delegate to ``input_policy.row_value`` using ``PROMPT_FIELD_LABELS``."""
    return input_policy.row_value(row, key, labels, field_labels=PROMPT_FIELD_LABELS)


def field_row_value(row: dict[str, Any], key: str) -> str:
    """Return :func:`row_value` for *key* without any extra labels."""
    return row_value(row, key)


def field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
    """Return the first non-empty prompt field found under any of *labels*, else ``""``."""
    for label in labels:
        value = input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS)
        if value:
            return value
    return ""


def normalize_composition(text: str) -> str:
    """Delegate to ``caption_policy.normalize_composition``."""
    return caption_policy.normalize_composition(text)


def clean_clothing(text: str) -> str:
    """Delegate to ``caption_policy.clean_clothing``."""
    return caption_policy.clean_clothing(text)


def body_phrase(body: Any, figure_note: Any = "") -> str:
    """Combine a body type and an optional figure note into one phrase.

    * no body -> the figure note alone (possibly empty)
    * no note -> ``"<body> figure"``
    * note already mentions "figure" -> ``"<body> build and <note>"``
    * otherwise -> ``"<body> figure with <note>"``
    """
    body = clean_text(body)
    figure_note = clean_text(figure_note)
    if not body:
        return figure_note
    if not figure_note:
        return f"{body} figure"
    if "figure" in figure_note.lower():
        return f"{body} build and {figure_note}"
    return f"{body} figure with {figure_note}"


def single_caption_front(row: dict[str, Any]) -> dict[str, str]:
    """Recover the leading descriptor fields from a single-subject caption.

    The row's ``caption`` has its style tail and trigger removed and is then
    read as ``"subject, age, body phrase, skin, hair, eyes, rest"``.

    Returns:
        A dict with the keys ``caption_subject``, ``caption_age``,
        ``caption_body_phrase``, ``caption_skin``, ``caption_hair`` and
        ``caption_eyes``. An empty dict is returned when the caption is
        missing, has too few comma-separated pieces, or its subject is
        neither ``"woman"`` nor ``"man"``.
    """
    caption = clean_text(row.get("caption"))
    if not caption:
        return {}
    caption = remove_trigger(
        strip_style_tail(caption),
        clean_text(row.get("trigger")) or DEFAULT_TRIGGER,
    )
    caption = remove_trigger(caption, OLD_TRIGGER)
    subject = clean_text(row.get("primary_subject"))
    age = clean_text(row.get("age_band") or row.get("age"))
    phrase = clean_text(row.get("body_phrase"))
    if not phrase:
        body = clean_text(row.get("body_type") or row.get("body"))
        figure = clean_text(row.get("figure"))
        phrase = body_phrase(body, figure)
    front = f"{subject}, {age}, {phrase}, "
    if subject in ("woman", "man") and age and phrase and caption.startswith(front):
        # Preferred path: the row metadata tells us exactly where the front
        # ends, so a body phrase that itself contains ", " cannot shift the
        # skin/hair/eyes columns.
        try:
            skin, hair, eyes, _rest = caption[len(front) :].split(", ", 3)
        except ValueError:
            return {}
    else:
        # Fallback: trust the positional layout of the caption alone.
        pieces = [piece.strip() for piece in caption.split(", ", 6)]
        if len(pieces) < 7:
            return {}
        subject, age, phrase, skin, hair, eyes, _rest = pieces
        if subject not in ("woman", "man"):
            return {}
    return {
        "caption_subject": subject,
        "caption_age": age,
        "caption_body_phrase": phrase,
        "caption_skin": skin,
        "caption_hair": hair,
        "caption_eyes": eyes,
    }


def pose_clause(pose: str) -> str:
    """Return *pose* as a clause that can follow a subject.

    Poses whose first word ends in "ing" or is "seated", "reclined" or
    "posed" are returned as-is; anything else is prefixed with
    ``"posing in "``. Empty or whitespace-only input yields ``""``.
    """
    pose = clean_text(pose)
    words = pose.split(None, 1)
    # Guard on the split result rather than on `pose` so whitespace-only
    # text cannot raise IndexError.
    if not words:
        return ""
    first = words[0].lower()
    if first.endswith("ing") or first in ("seated", "reclined", "posed"):
        return pose
    return f"posing in {pose}"


def _age_article(age_phrase: str) -> str:
    """Return ``"An"`` or ``"A"`` for a phrase that starts with an age."""
    if _AN_AGE_PATTERN.match(age_phrase):
        return "An"
    if age_phrase[:1].isdigit():
        return "A"
    return article(age_phrase).capitalize()


def age_subject(age: str, subject: str) -> str:
    """Build the opening noun phrase for an adult subject of a given age.

    Args:
        age: Free-form age text; a trailing " adult"/" adults" is dropped.
        subject: Subject noun; defaults to "person" when empty.

    Returns:
        * no usable age (empty, or just "adult"/"adults") -> "An adult <subject>"
        * age containing "year-old" -> "A/An <age> adult <subject>", with the
          article chosen by how the age is read aloud ("An 18-year-old")
        * other age containing a digit -> "An adult <subject> in her/his <age>"
        * otherwise -> "An adult <age> <subject>"
    """
    age = clean_text(age)
    subject = clean_text(subject) or "person"
    clean_age = re.sub(r"\s+adults?$", "", age).strip()
    # A bare "adult" carries no age information and would otherwise produce
    # "An adult adult <subject>".
    if not clean_age or clean_age in ("adult", "adults"):
        return f"An adult {subject}"
    if "year-old" in clean_age:
        return f"{_age_article(clean_age)} {clean_age} adult {subject}"
    if re.search(r"\d", clean_age):
        poss = "her" if subject == "woman" else "his"
        return f"An adult {subject} in {poss} {clean_age}"
    return f"An adult {clean_age} {subject}"


def clean_age_phrase(age: str) -> str:
    """Drop a trailing " adult"/" adults" and rewrite "-year-old" as " years old"."""
    age = clean_text(age)
    age = re.sub(r"\s+adults?$", "", age).strip()
    return age.replace("-year-old", " years old")


def subject_phrase_from_counts(row: dict[str, Any]) -> str:
    """Describe the cast of *row* from its explicit phrase or head counts.

    ``subject_phrase`` wins when present. Otherwise ``women_count`` and
    ``men_count`` are rendered as e.g. "2 adult women and 1 adult man". When
    the counts are unusable or both zero, ``primary_subject`` is used, with
    "adult scene" as the last resort.
    """
    subject = clean_text(row.get("subject_phrase"))
    if subject:
        return subject
    try:
        women = int(row.get("women_count") or 0)
        men = int(row.get("men_count") or 0)
    except (TypeError, ValueError):
        return clean_text(row.get("primary_subject")) or "adult scene"
    parts = []
    if women:
        parts.append(f"{women} adult {'woman' if women == 1 else 'women'}")
    if men:
        parts.append(f"{men} adult {'man' if men == 1 else 'men'}")
    if not parts:
        return clean_text(row.get("primary_subject")) or "adult scene"
    return " and ".join(parts)


def verb_for_row(row: dict[str, Any]) -> str:
    """Pick the verb form that agrees with the row's ``person_count``.

    Returns ``"is"`` only when the count converts to exactly 1; a missing,
    falsy or non-numeric count yields ``"are"``.
    """
    try:
        headcount = int(row.get("person_count") or 0)
    except (TypeError, ValueError):
        return "are"
    if headcount == 1:
        return "is"
    return "are"


def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Forward to ``caption_policy.detail_allows``, passing *dense_only* by keyword."""
    allowed = caption_policy.detail_allows(level, dense_only=dense_only)
    return allowed


def metadata_route_dependencies(
    metadata_to_prose: Callable[..., tuple[str, str]],
) -> caption_metadata_routes.CaptionMetadataRouteDependencies:
    """Bundle this module's helpers for the caption metadata routes.

    Args:
        metadata_to_prose: Callable supplied by the caller and stored in the
            bundle unchanged.

    Returns:
        A ``CaptionMetadataRouteDependencies`` populated with the module's
        helper functions, ``ITEM_LABELS`` and
        ``softcore_text_policy.softcore_caption_setup_phrase``.
    """
    helpers = {
        "item_labels": ITEM_LABELS,
        "clean_text": clean_text,
        "row_value": row_value,
        "field_row_value": field_row_value,
        "clean_clothing": clean_clothing,
        "normalize_composition": normalize_composition,
        "expression_disabled": expression_disabled,
        "detail_allows": detail_allows,
        "join_sentences": join_sentences,
        "human_join": human_join,
        "article": article,
        "cap_first": cap_first,
        "body_phrase": body_phrase,
        "single_caption_front": single_caption_front,
        "pose_clause": pose_clause,
        "age_subject": age_subject,
        "clean_age_phrase": clean_age_phrase,
        "subject_phrase_from_counts": subject_phrase_from_counts,
        "verb_for_row": verb_for_row,
        "metadata_action_label": metadata_action_label,
        "item_axis_detail_text": item_axis_detail_text,
        "natural_cast_descriptor_text": natural_cast_descriptor_text,
        "cast_labels": cast_labels,
        "natural_label_text": natural_label_text,
        "softcore_caption_setup_phrase": softcore_text_policy.softcore_caption_setup_phrase,
        "metadata_to_prose": metadata_to_prose,
    }
    return caption_metadata_routes.CaptionMetadataRouteDependencies(**helpers)