from __future__ import annotations import re from typing import Any try: from . import caption_policy from . import formatter_input as input_policy from . import krea_cast as cast_policy from . import route_metadata as route_metadata_policy from .prompt_hygiene import sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. import caption_policy import formatter_input as input_policy import krea_cast as cast_policy import route_metadata as route_metadata_policy from prompt_hygiene import sanitize_prose_text OLD_TRIGGER = caption_policy.OLD_TRIGGER DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER STYLE_TAILS = caption_policy.STYLE_TAILS PROMPT_FIELD_LABELS = input_policy.prompt_field_labels() ITEM_LABELS = caption_policy.ITEM_LABELS ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS def _clean_text(value: Any) -> str: return input_policy.clean_text(value) def _is_false(value: Any) -> bool: if isinstance(value, bool): return value is False if isinstance(value, str): return value.strip().lower() in ("false", "0", "no", "off") return False def _expression_disabled(row: dict[str, Any]) -> bool: return bool(row.get("expression_disabled")) or _is_false(row.get("expression_enabled", True)) def _cap_first(text: str) -> str: text = _clean_text(text).strip(" ,") return text[:1].upper() + text[1:] if text else "" def _article(noun_phrase: str) -> str: word = noun_phrase.lstrip().lower() if word.startswith("hour") or word[:1] in "aeiou": return "an" return "a" def _sentence(text: str) -> str: text = _clean_text(text).strip(" ,;") if not text: return "" if text[-1] not in ".!?": text += "." return _cap_first(text) def _join_sentences(parts: list[str]) -> str: return " ".join(part for part in (_sentence(part) for part in parts) if part) def _formatter_hint_parts(row: dict[str, Any]) -> list[str]: hints: list[str] = [] if not isinstance(row, dict): return hints for hint in route_metadata_policy.row_formatter_hints(row, "caption"): hint = _clean_text(hint).strip(" .") if hint and hint not in hints: hints.append(hint) return hints def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str: hints = _formatter_hint_parts(row) if not hints: return prose return _join_sentences([prose, *hints]) def _human_join(parts: list[str]) -> str: parts = [part for part in (_clean_text(part) for part in parts) if part] if len(parts) <= 1: return "".join(parts) if len(parts) == 2: return f"{parts[0]} and {parts[1]}" return f"{', '.join(parts[:-1])}, and {parts[-1]}" def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str: return caption_policy.metadata_action_label(row, default) def _prompt_cast_descriptors(text: str) -> str: return cast_policy.prompt_cast_descriptors(text) def _cast_entries(text: str) -> list[tuple[str, str]]: return cast_policy.cast_entries(text) def _natural_cast_descriptor_text(text: str) -> str: return cast_policy.natural_cast_descriptor_text(text) def _cast_labels(text: str) -> list[str]: return cast_policy.cast_labels(text) def _natural_label_text(text: Any, labels: list[str]) -> str: return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False) def _strip_style_tail(text: str) -> str: return caption_policy.strip_style_tail(text) def _remove_trigger(text: str, trigger: str) -> str: return input_policy.strip_trigger_prefix( text, (trigger, OLD_TRIGGER, DEFAULT_TRIGGER), remove_exact=True, ) def _with_trigger(text: str, trigger: str, include_trigger: bool) -> str: text = _join_sentences([text]) if "." not in text else _clean_text(text) trigger = _clean_text(trigger or DEFAULT_TRIGGER) if not include_trigger or not trigger: return text if text.lower().startswith(trigger.lower() + "."): return text return f"{trigger}. {text}" def _maybe_json(text: str) -> dict[str, Any] | None: return input_policy.maybe_json(text) def _row_from_inputs(source_text: str, metadata_json: str, input_hint: str) -> tuple[dict[str, Any] | None, str]: return input_policy.row_from_inputs(source_text, metadata_json, input_hint) def _prompt_field(text: str, label: str) -> str: return input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS) def _row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str: return input_policy.row_value(row, key, labels, field_labels=PROMPT_FIELD_LABELS) def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str: for label in labels: value = input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS) if value: return value return "" def _normalize_composition(text: str) -> str: return caption_policy.normalize_composition(text) def _clean_clothing(text: str) -> str: return caption_policy.clean_clothing(text) def _body_phrase(body: Any, figure_note: Any = "") -> str: body = _clean_text(body) figure_note = _clean_text(figure_note) if not body: return figure_note if not figure_note: return f"{body} figure" if "figure" in figure_note.lower(): return f"{body} build and {figure_note}" return f"{body} figure with {figure_note}" def _single_caption_front(row: dict[str, Any]) -> dict[str, str]: caption = _clean_text(row.get("caption")) if not caption: return {} caption = _remove_trigger(_strip_style_tail(caption), _clean_text(row.get("trigger")) or DEFAULT_TRIGGER) caption = _remove_trigger(caption, OLD_TRIGGER) subject = _clean_text(row.get("primary_subject")) age = _clean_text(row.get("age_band") or row.get("age")) body_phrase = _clean_text(row.get("body_phrase")) if not body_phrase: body = _clean_text(row.get("body_type") or row.get("body")) figure = _clean_text(row.get("figure")) body_phrase = _body_phrase(body, figure) front = f"{subject}, {age}, {body_phrase}, " if subject in ("woman", "man") and age and body_phrase and caption.startswith(front): try: skin, hair, eyes, _rest = caption[len(front) :].split(", ", 3) except ValueError: return {} else: pieces = [piece.strip() for piece in caption.split(", ", 6)] if len(pieces) < 7: return {} subject, age, body_phrase, skin, hair, eyes, _rest = pieces if subject not in ("woman", "man"): return {} return { "caption_subject": subject, "caption_age": age, "caption_body_phrase": body_phrase, "caption_skin": skin, "caption_hair": hair, "caption_eyes": eyes, } def _pose_clause(pose: str) -> str: pose = _clean_text(pose) if not pose: return "" first = pose.split(None, 1)[0].lower() if first.endswith("ing") or first in ("seated", "reclined", "posed"): return pose return f"posing in {pose}" def _age_subject(age: str, subject: str) -> str: age = _clean_text(age) subject = _clean_text(subject) or "person" if not age: return f"An adult {subject}" clean_age = re.sub(r"\s+adults?$", "", age).strip() if "year-old" in clean_age: return f"A {clean_age} adult {subject}" if re.search(r"\d", clean_age): poss = "her" if subject == "woman" else "his" return f"An adult {subject} in {poss} {clean_age}" return f"An adult {clean_age} {subject}" def _clean_age_phrase(age: str) -> str: age = _clean_text(age) age = re.sub(r"\s+adults?$", "", age).strip() return age.replace("-year-old", " years old") def _subject_phrase_from_counts(row: dict[str, Any]) -> str: subject = _clean_text(row.get("subject_phrase")) if subject: return subject try: women = int(row.get("women_count") or 0) men = int(row.get("men_count") or 0) except (TypeError, ValueError): return _clean_text(row.get("primary_subject")) or "adult scene" parts = [] if women: parts.append(f"{women} adult {'woman' if women == 1 else 'women'}") if men: parts.append(f"{men} adult {'man' if men == 1 else 'men'}") if not parts: return _clean_text(row.get("primary_subject")) or "adult scene" return " and ".join(parts) def _verb_for_row(row: dict[str, Any]) -> str: try: return "is" if int(row.get("person_count") or 0) == 1 else "are" except (TypeError, ValueError): return "are" def _detail_allows(level: str, dense_only: bool = False) -> bool: return caption_policy.detail_allows(level, dense_only=dense_only) def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: subject = _clean_text(row.get("primary_subject") or row.get("subject") or "") if subject not in ("woman", "man"): return None caption_front = _single_caption_front(row) age = _clean_text(row.get("age") or row.get("age_band") or caption_front.get("caption_age") or "") body_phrase = _row_value(row, "body_phrase") or caption_front.get("caption_body_phrase", "") if not body_phrase: body = _clean_text(row.get("body_type") or row.get("body") or "") figure = _clean_text(row.get("figure")) body_phrase = _body_phrase(body, figure) skin = _row_value(row, "skin") or caption_front.get("caption_skin", "") hair = _row_value(row, "hair") or caption_front.get("caption_hair", "") eyes = _row_value(row, "eyes") or caption_front.get("caption_eyes", "") item = _row_value(row, "item", ITEM_LABELS) if item: item = _clean_clothing(item) if not item: item = _clean_clothing(_row_value(row, "clothing", ("Clothing", "Erotic outfit"))) scene = _row_value(row, "scene_text", ("Scene", "Setting")) pose = _row_value(row, "pose", ("Pose",)) expression = "" if _expression_disabled(row) else _row_value(row, "expression", ("Facial expression", "Facial expressions")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) camera_scene = _clean_text(row.get("camera_scene_directive")) prop = _row_value(row, "prop", ("Prop/detail",)) style = _row_value(row, "style") if keep_style else "" parts = [] opener = _age_subject(age, subject) appearance_details = [piece for piece in (skin, hair, eyes) if piece] if body_phrase: parts.append(f"{opener} has {_article(body_phrase)} {body_phrase}") elif appearance_details: parts.append(f"{opener} has {_human_join(appearance_details)}") else: parts.append(opener) if body_phrase and appearance_details: parts.append(f"{pronoun(subject)} has {_human_join(appearance_details)}") if item: verb = "wears" if subject == "woman" else "is dressed in" parts.append(f"{pronoun(subject)} {verb} {item}") if prop: parts.append(f"{pronoun(subject)} is {prop}") if pose: parts.append(f"{pronoun(subject)} is {_pose_clause(pose)}") if expression: parts.append(f"{possessive_pronoun(subject)} expression is {expression}") if scene: parts.append(f"The setting is {scene}") if _detail_allows(detail_level) and camera_scene: parts.append(camera_scene) if _detail_allows(detail_level) and composition: parts.append(f"The composition is {composition}") if keep_style and style: parts.append(f"The visual style is {style}") return _join_sentences(parts), "metadata(single)" def pronoun(subject: str) -> str: return "She" if subject == "woman" else "He" def possessive_pronoun(subject: str) -> str: return "Her" if subject == "woman" else "His" def _couple_clothing_sentence(clothing: str) -> str: clothing = _clean_text(clothing) lower = clothing.lower() partner_text = re.sub(r"\bPartner ([AB]) wears\b", r"Partner \1 wearing", clothing) partner_text = re.sub(r"\bPartner ([AB]) has\b", r"Partner \1 with", partner_text) if lower.startswith("partner a "): return f"The outfits show {partner_text}" if lower.startswith(("two ", "paired ", "coordinated ")): return f"The outfits are {partner_text}" return f"They wear {clothing}" def _couple_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: subject = _clean_text(row.get("subject_phrase") or row.get("primary_subject")) primary = _clean_text(row.get("primary_subject")) if "couple" not in primary and subject not in ("two women", "two men", "a woman and a man"): if not primary.startswith("two ") and " and " not in subject: return None if subject == "woman and man": subject = "a woman and a man" ages = _row_value(row, "age", ("Ages",)) or _clean_text(row.get("age_band")) body = _row_value(row, "body", ("Body types",)) or _clean_text(row.get("body_type")) pose = _row_value(row, "pose", ("Pose",)) pose = pose.replace(", affectionate and flirtatious but non-explicit", "") clothing = _clean_clothing(_row_value(row, "item", ITEM_LABELS) or _row_value(row, "clothing", ("Clothing",))) scene = _row_value(row, "scene_text", ("Scene", "Setting")) expression = "" if not _expression_disabled(row): expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) camera_scene = _clean_text(row.get("camera_scene_directive")) style = _row_value(row, "style") if keep_style else "" parts = [f"{_cap_first(subject)} are adults"] if ages: parts.append(f"The age detail is {_clean_age_phrase(ages)}") if body: parts.append(f"Their body types are {body}") if clothing: parts.append(_couple_clothing_sentence(clothing)) if pose: parts.append(f"The pose is {pose}") if scene: parts.append(f"The setting is {scene}") if _detail_allows(detail_level) and camera_scene: parts.append(camera_scene) if expression: parts.append(f"Their expressions are {expression}") if _detail_allows(detail_level) and composition: parts.append(f"The composition is {composition}") if keep_style and style: parts.append(f"The visual style is {style}") return _join_sentences(parts), "metadata(couple)" def _configured_cast_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: if _clean_text(row.get("subject_type")) != "configured_cast": if "hardcore sexual poses" not in _clean_text(row.get("main_category")).lower(): return None subject = _subject_phrase_from_counts(row) verb = _verb_for_row(row) cast = _row_value(row, "cast_summary", ("Cast",)) role_graph = _row_value(row, "role_graph", ("Role graph",)) item = _row_value(row, "item", ITEM_LABELS) scene = _row_value(row, "scene_text", ("Setting", "Scene")) expression = "" if not _expression_disabled(row): expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) camera_scene = _clean_text(row.get("camera_scene_directive")) cast_descriptor_text = _row_value(row, "cast_descriptor_text", ("Characters", "Cast descriptors")) scene_kind = _row_value(row, "scene_kind") or "explicit adult sex scene" style = _row_value(row, "style") if keep_style else "" parts = [f"{_cap_first(subject)} {verb} shown as a consensual {scene_kind}"] if cast_descriptor_text: parts.append(_natural_cast_descriptor_text(cast_descriptor_text)) if cast and not cast_descriptor_text: parts.append(f"The cast is {cast}") if role_graph: parts.append(role_graph) if item: parts.append(f"The {_metadata_action_label(row)} is {item}") scene_bits = [] if scene: scene_bits.append(f"set in {scene}") if expression: scene_bits.append(f"with {expression}") if composition: scene_bits.append(f"framed as {composition}") if scene_bits and _detail_allows(detail_level): parts.append(", ".join(scene_bits)) if _detail_allows(detail_level) and camera_scene: parts.append(camera_scene) if keep_style and style: parts.append(f"The visual style is {style}") return _join_sentences(parts), "metadata(configured_cast)" def _group_or_layout_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: primary = _clean_text(row.get("primary_subject")) if "group" not in primary and primary != "layout scene": return None subject = _row_value(row, "subject_phrase") or primary age = _row_value(row, "age", ("Ages",)) or _clean_text(row.get("age_band")) item = _clean_clothing(_row_value(row, "item", ITEM_LABELS) or _row_value(row, "clothing", ("Clothing",))) scene = _row_value(row, "scene_text", ("Scene", "Setting")) expression = "" if not _expression_disabled(row): expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) camera_scene = _clean_text(row.get("camera_scene_directive")) style = _row_value(row, "style") if keep_style else "" if primary == "layout scene": parts = [f"{_cap_first(subject)} is arranged as an adults-only designed illustration layout"] if expression: parts.append(f"The featured expression is {expression}") else: parts = [f"{_cap_first(subject)} includes adults"] if age: parts[0] += f" ages {age}" if item: parts.append(f"They wear {item}") if expression: parts.append(f"They show {expression}") if scene: parts.append(f"The setting is {scene}") if _detail_allows(detail_level) and camera_scene: parts.append(camera_scene) if _detail_allows(detail_level) and composition: parts.append(f"The composition is {composition}") if keep_style and style: parts.append(f"The visual style is {style}") return _join_sentences(parts), "metadata(group_layout)" def _insta_of_pair_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: if _clean_text(row.get("mode")).lower() != "insta/of": return None soft_row = row.get("softcore_row") hard_row = row.get("hardcore_row") if not isinstance(soft_row, dict) or not isinstance(hard_row, dict): return None hard_row_for_text = dict(hard_row) options = row.get("options") if isinstance(options, dict) and options.get("continuity") == "same_creator_same_room": if soft_row.get("scene_text"): hard_row_for_text["scene_text"] = soft_row["scene_text"] if soft_row.get("composition"): hard_row_for_text["composition"] = soft_row["composition"] soft_text, _soft_method = _metadata_to_prose(soft_row, detail_level, keep_style) hard_text, _hard_method = _metadata_to_prose(hard_row_for_text, detail_level, keep_style) descriptor = _clean_text(row.get("shared_descriptor")) options = row.get("options") if isinstance(row.get("options"), dict) else {} cast_descriptors = row.get("shared_cast_descriptors") if isinstance(cast_descriptors, list): cast_descriptor_text = "; ".join(_clean_text(item) for item in cast_descriptors if _clean_text(item)) else: cast_descriptor_text = _clean_text(cast_descriptors) labels = _cast_labels(cast_descriptor_text) same_soft_cast = options.get("softcore_cast") == "same_as_hardcore" parts = [] if cast_descriptor_text and same_soft_cast: parts.append(_natural_cast_descriptor_text(cast_descriptor_text)) elif descriptor: parts.append(f"A {descriptor}") if cast_descriptor_text and not same_soft_cast: parts.append(_natural_cast_descriptor_text(cast_descriptor_text)) if same_soft_cast: parts.append("The softcore version keeps the same adult cast present together in a non-explicit teaser setup") partner_styling = row.get("softcore_partner_styling") if isinstance(partner_styling, dict): outfits = partner_styling.get("outfits") if isinstance(outfits, list): outfit_text = _human_join([_clean_text(item) for item in outfits if _clean_text(item)]) outfit_text = _natural_label_text(outfit_text, labels) if outfit_text: parts.append(f"Softcore partner styling: {outfit_text}") pose = _clean_text(partner_styling.get("pose")) if pose: parts.append(f"The shared softcore cast pose is {pose}") if soft_text: parts.append(f"Softcore version: {soft_text}") if hard_text: parts.append(f"Hardcore version: {hard_text}") if not parts: return None return _join_sentences(parts), "metadata(insta_of_pair)" def _metadata_to_prose(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str]: for builder in ( _insta_of_pair_from_row, _configured_cast_from_row, _single_from_row, _couple_from_row, _group_or_layout_from_row, ): result = builder(row, detail_level, keep_style) if result: prose, method = result return _append_formatter_hints(prose, row), method prose, method = _text_to_prose(_clean_text(row.get("caption") or row.get("prompt")), detail_level, keep_style) return _append_formatter_hints(prose, row), method def _prompt_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, str] | None: if ":" not in text: return None cast = _field_from_any_prompt(text, ("Cast",)) item = _field_from_any_prompt(text, ITEM_LABELS) scene = _field_from_any_prompt(text, ("Setting", "Scene")) pose = _field_from_any_prompt(text, ("Pose",)) role_graph = _field_from_any_prompt(text, ("Role graph",)) expression = _field_from_any_prompt(text, ("Facial expressions", "Facial expression")) composition = _normalize_composition(_field_from_any_prompt(text, ("Composition",))) if not any((cast, item, scene, pose, role_graph, expression, composition)): return None subject = _clean_text(text.split(":", 1)[0]) parts = [] if subject: parts.append(f"{_cap_first(subject)}") if cast: parts.append(f"The cast is {cast}") if role_graph: parts.append(role_graph) if item: item_label = "sexual pose" if _field_from_any_prompt(text, ("Sexual pose",)) else "key detail" parts.append(f"The {item_label} is {item}") elif pose: parts.append(f"The pose is {pose}") scene_bits = [] if scene: scene_bits.append(f"set in {scene}") if expression: scene_bits.append(f"with {expression}") if composition: scene_bits.append(f"framed as {composition}") if scene_bits and _detail_allows(detail_level): parts.append(", ".join(scene_bits)) if keep_style: style = _clean_text(text.split(":", 1)[1].split(".", 1)[0]) if style: parts.append(f"The visual style is {style}") return _join_sentences(parts), "prompt(labels)" def _parts_to_sentence(parts: list[str], detail_level: str) -> str: parts = [part for part in (_clean_text(part).strip(" ,.") for part in parts) if part] if not parts: return "" if len(parts) == 1: return _sentence(parts[0]) subject = parts[0] trailing_style = "" if parts[-1].lower().endswith("illustration"): trailing_style = parts.pop() composition = parts[-1] if len(parts) >= 2 else "" scene = parts[-2] if len(parts) >= 3 else "" details = parts[1:-2] if len(parts) >= 3 else parts[1:] sentences = [f"{_cap_first(subject)} includes {', '.join(details)}" if details else _cap_first(subject)] if _detail_allows(detail_level) and scene: sentences.append(f"The setting is {scene}") if _detail_allows(detail_level) and composition: sentences.append(f"The composition is {composition}") if trailing_style and _detail_allows(detail_level, dense_only=True): sentences.append(f"The visual style is {trailing_style}") return _join_sentences(sentences) def _text_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, str]: text = _clean_text(text) prompt_result = _prompt_to_prose(text, detail_level, keep_style) if prompt_result: return prompt_result text = _remove_trigger(_strip_style_tail(text), DEFAULT_TRIGGER) text = _remove_trigger(text, OLD_TRIGGER) parts = [part.strip() for part in text.split(",")] prose = _parts_to_sentence(parts, detail_level) return prose or _sentence(text), "text(fallback)" def naturalize_caption( source_text: str, metadata_json: str = "", input_hint: str = "auto", trigger: str = DEFAULT_TRIGGER, include_trigger: bool = True, detail_level: str = "balanced", style_policy: str = "drop_style_tail", caption_profile: str = caption_policy.CAPTION_PROFILE_DEFAULT, ) -> tuple[str, str]: """Rewrite tag-style prompt/caption text into compact natural language.""" input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto" detail_level, style_policy, include_trigger = caption_policy.apply_caption_profile( caption_profile, detail_level=detail_level, style_policy=style_policy, include_trigger=include_trigger, ) keep_style = caption_policy.keep_style_terms(style_policy) row, row_method = _row_from_inputs(source_text, metadata_json, input_hint) if row is not None: prose, method = _metadata_to_prose(row, detail_level, keep_style) caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,)) return caption, f"{row_method}:{method}" prose, method = _text_to_prose(source_text, detail_level, keep_style) caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,)) return caption, method