diff --git a/caption_metadata_routes.py b/caption_metadata_routes.py new file mode 100644 index 0000000..b21e312 --- /dev/null +++ b/caption_metadata_routes.py @@ -0,0 +1,391 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass +from typing import Any, Callable + + +@dataclass(frozen=True) +class CaptionMetadataRouteRequest: + row: dict[str, Any] + detail_level: str + keep_style: bool + + +@dataclass(frozen=True) +class CaptionMetadataRoute: + prose: str + method: str + + def as_tuple(self) -> tuple[str, str]: + return self.prose, self.method + + +@dataclass(frozen=True) +class CaptionMetadataRouteDependencies: + item_labels: tuple[str, ...] + clean_text: Callable[[Any], str] + row_value: Callable[[dict[str, Any], str, tuple[str, ...]], str] + field_row_value: Callable[[dict[str, Any], str], str] + clean_clothing: Callable[[str], str] + normalize_composition: Callable[[str], str] + expression_disabled: Callable[[dict[str, Any]], bool] + detail_allows: Callable[..., bool] + join_sentences: Callable[[list[str]], str] + human_join: Callable[[list[str]], str] + article: Callable[[str], str] + cap_first: Callable[[str], str] + body_phrase: Callable[[Any, Any], str] + single_caption_front: Callable[[dict[str, Any]], dict[str, str]] + pose_clause: Callable[[str], str] + age_subject: Callable[[str, str], str] + clean_age_phrase: Callable[[str], str] + subject_phrase_from_counts: Callable[[dict[str, Any]], str] + verb_for_row: Callable[[dict[str, Any]], str] + metadata_action_label: Callable[[dict[str, Any]], str] + natural_cast_descriptor_text: Callable[[str], str] + cast_labels: Callable[[str], list[str]] + natural_label_text: Callable[[Any, list[str]], str] + metadata_to_prose: Callable[[dict[str, Any], str, bool], tuple[str, str]] + + +def pronoun(subject: str) -> str: + return "She" if subject == "woman" else "He" + + +def possessive_pronoun(subject: str) -> str: + return "Her" if subject == "woman" else "His" + + +def couple_clothing_sentence(clothing: str, clean_text: Callable[[Any], str]) -> str: + clothing = clean_text(clothing) + lower = clothing.lower() + partner_text = re.sub(r"\bPartner ([AB]) wears\b", r"Partner \1 wearing", clothing) + partner_text = re.sub(r"\bPartner ([AB]) has\b", r"Partner \1 with", partner_text) + if lower.startswith("partner a "): + return f"The outfits show {partner_text}" + if lower.startswith(("two ", "paired ", "coordinated ")): + return f"The outfits are {partner_text}" + return f"They wear {clothing}" + + +def single_from_row_result( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> CaptionMetadataRoute | None: + row = request.row + detail_level = request.detail_level + keep_style = request.keep_style + subject = deps.clean_text(row.get("primary_subject") or row.get("subject") or "") + if subject not in ("woman", "man"): + return None + + caption_front = deps.single_caption_front(row) + age = deps.clean_text(row.get("age") or row.get("age_band") or caption_front.get("caption_age") or "") + body_phrase = deps.field_row_value(row, "body_phrase") or caption_front.get("caption_body_phrase", "") + if not body_phrase: + body = deps.clean_text(row.get("body_type") or row.get("body") or "") + figure = deps.clean_text(row.get("figure")) + body_phrase = deps.body_phrase(body, figure) + + skin = deps.field_row_value(row, "skin") or caption_front.get("caption_skin", "") + hair = deps.field_row_value(row, "hair") or caption_front.get("caption_hair", "") + eyes = deps.field_row_value(row, "eyes") or caption_front.get("caption_eyes", "") + item = deps.row_value(row, "item", deps.item_labels) + if item: + item = deps.clean_clothing(item) + if not item: + item = deps.clean_clothing(deps.row_value(row, "clothing", ("Clothing", "Erotic outfit"))) + scene = deps.row_value(row, "scene_text", ("Scene", "Setting")) + pose = deps.row_value(row, "pose", ("Pose",)) + expression = "" if deps.expression_disabled(row) else deps.row_value(row, "expression", ("Facial expression", "Facial expressions")) + composition = deps.normalize_composition(deps.row_value(row, "composition", ("Composition",))) + camera_scene = deps.clean_text(row.get("camera_scene_directive")) + prop = deps.row_value(row, "prop", ("Prop/detail",)) + style = deps.field_row_value(row, "style") if keep_style else "" + + parts = [] + opener = deps.age_subject(age, subject) + appearance_details = [piece for piece in (skin, hair, eyes) if piece] + if body_phrase: + parts.append(f"{opener} has {deps.article(body_phrase)} {body_phrase}") + elif appearance_details: + parts.append(f"{opener} has {deps.human_join(appearance_details)}") + else: + parts.append(opener) + if body_phrase and appearance_details: + parts.append(f"{pronoun(subject)} has {deps.human_join(appearance_details)}") + if item: + verb = "wears" if subject == "woman" else "is dressed in" + parts.append(f"{pronoun(subject)} {verb} {item}") + if prop: + parts.append(f"{pronoun(subject)} is {prop}") + if pose: + parts.append(f"{pronoun(subject)} is {deps.pose_clause(pose)}") + if expression: + parts.append(f"{possessive_pronoun(subject)} expression is {expression}") + if scene: + parts.append(f"The setting is {scene}") + if deps.detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) + if deps.detail_allows(detail_level) and composition: + parts.append(f"The composition is {composition}") + if keep_style and style: + parts.append(f"The visual style is {style}") + return CaptionMetadataRoute(deps.join_sentences(parts), "metadata(single)") + + +def couple_from_row_result( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> CaptionMetadataRoute | None: + row = request.row + detail_level = request.detail_level + keep_style = request.keep_style + subject = deps.clean_text(row.get("subject_phrase") or row.get("primary_subject")) + primary = deps.clean_text(row.get("primary_subject")) + if "couple" not in primary and subject not in ("two women", "two men", "a woman and a man"): + if not primary.startswith("two ") and " and " not in subject: + return None + if subject == "woman and man": + subject = "a woman and a man" + + ages = deps.row_value(row, "age", ("Ages",)) or deps.clean_text(row.get("age_band")) + body = deps.row_value(row, "body", ("Body types",)) or deps.clean_text(row.get("body_type")) + pose = deps.row_value(row, "pose", ("Pose",)) + pose = pose.replace(", affectionate and flirtatious but non-explicit", "") + clothing = deps.clean_clothing(deps.row_value(row, "item", deps.item_labels) or deps.row_value(row, "clothing", ("Clothing",))) + scene = deps.row_value(row, "scene_text", ("Scene", "Setting")) + expression = "" + if not deps.expression_disabled(row): + expression = deps.row_value(row, "character_expression_text") or deps.row_value( + row, + "expression", + ("Facial expressions", "Facial expression"), + ) + composition = deps.normalize_composition(deps.row_value(row, "composition", ("Composition",))) + camera_scene = deps.clean_text(row.get("camera_scene_directive")) + style = deps.field_row_value(row, "style") if keep_style else "" + + parts = [f"{deps.cap_first(subject)} are adults"] + if ages: + parts.append(f"The age detail is {deps.clean_age_phrase(ages)}") + if body: + parts.append(f"Their body types are {body}") + if clothing: + parts.append(couple_clothing_sentence(clothing, deps.clean_text)) + if pose: + parts.append(f"The pose is {pose}") + if scene: + parts.append(f"The setting is {scene}") + if deps.detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) + if expression: + parts.append(f"Their expressions are {expression}") + if deps.detail_allows(detail_level) and composition: + parts.append(f"The composition is {composition}") + if keep_style and style: + parts.append(f"The visual style is {style}") + return CaptionMetadataRoute(deps.join_sentences(parts), "metadata(couple)") + + +def configured_cast_from_row_result( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> CaptionMetadataRoute | None: + row = request.row + detail_level = request.detail_level + keep_style = request.keep_style + if deps.clean_text(row.get("subject_type")) != "configured_cast": + if "hardcore sexual poses" not in deps.clean_text(row.get("main_category")).lower(): + return None + + subject = deps.subject_phrase_from_counts(row) + verb = deps.verb_for_row(row) + cast = deps.row_value(row, "cast_summary", ("Cast",)) + role_graph = deps.row_value(row, "role_graph", ("Role graph",)) + item = deps.row_value(row, "item", deps.item_labels) + scene = deps.row_value(row, "scene_text", ("Setting", "Scene")) + expression = "" + if not deps.expression_disabled(row): + expression = deps.row_value(row, "character_expression_text") or deps.row_value( + row, + "expression", + ("Facial expressions", "Facial expression"), + ) + composition = deps.normalize_composition(deps.row_value(row, "composition", ("Composition",))) + camera_scene = deps.clean_text(row.get("camera_scene_directive")) + cast_descriptor_text = deps.row_value(row, "cast_descriptor_text", ("Characters", "Cast descriptors")) + scene_kind = deps.field_row_value(row, "scene_kind") or "explicit adult sex scene" + style = deps.field_row_value(row, "style") if keep_style else "" + + parts = [f"{deps.cap_first(subject)} {verb} shown as a consensual {scene_kind}"] + if cast_descriptor_text: + parts.append(deps.natural_cast_descriptor_text(cast_descriptor_text)) + if cast and not cast_descriptor_text: + parts.append(f"The cast is {cast}") + if role_graph: + parts.append(role_graph) + if item: + parts.append(f"The {deps.metadata_action_label(row)} is {item}") + scene_bits = [] + if scene: + scene_bits.append(f"set in {scene}") + if expression: + scene_bits.append(f"with {expression}") + if composition: + scene_bits.append(f"framed as {composition}") + if scene_bits and deps.detail_allows(detail_level): + parts.append(", ".join(scene_bits)) + if deps.detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) + if keep_style and style: + parts.append(f"The visual style is {style}") + return CaptionMetadataRoute(deps.join_sentences(parts), "metadata(configured_cast)") + + +def group_or_layout_from_row_result( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> CaptionMetadataRoute | None: + row = request.row + detail_level = request.detail_level + keep_style = request.keep_style + primary = deps.clean_text(row.get("primary_subject")) + if "group" not in primary and primary != "layout scene": + return None + + subject = deps.field_row_value(row, "subject_phrase") or primary + age = deps.row_value(row, "age", ("Ages",)) or deps.clean_text(row.get("age_band")) + item = deps.clean_clothing(deps.row_value(row, "item", deps.item_labels) or deps.row_value(row, "clothing", ("Clothing",))) + scene = deps.row_value(row, "scene_text", ("Scene", "Setting")) + expression = "" + if not deps.expression_disabled(row): + expression = deps.row_value(row, "character_expression_text") or deps.row_value( + row, + "expression", + ("Facial expressions", "Facial expression"), + ) + composition = deps.normalize_composition(deps.row_value(row, "composition", ("Composition",))) + camera_scene = deps.clean_text(row.get("camera_scene_directive")) + style = deps.field_row_value(row, "style") if keep_style else "" + + if primary == "layout scene": + parts = [f"{deps.cap_first(subject)} is arranged as an adults-only designed illustration layout"] + if expression: + parts.append(f"The featured expression is {expression}") + else: + parts = [f"{deps.cap_first(subject)} includes adults"] + if age: + parts[0] += f" ages {age}" + if item: + parts.append(f"They wear {item}") + if expression: + parts.append(f"They show {expression}") + if scene: + parts.append(f"The setting is {scene}") + if deps.detail_allows(detail_level) and camera_scene: + parts.append(camera_scene) + if deps.detail_allows(detail_level) and composition: + parts.append(f"The composition is {composition}") + if keep_style and style: + parts.append(f"The visual style is {style}") + return CaptionMetadataRoute(deps.join_sentences(parts), "metadata(group_layout)") + + +def insta_of_pair_from_row_result( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> CaptionMetadataRoute | None: + row = request.row + detail_level = request.detail_level + keep_style = request.keep_style + if deps.clean_text(row.get("mode")).lower() != "insta/of": + return None + soft_row = row.get("softcore_row") + hard_row = row.get("hardcore_row") + if not isinstance(soft_row, dict) or not isinstance(hard_row, dict): + return None + + hard_row_for_text = dict(hard_row) + options = row.get("options") + if isinstance(options, dict) and options.get("continuity") == "same_creator_same_room": + if soft_row.get("scene_text"): + hard_row_for_text["scene_text"] = soft_row["scene_text"] + if soft_row.get("composition"): + hard_row_for_text["composition"] = soft_row["composition"] + + soft_text, _soft_method = deps.metadata_to_prose(soft_row, detail_level, keep_style) + hard_text, _hard_method = deps.metadata_to_prose(hard_row_for_text, detail_level, keep_style) + descriptor = deps.clean_text(row.get("shared_descriptor")) + options = row.get("options") if isinstance(row.get("options"), dict) else {} + cast_descriptors = row.get("shared_cast_descriptors") + if isinstance(cast_descriptors, list): + cast_descriptor_text = "; ".join(deps.clean_text(item) for item in cast_descriptors if deps.clean_text(item)) + else: + cast_descriptor_text = deps.clean_text(cast_descriptors) + labels = deps.cast_labels(cast_descriptor_text) + + same_soft_cast = options.get("softcore_cast") == "same_as_hardcore" + + parts = [] + if cast_descriptor_text and same_soft_cast: + parts.append(deps.natural_cast_descriptor_text(cast_descriptor_text)) + elif descriptor: + parts.append(f"A {descriptor}") + if cast_descriptor_text and not same_soft_cast: + parts.append(deps.natural_cast_descriptor_text(cast_descriptor_text)) + if same_soft_cast: + parts.append("The softcore version keeps the same adult cast present together in a non-explicit teaser setup") + partner_styling = row.get("softcore_partner_styling") + if isinstance(partner_styling, dict): + outfits = partner_styling.get("outfits") + if isinstance(outfits, list): + outfit_text = deps.human_join([deps.clean_text(item) for item in outfits if deps.clean_text(item)]) + outfit_text = deps.natural_label_text(outfit_text, labels) + if outfit_text: + parts.append(f"Softcore partner styling: {outfit_text}") + pose = deps.clean_text(partner_styling.get("pose")) + if pose: + parts.append(f"The shared softcore cast pose is {pose}") + if soft_text: + parts.append(f"Softcore version: {soft_text}") + if hard_text: + parts.append(f"Hardcore version: {hard_text}") + if not parts: + return None + return CaptionMetadataRoute(deps.join_sentences(parts), "metadata(insta_of_pair)") + + +def single_from_row(request: CaptionMetadataRouteRequest, deps: CaptionMetadataRouteDependencies) -> tuple[str, str] | None: + result = single_from_row_result(request, deps) + return result.as_tuple() if result else None + + +def couple_from_row(request: CaptionMetadataRouteRequest, deps: CaptionMetadataRouteDependencies) -> tuple[str, str] | None: + result = couple_from_row_result(request, deps) + return result.as_tuple() if result else None + + +def configured_cast_from_row( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> tuple[str, str] | None: + result = configured_cast_from_row_result(request, deps) + return result.as_tuple() if result else None + + +def group_or_layout_from_row( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> tuple[str, str] | None: + result = group_or_layout_from_row_result(request, deps) + return result.as_tuple() if result else None + + +def insta_of_pair_from_row( + request: CaptionMetadataRouteRequest, + deps: CaptionMetadataRouteDependencies, +) -> tuple[str, str] | None: + result = insta_of_pair_from_row_result(request, deps) + return result.as_tuple() if result else None diff --git a/caption_naturalizer.py b/caption_naturalizer.py index 235a2cf..891af22 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -4,12 +4,14 @@ import re from typing import Any try: + from . import caption_metadata_routes from . import caption_policy from . import formatter_input as input_policy from . import krea_cast as cast_policy from . import route_metadata as route_metadata_policy from .prompt_hygiene import sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. + import caption_metadata_routes import caption_policy import formatter_input as input_policy import krea_cast as cast_policy @@ -282,270 +284,92 @@ def _detail_allows(level: str, dense_only: bool = False) -> bool: return caption_policy.detail_allows(level, dense_only=dense_only) +def _caption_metadata_route_dependencies() -> caption_metadata_routes.CaptionMetadataRouteDependencies: + return caption_metadata_routes.CaptionMetadataRouteDependencies( + item_labels=ITEM_LABELS, + clean_text=_clean_text, + row_value=_row_value, + field_row_value=lambda row, key: _row_value(row, key), + clean_clothing=_clean_clothing, + normalize_composition=_normalize_composition, + expression_disabled=_expression_disabled, + detail_allows=_detail_allows, + join_sentences=_join_sentences, + human_join=_human_join, + article=_article, + cap_first=_cap_first, + body_phrase=_body_phrase, + single_caption_front=_single_caption_front, + pose_clause=_pose_clause, + age_subject=_age_subject, + clean_age_phrase=_clean_age_phrase, + subject_phrase_from_counts=_subject_phrase_from_counts, + verb_for_row=_verb_for_row, + metadata_action_label=_metadata_action_label, + natural_cast_descriptor_text=_natural_cast_descriptor_text, + cast_labels=_cast_labels, + natural_label_text=_natural_label_text, + metadata_to_prose=_metadata_to_prose, + ) + + +def _caption_metadata_route_request( + row: dict[str, Any], + detail_level: str, + keep_style: bool, +) -> caption_metadata_routes.CaptionMetadataRouteRequest: + return caption_metadata_routes.CaptionMetadataRouteRequest( + row=row, + detail_level=detail_level, + keep_style=keep_style, + ) + + def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: - subject = _clean_text(row.get("primary_subject") or row.get("subject") or "") - if subject not in ("woman", "man"): - return None - - caption_front = _single_caption_front(row) - age = _clean_text(row.get("age") or row.get("age_band") or caption_front.get("caption_age") or "") - body_phrase = _row_value(row, "body_phrase") or caption_front.get("caption_body_phrase", "") - if not body_phrase: - body = _clean_text(row.get("body_type") or row.get("body") or "") - figure = _clean_text(row.get("figure")) - body_phrase = _body_phrase(body, figure) - - skin = _row_value(row, "skin") or caption_front.get("caption_skin", "") - hair = _row_value(row, "hair") or caption_front.get("caption_hair", "") - eyes = _row_value(row, "eyes") or caption_front.get("caption_eyes", "") - item = _row_value(row, "item", ITEM_LABELS) - if item: - item = _clean_clothing(item) - if not item: - item = _clean_clothing(_row_value(row, "clothing", ("Clothing", "Erotic outfit"))) - scene = _row_value(row, "scene_text", ("Scene", "Setting")) - pose = _row_value(row, "pose", ("Pose",)) - expression = "" if _expression_disabled(row) else _row_value(row, "expression", ("Facial expression", "Facial expressions")) - composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) - camera_scene = _clean_text(row.get("camera_scene_directive")) - prop = _row_value(row, "prop", ("Prop/detail",)) - style = _row_value(row, "style") if keep_style else "" - - parts = [] - opener = _age_subject(age, subject) - appearance_details = [piece for piece in (skin, hair, eyes) if piece] - if body_phrase: - parts.append(f"{opener} has {_article(body_phrase)} {body_phrase}") - elif appearance_details: - parts.append(f"{opener} has {_human_join(appearance_details)}") - else: - parts.append(opener) - if body_phrase and appearance_details: - parts.append(f"{pronoun(subject)} has {_human_join(appearance_details)}") - if item: - verb = "wears" if subject == "woman" else "is dressed in" - parts.append(f"{pronoun(subject)} {verb} {item}") - if prop: - parts.append(f"{pronoun(subject)} is {prop}") - if pose: - parts.append(f"{pronoun(subject)} is {_pose_clause(pose)}") - if expression: - parts.append(f"{possessive_pronoun(subject)} expression is {expression}") - if scene: - parts.append(f"The setting is {scene}") - if _detail_allows(detail_level) and camera_scene: - parts.append(camera_scene) - if _detail_allows(detail_level) and composition: - parts.append(f"The composition is {composition}") - if keep_style and style: - parts.append(f"The visual style is {style}") - return _join_sentences(parts), "metadata(single)" + return caption_metadata_routes.single_from_row( + _caption_metadata_route_request(row, detail_level, keep_style), + _caption_metadata_route_dependencies(), + ) def pronoun(subject: str) -> str: - return "She" if subject == "woman" else "He" + return caption_metadata_routes.pronoun(subject) def possessive_pronoun(subject: str) -> str: - return "Her" if subject == "woman" else "His" + return caption_metadata_routes.possessive_pronoun(subject) def _couple_clothing_sentence(clothing: str) -> str: - clothing = _clean_text(clothing) - lower = clothing.lower() - partner_text = re.sub(r"\bPartner ([AB]) wears\b", r"Partner \1 wearing", clothing) - partner_text = re.sub(r"\bPartner ([AB]) has\b", r"Partner \1 with", partner_text) - if lower.startswith("partner a "): - return f"The outfits show {partner_text}" - if lower.startswith(("two ", "paired ", "coordinated ")): - return f"The outfits are {partner_text}" - return f"They wear {clothing}" + return caption_metadata_routes.couple_clothing_sentence(clothing, _clean_text) def _couple_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: - subject = _clean_text(row.get("subject_phrase") or row.get("primary_subject")) - primary = _clean_text(row.get("primary_subject")) - if "couple" not in primary and subject not in ("two women", "two men", "a woman and a man"): - if not primary.startswith("two ") and " and " not in subject: - return None - if subject == "woman and man": - subject = "a woman and a man" - - ages = _row_value(row, "age", ("Ages",)) or _clean_text(row.get("age_band")) - body = _row_value(row, "body", ("Body types",)) or _clean_text(row.get("body_type")) - pose = _row_value(row, "pose", ("Pose",)) - pose = pose.replace(", affectionate and flirtatious but non-explicit", "") - clothing = _clean_clothing(_row_value(row, "item", ITEM_LABELS) or _row_value(row, "clothing", ("Clothing",))) - scene = _row_value(row, "scene_text", ("Scene", "Setting")) - expression = "" - if not _expression_disabled(row): - expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) - composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) - camera_scene = _clean_text(row.get("camera_scene_directive")) - style = _row_value(row, "style") if keep_style else "" - - parts = [f"{_cap_first(subject)} are adults"] - if ages: - parts.append(f"The age detail is {_clean_age_phrase(ages)}") - if body: - parts.append(f"Their body types are {body}") - if clothing: - parts.append(_couple_clothing_sentence(clothing)) - if pose: - parts.append(f"The pose is {pose}") - if scene: - parts.append(f"The setting is {scene}") - if _detail_allows(detail_level) and camera_scene: - parts.append(camera_scene) - if expression: - parts.append(f"Their expressions are {expression}") - if _detail_allows(detail_level) and composition: - parts.append(f"The composition is {composition}") - if keep_style and style: - parts.append(f"The visual style is {style}") - return _join_sentences(parts), "metadata(couple)" + return caption_metadata_routes.couple_from_row( + _caption_metadata_route_request(row, detail_level, keep_style), + _caption_metadata_route_dependencies(), + ) def _configured_cast_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: - if _clean_text(row.get("subject_type")) != "configured_cast": - if "hardcore sexual poses" not in _clean_text(row.get("main_category")).lower(): - return None - - subject = _subject_phrase_from_counts(row) - verb = _verb_for_row(row) - cast = _row_value(row, "cast_summary", ("Cast",)) - role_graph = _row_value(row, "role_graph", ("Role graph",)) - item = _row_value(row, "item", ITEM_LABELS) - scene = _row_value(row, "scene_text", ("Setting", "Scene")) - expression = "" - if not _expression_disabled(row): - expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) - composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) - camera_scene = _clean_text(row.get("camera_scene_directive")) - cast_descriptor_text = _row_value(row, "cast_descriptor_text", ("Characters", "Cast descriptors")) - scene_kind = _row_value(row, "scene_kind") or "explicit adult sex scene" - style = _row_value(row, "style") if keep_style else "" - - parts = [f"{_cap_first(subject)} {verb} shown as a consensual {scene_kind}"] - if cast_descriptor_text: - parts.append(_natural_cast_descriptor_text(cast_descriptor_text)) - if cast and not cast_descriptor_text: - parts.append(f"The cast is {cast}") - if role_graph: - parts.append(role_graph) - if item: - parts.append(f"The {_metadata_action_label(row)} is {item}") - scene_bits = [] - if scene: - scene_bits.append(f"set in {scene}") - if expression: - scene_bits.append(f"with {expression}") - if composition: - scene_bits.append(f"framed as {composition}") - if scene_bits and _detail_allows(detail_level): - parts.append(", ".join(scene_bits)) - if _detail_allows(detail_level) and camera_scene: - parts.append(camera_scene) - if keep_style and style: - parts.append(f"The visual style is {style}") - return _join_sentences(parts), "metadata(configured_cast)" + return caption_metadata_routes.configured_cast_from_row( + _caption_metadata_route_request(row, detail_level, keep_style), + _caption_metadata_route_dependencies(), + ) def _group_or_layout_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: - primary = _clean_text(row.get("primary_subject")) - if "group" not in primary and primary != "layout scene": - return None - - subject = _row_value(row, "subject_phrase") or primary - age = _row_value(row, "age", ("Ages",)) or _clean_text(row.get("age_band")) - item = _clean_clothing(_row_value(row, "item", ITEM_LABELS) or _row_value(row, "clothing", ("Clothing",))) - scene = _row_value(row, "scene_text", ("Scene", "Setting")) - expression = "" - if not _expression_disabled(row): - expression = _row_value(row, "character_expression_text") or _row_value(row, "expression", ("Facial expressions", "Facial expression")) - composition = _normalize_composition(_row_value(row, "composition", ("Composition",))) - camera_scene = _clean_text(row.get("camera_scene_directive")) - style = _row_value(row, "style") if keep_style else "" - - if primary == "layout scene": - parts = [f"{_cap_first(subject)} is arranged as an adults-only designed illustration layout"] - if expression: - parts.append(f"The featured expression is {expression}") - else: - parts = [f"{_cap_first(subject)} includes adults"] - if age: - parts[0] += f" ages {age}" - if item: - parts.append(f"They wear {item}") - if expression: - parts.append(f"They show {expression}") - if scene: - parts.append(f"The setting is {scene}") - if _detail_allows(detail_level) and camera_scene: - parts.append(camera_scene) - if _detail_allows(detail_level) and composition: - parts.append(f"The composition is {composition}") - if keep_style and style: - parts.append(f"The visual style is {style}") - return _join_sentences(parts), "metadata(group_layout)" + return caption_metadata_routes.group_or_layout_from_row( + _caption_metadata_route_request(row, detail_level, keep_style), + _caption_metadata_route_dependencies(), + ) def _insta_of_pair_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None: - if _clean_text(row.get("mode")).lower() != "insta/of": - return None - soft_row = row.get("softcore_row") - hard_row = row.get("hardcore_row") - if not isinstance(soft_row, dict) or not isinstance(hard_row, dict): - return None - - hard_row_for_text = dict(hard_row) - options = row.get("options") - if isinstance(options, dict) and options.get("continuity") == "same_creator_same_room": - if soft_row.get("scene_text"): - hard_row_for_text["scene_text"] = soft_row["scene_text"] - if soft_row.get("composition"): - hard_row_for_text["composition"] = soft_row["composition"] - - soft_text, _soft_method = _metadata_to_prose(soft_row, detail_level, keep_style) - hard_text, _hard_method = _metadata_to_prose(hard_row_for_text, detail_level, keep_style) - descriptor = _clean_text(row.get("shared_descriptor")) - options = row.get("options") if isinstance(row.get("options"), dict) else {} - cast_descriptors = row.get("shared_cast_descriptors") - if isinstance(cast_descriptors, list): - cast_descriptor_text = "; ".join(_clean_text(item) for item in cast_descriptors if _clean_text(item)) - else: - cast_descriptor_text = _clean_text(cast_descriptors) - labels = _cast_labels(cast_descriptor_text) - - same_soft_cast = options.get("softcore_cast") == "same_as_hardcore" - - parts = [] - if cast_descriptor_text and same_soft_cast: - parts.append(_natural_cast_descriptor_text(cast_descriptor_text)) - elif descriptor: - parts.append(f"A {descriptor}") - if cast_descriptor_text and not same_soft_cast: - parts.append(_natural_cast_descriptor_text(cast_descriptor_text)) - if same_soft_cast: - parts.append("The softcore version keeps the same adult cast present together in a non-explicit teaser setup") - partner_styling = row.get("softcore_partner_styling") - if isinstance(partner_styling, dict): - outfits = partner_styling.get("outfits") - if isinstance(outfits, list): - outfit_text = _human_join([_clean_text(item) for item in outfits if _clean_text(item)]) - outfit_text = _natural_label_text(outfit_text, labels) - if outfit_text: - parts.append(f"Softcore partner styling: {outfit_text}") - pose = _clean_text(partner_styling.get("pose")) - if pose: - parts.append(f"The shared softcore cast pose is {pose}") - if soft_text: - parts.append(f"Softcore version: {soft_text}") - if hard_text: - parts.append(f"Hardcore version: {hard_text}") - if not parts: - return None - return _join_sentences(parts), "metadata(insta_of_pair)" + return caption_metadata_routes.insta_of_pair_from_row( + _caption_metadata_route_request(row, detail_level, keep_style), + _caption_metadata_route_dependencies(), + ) def _metadata_to_prose(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str]: diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 4fec46c..3032b7a 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -427,9 +427,17 @@ Owner: `caption_naturalizer.py`. Keep here: -- natural sentence caption assembly; +- top-level natural caption orchestration; - training-caption trigger behavior; - style-tail policy from `caption_policy.py`. + +Already isolated: + +- `caption_metadata_routes.py` owns metadata row natural-language assembly for + single, couple, configured-cast, group/layout, and Insta/OF pair routes behind + `CaptionMetadataRouteRequest`, `CaptionMetadataRouteDependencies`, and + `CaptionMetadataRoute`; `caption_naturalizer.py` keeps compatibility wrappers, + profile handling, trigger behavior, and text fallback. - metadata-family action labels from `action_family` and `position_family` via `caption_policy.py`. - shared row route metadata reads from `route_metadata.py`. diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 81874ec..8db088e 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -724,7 +724,9 @@ JSON category pools unless the raw builder text is also wrong. ### Naturalizer `naturalize_caption` chooses metadata-specific renderers such as -`_configured_cast_from_row`, `_couple_from_row`, and single/group renderers. +`caption_metadata_routes.configured_cast_from_row_result`, +`caption_metadata_routes.couple_from_row_result`, and the other metadata route +renderers through compatibility wrappers. Use this route when the row metadata is correct but the sentence-style caption is too mechanical or unsuitable for training captions. @@ -733,9 +735,9 @@ Naturalizer field consumption: | Branch | Reads most from | Key functions | | --- | --- | --- | -| Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `_single_from_row`, `_couple_from_row`, `_group_or_layout_from_row` | -| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `_configured_cast_from_row`, `_metadata_action_label` | -| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` | +| Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` | +| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `_metadata_action_label` | +| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `caption_metadata_routes.insta_of_pair_from_row_result` | | Text fallback | `caption` or `prompt` text | `_text_to_prose` | ### Final Text Hygiene diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 74eeb7b..6a3b860 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -25,6 +25,7 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) import caption_naturalizer # noqa: E402 +import caption_metadata_routes # noqa: E402 import caption_policy # noqa: E402 import cast_context # noqa: E402 import category_extensions # noqa: E402 @@ -2258,6 +2259,121 @@ def smoke_caption_policy() -> None: _expect(browsing_method == "text(fallback)", "Caption browsing profile changed fallback method") +def _expect_caption_route_parity( + name: str, + row: dict[str, Any], + route_builder: Callable[ + [caption_metadata_routes.CaptionMetadataRouteRequest, caption_metadata_routes.CaptionMetadataRouteDependencies], + caption_metadata_routes.CaptionMetadataRoute | None, + ], + wrapper: Callable[[dict[str, Any], str, bool], tuple[str, str] | None], + expected_method: str, +) -> None: + request = caption_naturalizer._caption_metadata_route_request(row, "balanced", False) + deps = caption_naturalizer._caption_metadata_route_dependencies() + typed_route = route_builder(request, deps) + legacy_route = wrapper(row, "balanced", False) + _expect(typed_route is not None, f"{name} typed caption metadata route did not match") + assert typed_route is not None + _expect( + typed_route.as_tuple() == legacy_route, + f"{name} typed caption metadata route should match legacy wrapper output", + ) + _expect(typed_route.method == expected_method, f"{name} caption route method changed") + _expect_text(f"{name}.caption_route", typed_route.prose, 20) + + +def smoke_caption_metadata_routes() -> None: + single = { + "primary_subject": "woman", + "age_band": "25-year-old adult", + "body_phrase": "slim figure", + "skin": "fair skin", + "hair": "long blonde hair", + "eyes": "blue eyes", + "item": "silk dress", + "pose": "standing beside a window", + "scene_text": "quiet studio with warm daylight", + "expression": "soft smile", + "composition": "vertical centered portrait", + } + _expect_caption_route_parity( + "caption_route_single", + single, + caption_metadata_routes.single_from_row_result, + caption_naturalizer._single_from_row, + "metadata(single)", + ) + + couple = { + "primary_subject": "a woman and a man", + "subject_phrase": "woman and man", + "age": "25-year-old adult and 40-year-old adult", + "body": "slim and average builds", + "item": "Partner A wears black dress; Partner B wears dark shirt", + "pose": "standing close together", + "scene_text": "private lounge with soft lamps", + "expression": "shared confident gaze", + "composition": "two-person editorial frame", + } + _expect_caption_route_parity( + "caption_route_couple", + couple, + caption_metadata_routes.couple_from_row_result, + caption_naturalizer._couple_from_row, + "metadata(couple)", + ) + + configured = _fixture_hardcore_row() + _expect_caption_route_parity( + "caption_route_configured_cast", + configured, + caption_metadata_routes.configured_cast_from_row_result, + caption_naturalizer._configured_cast_from_row, + "metadata(configured_cast)", + ) + + group = { + "primary_subject": "group scene", + "subject_phrase": "three adult friends", + "age": "late 20s adults", + "item": "coordinated evening outfits", + "scene_text": "rooftop lounge with city lights", + "expression": "relaxed shared smiles", + "composition": "wide group frame", + } + _expect_caption_route_parity( + "caption_route_group", + group, + caption_metadata_routes.group_or_layout_from_row_result, + caption_naturalizer._group_or_layout_from_row, + "metadata(group_layout)", + ) + + pair = pb.build_insta_of_pair( + row_number=1, + start_index=1, + seed=3511, + ethnicity="any", + figure="random", + no_plus_women=False, + no_black=False, + trigger=Trigger, + prepend_trigger_to_prompt=True, + options_json=_insta_options(hardcore_clothing_continuity="partially_removed"), + character_cast=_character_cast(), + hardcore_position_config=_action_filter("penetration_only"), + ) + _expect_pair(pair, "caption_route_pair") + _expect_caption_route_parity( + "caption_route_insta_pair", + pair, + caption_metadata_routes.insta_of_pair_from_row_result, + caption_naturalizer._insta_of_pair_from_row, + "metadata(insta_of_pair)", + ) + + def smoke_sdxl_presets_policy() -> None: _expect( sdxl_formatter.SDXL_STYLE_PRESETS is sdxl_presets.SDXL_STYLE_PRESETS, @@ -5151,6 +5267,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [ ("formatter_input_policy", smoke_formatter_input_policy), ("formatter_cast_policy", smoke_formatter_cast_policy), ("caption_policy", smoke_caption_policy), + ("caption_metadata_routes", smoke_caption_metadata_routes), ("sdxl_presets_policy", smoke_sdxl_presets_policy), ("sdxl_tag_routes", smoke_sdxl_tag_routes), ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),