348 lines
12 KiB
Python
348 lines
12 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
|
|
try:
|
|
from . import caption_metadata_routes
|
|
from . import caption_policy
|
|
from . import caption_text_policy
|
|
from . import formatter_input as input_policy
|
|
from .prompt_hygiene import sanitize_prose_text
|
|
except ImportError: # Allows local smoke tests with `python -c`.
|
|
import caption_metadata_routes
|
|
import caption_policy
|
|
import caption_text_policy
|
|
import formatter_input as input_policy
|
|
from prompt_hygiene import sanitize_prose_text
|
|
|
|
|
|
# Module-level aliases of constants defined in ``caption_policy``.
OLD_TRIGGER = caption_policy.OLD_TRIGGER
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
STYLE_TAILS = caption_policy.STYLE_TAILS

# Label tables; ``PROMPT_FIELD_LABELS`` comes from ``caption_text_policy``,
# the remaining three from ``caption_policy``.
PROMPT_FIELD_LABELS = caption_text_policy.PROMPT_FIELD_LABELS
ITEM_LABELS = caption_policy.ITEM_LABELS
ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
|
|
|
|
|
|
def _clean_text(value: Any) -> str:
    """Delegate to ``caption_text_policy.clean_text``."""
    return caption_text_policy.clean_text(value)
|
|
|
|
|
|
def _is_false(value: Any) -> bool:
    """Delegate to ``caption_text_policy.is_false``."""
    return caption_text_policy.is_false(value)
|
|
|
|
|
|
def _expression_disabled(row: dict[str, Any]) -> bool:
    """Delegate to ``caption_text_policy.expression_disabled`` for *row*."""
    return caption_text_policy.expression_disabled(row)
|
|
|
|
|
|
def _cap_first(text: str) -> str:
    """Delegate to ``caption_text_policy.cap_first``."""
    return caption_text_policy.cap_first(text)
|
|
|
|
|
|
def _article(noun_phrase: str) -> str:
    """Delegate to ``caption_text_policy.article``."""
    return caption_text_policy.article(noun_phrase)
|
|
|
|
|
|
def _sentence(text: str) -> str:
    """Delegate to ``caption_text_policy.sentence``."""
    return caption_text_policy.sentence(text)
|
|
|
|
|
|
def _join_sentences(parts: list[str]) -> str:
    """Delegate to ``caption_text_policy.join_sentences``."""
    return caption_text_policy.join_sentences(parts)
|
|
|
|
|
|
def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
    """Delegate to ``caption_text_policy.formatter_hint_parts`` for *row*."""
    return caption_text_policy.formatter_hint_parts(row)
|
|
|
|
|
|
def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
    """Delegate to ``caption_text_policy.append_formatter_hints``."""
    return caption_text_policy.append_formatter_hints(prose, row)
|
|
|
|
|
|
def _human_join(parts: list[str]) -> str:
    """Delegate to ``caption_text_policy.human_join``."""
    return caption_text_policy.human_join(parts)
|
|
|
|
|
|
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Delegate to ``caption_text_policy.metadata_action_label``.

    *default* is forwarded unchanged as the second positional argument.
    """
    return caption_text_policy.metadata_action_label(row, default)
|
|
|
|
|
|
def _prompt_cast_descriptors(text: str) -> str:
    """Delegate to ``caption_text_policy.prompt_cast_descriptors``."""
    return caption_text_policy.prompt_cast_descriptors(text)
|
|
|
|
|
|
def _cast_entries(text: str) -> list[tuple[str, str]]:
    """Delegate to ``caption_text_policy.cast_entries``."""
    return caption_text_policy.cast_entries(text)
|
|
|
|
|
|
def _natural_cast_descriptor_text(text: str) -> str:
    """Delegate to ``caption_text_policy.natural_cast_descriptor_text``."""
    return caption_text_policy.natural_cast_descriptor_text(text)
|
|
|
|
|
|
def _cast_labels(text: str) -> list[str]:
    """Delegate to ``caption_text_policy.cast_labels``."""
    return caption_text_policy.cast_labels(text)
|
|
|
|
|
|
def _natural_label_text(text: Any, labels: list[str]) -> str:
    """Delegate to ``caption_text_policy.natural_label_text``."""
    return caption_text_policy.natural_label_text(text, labels)
|
|
|
|
|
|
def _strip_style_tail(text: str) -> str:
    """Delegate to ``caption_text_policy.strip_style_tail``."""
    return caption_text_policy.strip_style_tail(text)
|
|
|
|
|
|
def _remove_trigger(text: str, trigger: str) -> str:
    """Delegate to ``caption_text_policy.remove_trigger``."""
    return caption_text_policy.remove_trigger(text, trigger)
|
|
|
|
|
|
def _with_trigger(text: str, trigger: str, include_trigger: bool) -> str:
    """Delegate to ``caption_text_policy.with_trigger``."""
    return caption_text_policy.with_trigger(text, trigger, include_trigger)
|
|
|
|
|
|
def _maybe_json(text: str) -> dict[str, Any] | None:
    """Delegate to ``input_policy.maybe_json`` (the ``formatter_input`` module)."""
    return input_policy.maybe_json(text)
|
|
|
|
|
|
def _row_from_inputs(
    source_text: str,
    metadata_json: str,
    input_hint: str,
) -> tuple[dict[str, Any] | None, str]:
    """Delegate to ``input_policy.row_from_inputs`` (the ``formatter_input`` module)."""
    return input_policy.row_from_inputs(source_text, metadata_json, input_hint)
|
|
|
|
|
|
def _prompt_field(text: str, label: str) -> str:
    """Delegate to ``caption_text_policy.prompt_field``."""
    return caption_text_policy.prompt_field(text, label)
|
|
|
|
|
|
def _row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str:
    """Delegate to ``caption_text_policy.row_value``."""
    return caption_text_policy.row_value(row, key, labels)
|
|
|
|
|
|
def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
    """Delegate to ``caption_text_policy.field_from_any_prompt``."""
    return caption_text_policy.field_from_any_prompt(text, labels)
|
|
|
|
|
|
def _normalize_composition(text: str) -> str:
    """Delegate to ``caption_text_policy.normalize_composition``."""
    return caption_text_policy.normalize_composition(text)
|
|
|
|
|
|
def _clean_clothing(text: str) -> str:
    """Delegate to ``caption_text_policy.clean_clothing``."""
    return caption_text_policy.clean_clothing(text)
|
|
|
|
|
|
def _body_phrase(body: Any, figure_note: Any = "") -> str:
    """Delegate to ``caption_text_policy.body_phrase``."""
    return caption_text_policy.body_phrase(body, figure_note)
|
|
|
|
|
|
def _single_caption_front(row: dict[str, Any]) -> dict[str, str]:
    """Delegate to ``caption_text_policy.single_caption_front`` for *row*."""
    return caption_text_policy.single_caption_front(row)
|
|
|
|
|
|
def _pose_clause(pose: str) -> str:
    """Delegate to ``caption_text_policy.pose_clause``."""
    return caption_text_policy.pose_clause(pose)
|
|
|
|
|
|
def _age_subject(age: str, subject: str) -> str:
    """Delegate to ``caption_text_policy.age_subject``."""
    return caption_text_policy.age_subject(age, subject)
|
|
|
|
|
|
def _clean_age_phrase(age: str) -> str:
    """Delegate to ``caption_text_policy.clean_age_phrase``."""
    return caption_text_policy.clean_age_phrase(age)
|
|
|
|
|
|
def _subject_phrase_from_counts(row: dict[str, Any]) -> str:
    """Delegate to ``caption_text_policy.subject_phrase_from_counts`` for *row*."""
    return caption_text_policy.subject_phrase_from_counts(row)
|
|
|
|
|
|
def _verb_for_row(row: dict[str, Any]) -> str:
    """Delegate to ``caption_text_policy.verb_for_row`` for *row*."""
    return caption_text_policy.verb_for_row(row)
|
|
|
|
|
|
def _detail_allows(level: str, dense_only: bool = False) -> bool:
    """Delegate to ``caption_text_policy.detail_allows``.

    ``dense_only`` is forwarded by keyword.
    """
    return caption_text_policy.detail_allows(level, dense_only=dense_only)
|
|
|
|
|
|
def _caption_metadata_route_dependencies() -> caption_metadata_routes.CaptionMetadataRouteDependencies:
    """Build the route dependencies via ``caption_text_policy.metadata_route_dependencies``.

    This module's ``_metadata_to_prose`` is handed over as the sole argument.
    """
    return caption_text_policy.metadata_route_dependencies(_metadata_to_prose)
|
|
|
|
|
|
def _caption_metadata_route_request(
    row: dict[str, Any],
    detail_level: str,
    keep_style: bool,
) -> caption_metadata_routes.CaptionMetadataRouteRequest:
    """Bundle the three routing arguments into a ``CaptionMetadataRouteRequest``."""
    request_fields = {
        "row": row,
        "detail_level": detail_level,
        "keep_style": keep_style,
    }
    return caption_metadata_routes.CaptionMetadataRouteRequest(**request_fields)
|
|
|
|
|
|
def _single_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
    """Run ``caption_metadata_routes.single_from_row`` for *row*.

    Returns whatever the route returns: a ``(prose, method)`` pair or ``None``
    per the annotation.
    """
    request = _caption_metadata_route_request(row, detail_level, keep_style)
    dependencies = _caption_metadata_route_dependencies()
    return caption_metadata_routes.single_from_row(request, dependencies)
|
|
|
|
|
|
def pronoun(subject: str) -> str:
    """Delegate to ``caption_metadata_routes.pronoun``."""
    return caption_metadata_routes.pronoun(subject)
|
|
|
|
|
|
def possessive_pronoun(subject: str) -> str:
    """Delegate to ``caption_metadata_routes.possessive_pronoun``."""
    return caption_metadata_routes.possessive_pronoun(subject)
|
|
|
|
|
|
def _couple_clothing_sentence(clothing: str) -> str:
    """Delegate to ``caption_metadata_routes.couple_clothing_sentence``.

    This module's ``_clean_text`` is passed along as the second argument.
    """
    return caption_metadata_routes.couple_clothing_sentence(clothing, _clean_text)
|
|
|
|
|
|
def _couple_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
    """Run ``caption_metadata_routes.couple_from_row`` for *row*.

    Returns whatever the route returns: a ``(prose, method)`` pair or ``None``
    per the annotation.
    """
    request = _caption_metadata_route_request(row, detail_level, keep_style)
    dependencies = _caption_metadata_route_dependencies()
    return caption_metadata_routes.couple_from_row(request, dependencies)
|
|
|
|
|
|
def _configured_cast_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
    """Run ``caption_metadata_routes.configured_cast_from_row`` for *row*.

    Returns whatever the route returns: a ``(prose, method)`` pair or ``None``
    per the annotation.
    """
    request = _caption_metadata_route_request(row, detail_level, keep_style)
    dependencies = _caption_metadata_route_dependencies()
    return caption_metadata_routes.configured_cast_from_row(request, dependencies)
|
|
|
|
|
|
def _group_or_layout_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
    """Run ``caption_metadata_routes.group_or_layout_from_row`` for *row*.

    Returns whatever the route returns: a ``(prose, method)`` pair or ``None``
    per the annotation.
    """
    request = _caption_metadata_route_request(row, detail_level, keep_style)
    dependencies = _caption_metadata_route_dependencies()
    return caption_metadata_routes.group_or_layout_from_row(request, dependencies)
|
|
|
|
|
|
def _insta_of_pair_from_row(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str] | None:
    """Run ``caption_metadata_routes.insta_of_pair_from_row`` for *row*.

    Returns whatever the route returns: a ``(prose, method)`` pair or ``None``
    per the annotation.
    """
    request = _caption_metadata_route_request(row, detail_level, keep_style)
    dependencies = _caption_metadata_route_dependencies()
    return caption_metadata_routes.insta_of_pair_from_row(request, dependencies)
|
|
|
|
|
|
def _metadata_to_prose(row: dict[str, Any], detail_level: str, keep_style: bool) -> tuple[str, str]:
    """Turn a metadata row into ``(prose, method)``.

    The route builders are tried in a fixed order and the first truthy result
    wins. When none of them produces a result, the row's ``"caption"`` (or,
    failing that, ``"prompt"``) value is cleaned and sent through
    ``_text_to_prose``. Formatter hints are appended to the prose in both
    cases.
    """
    route_builders = (
        _insta_of_pair_from_row,
        _configured_cast_from_row,
        _single_from_row,
        _couple_from_row,
        _group_or_layout_from_row,
    )
    for build in route_builders:
        outcome = build(row, detail_level, keep_style)
        if not outcome:
            continue
        prose, method = outcome
        return _append_formatter_hints(prose, row), method

    # No route matched: fall back to the free-text caption/prompt path.
    fallback_text = _clean_text(row.get("caption") or row.get("prompt"))
    prose, method = _text_to_prose(fallback_text, detail_level, keep_style)
    return _append_formatter_hints(prose, row), method
|
|
|
|
|
|
def _prompt_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, str] | None:
    """Rewrite a labelled prompt (``Label: value`` fields) as prose sentences.

    Returns ``None`` when *text* contains no colon or when none of the
    recognised fields (cast, item, setting/scene, pose, role graph, facial
    expression, composition) yields a value. Otherwise returns the joined
    sentences together with the method tag ``"prompt(labels)"``.
    """
    if ":" not in text:
        return None

    cast = _field_from_any_prompt(text, ("Cast",))
    item = _field_from_any_prompt(text, ITEM_LABELS)
    scene = _field_from_any_prompt(text, ("Setting", "Scene"))
    pose = _field_from_any_prompt(text, ("Pose",))
    role_graph = _field_from_any_prompt(text, ("Role graph",))
    expression = _field_from_any_prompt(text, ("Facial expressions", "Facial expression"))
    composition = _normalize_composition(_field_from_any_prompt(text, ("Composition",)))

    extracted = (cast, item, scene, pose, role_graph, expression, composition)
    if not any(extracted):
        return None

    # Everything before the first colon is used as the subject; what follows,
    # up to the first period, is used as the style when keep_style is set.
    head, _, tail = text.partition(":")
    subject = _clean_text(head)

    sentences: list[str] = []
    if subject:
        sentences.append(_cap_first(subject))
    if cast:
        sentences.append(f"The cast is {cast}")
    if role_graph:
        sentences.append(role_graph)

    # An item field takes precedence over a plain pose field.
    if item:
        if _field_from_any_prompt(text, ("Sexual pose",)):
            item_label = "sexual pose"
        else:
            item_label = "key detail"
        sentences.append(f"The {item_label} is {item}")
    elif pose:
        sentences.append(f"The pose is {pose}")

    scene_clauses = [
        clause
        for value, clause in (
            (scene, f"set in {scene}"),
            (expression, f"with {expression}"),
            (composition, f"framed as {composition}"),
        )
        if value
    ]
    if scene_clauses and _detail_allows(detail_level):
        sentences.append(", ".join(scene_clauses))

    if keep_style:
        style = _clean_text(tail.partition(".")[0])
        if style:
            sentences.append(f"The visual style is {style}")

    return _join_sentences(sentences), "prompt(labels)"
|
|
|
|
|
|
def _parts_to_sentence(parts: list[str], detail_level: str) -> str:
    """Turn comma-split caption fragments into prose using positional roles.

    After cleaning and dropping empty fragments, the first fragment is the
    subject. A final fragment ending in "illustration" (case-insensitive) is
    set aside as the visual style. Of what remains after the subject, the last
    two fragments are read as scene and composition and anything before them
    as details.

    When only one fragment follows the subject it is kept as a detail only.
    Previously that fragment was emitted twice, once as a detail and again as
    the composition.

    Args:
        parts: Raw fragments, typically from splitting a caption on commas.
        detail_level: Passed to ``_detail_allows`` to decide whether the
            scene, composition and style sentences are emitted.

    Returns:
        The joined sentences, or ``""`` when no fragment survives cleaning.
    """
    parts = [part for part in (_clean_text(part).strip(" ,.") for part in parts) if part]
    if not parts:
        return ""
    if len(parts) == 1:
        return _sentence(parts[0])

    trailing_style = ""
    if parts[-1].lower().endswith("illustration"):
        trailing_style = parts.pop()

    subject, rest = parts[0], parts[1:]
    if len(rest) >= 2:
        # subject, [details...], scene, composition
        *details, scene, composition = rest
    else:
        # Zero or one trailing fragment: too little to assign scene/composition
        # roles, so keep it as a plain detail rather than repeating it.
        details, scene, composition = rest, "", ""

    if details:
        sentences = [f"{_cap_first(subject)} includes {', '.join(details)}"]
    else:
        sentences = [_cap_first(subject)]

    if (scene or composition) and _detail_allows(detail_level):
        if scene:
            sentences.append(f"The setting is {scene}")
        if composition:
            sentences.append(f"The composition is {composition}")
    if trailing_style and _detail_allows(detail_level, dense_only=True):
        sentences.append(f"The visual style is {trailing_style}")
    return _join_sentences(sentences)
|
|
|
|
|
|
def _text_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, str]:
    """Convert free caption/prompt text into ``(prose, method)``.

    The labelled-prompt path (``_prompt_to_prose``) is tried first. If it
    yields nothing, the style tail and both trigger words are stripped, the
    text is split on commas and handed to ``_parts_to_sentence``; the method
    tag is then ``"text(fallback)"``.

    ``keep_style`` is only forwarded to the labelled-prompt path; the fallback
    path always strips the style tail.
    """
    cleaned = _clean_text(text)

    labelled = _prompt_to_prose(cleaned, detail_level, keep_style)
    if labelled:
        return labelled

    stripped = _strip_style_tail(cleaned)
    for trigger_word in (DEFAULT_TRIGGER, OLD_TRIGGER):
        stripped = _remove_trigger(stripped, trigger_word)

    fragments = [fragment.strip() for fragment in stripped.split(",")]
    prose = _parts_to_sentence(fragments, detail_level)
    if prose:
        return prose, "text(fallback)"
    # Nothing usable came out of the fragments; emit the stripped text as-is.
    return _sentence(stripped), "text(fallback)"
|
|
|
|
|
|
def naturalize_caption(
    source_text: str,
    metadata_json: str = "",
    input_hint: str = "auto",
    trigger: str = DEFAULT_TRIGGER,
    include_trigger: bool = True,
    detail_level: str = "balanced",
    style_policy: str = "drop_style_tail",
    caption_profile: str = caption_policy.CAPTION_PROFILE_DEFAULT,
) -> tuple[str, str]:
    """Convert tag-style prompt/caption text into compact natural language.

    Args:
        source_text: Caption or prompt text to rewrite.
        metadata_json: Optional metadata string, forwarded to
            ``_row_from_inputs`` together with *source_text*.
        input_hint: One of ``"auto"``, ``"metadata_json"`` or
            ``"caption_or_prompt"``; any other value is treated as ``"auto"``.
        trigger: Trigger word handed to ``_with_trigger`` and to the final
            sanitiser.
        include_trigger: Requested trigger inclusion; the effective value is
            whatever ``caption_policy.apply_caption_profile`` returns.
        detail_level: Requested detail level; likewise resolved through the
            caption profile.
        style_policy: Requested style policy; likewise resolved through the
            caption profile, then mapped to a keep-style flag.
        caption_profile: Profile name passed to
            ``caption_policy.apply_caption_profile``.

    Returns:
        ``(caption, method)``. When a metadata row was resolved, the method is
        ``"<row_method>:<route_method>"``; otherwise it is the method reported
        by the free-text path.
    """
    if input_hint not in ("auto", "metadata_json", "caption_or_prompt"):
        input_hint = "auto"

    detail_level, style_policy, include_trigger = caption_policy.apply_caption_profile(
        caption_profile,
        detail_level=detail_level,
        style_policy=style_policy,
        include_trigger=include_trigger,
    )
    keep_style = caption_policy.keep_style_terms(style_policy)

    def _finalize(prose: str) -> str:
        # Apply the trigger policy, then run the shared prose sanitiser.
        triggered = _with_trigger(prose, trigger, include_trigger)
        return sanitize_prose_text(triggered, triggers=(trigger,))

    row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
    if row is None:
        prose, method = _text_to_prose(source_text, detail_level, keep_style)
        return _finalize(prose), method

    prose, method = _metadata_to_prose(row, detail_level, keep_style)
    return _finalize(prose), f"{row_method}:{method}"
|