Extract caption text policy
This commit is contained in:
+36
-183
@@ -1,21 +1,18 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import re
|
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from . import caption_metadata_routes
|
from . import caption_metadata_routes
|
||||||
from . import caption_policy
|
from . import caption_policy
|
||||||
|
from . import caption_text_policy
|
||||||
from . import formatter_input as input_policy
|
from . import formatter_input as input_policy
|
||||||
from . import krea_cast as cast_policy
|
|
||||||
from . import route_metadata as route_metadata_policy
|
|
||||||
from .prompt_hygiene import sanitize_prose_text
|
from .prompt_hygiene import sanitize_prose_text
|
||||||
except ImportError: # Allows local smoke tests with `python -c`.
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
import caption_metadata_routes
|
import caption_metadata_routes
|
||||||
import caption_policy
|
import caption_policy
|
||||||
|
import caption_text_policy
|
||||||
import formatter_input as input_policy
|
import formatter_input as input_policy
|
||||||
import krea_cast as cast_policy
|
|
||||||
import route_metadata as route_metadata_policy
|
|
||||||
from prompt_hygiene import sanitize_prose_text
|
from prompt_hygiene import sanitize_prose_text
|
||||||
|
|
||||||
|
|
||||||
@@ -23,125 +20,86 @@ OLD_TRIGGER = caption_policy.OLD_TRIGGER
|
|||||||
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
|
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
|
||||||
STYLE_TAILS = caption_policy.STYLE_TAILS
|
STYLE_TAILS = caption_policy.STYLE_TAILS
|
||||||
|
|
||||||
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
|
PROMPT_FIELD_LABELS = caption_text_policy.PROMPT_FIELD_LABELS
|
||||||
|
|
||||||
ITEM_LABELS = caption_policy.ITEM_LABELS
|
ITEM_LABELS = caption_policy.ITEM_LABELS
|
||||||
ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
|
ACTION_FAMILY_CAPTION_LABELS = caption_policy.ACTION_FAMILY_CAPTION_LABELS
|
||||||
POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
|
POSITION_FAMILY_CAPTION_LABELS = caption_policy.POSITION_FAMILY_CAPTION_LABELS
|
||||||
|
|
||||||
|
|
||||||
def _clean_text(value: Any) -> str:
|
def _clean_text(value: Any) -> str:
|
||||||
return input_policy.clean_text(value)
|
return caption_text_policy.clean_text(value)
|
||||||
|
|
||||||
|
|
||||||
def _is_false(value: Any) -> bool:
|
def _is_false(value: Any) -> bool:
|
||||||
if isinstance(value, bool):
|
return caption_text_policy.is_false(value)
|
||||||
return value is False
|
|
||||||
if isinstance(value, str):
|
|
||||||
return value.strip().lower() in ("false", "0", "no", "off")
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def _expression_disabled(row: dict[str, Any]) -> bool:
|
def _expression_disabled(row: dict[str, Any]) -> bool:
|
||||||
return bool(row.get("expression_disabled")) or _is_false(row.get("expression_enabled", True))
|
return caption_text_policy.expression_disabled(row)
|
||||||
|
|
||||||
|
|
||||||
def _cap_first(text: str) -> str:
|
def _cap_first(text: str) -> str:
|
||||||
text = _clean_text(text).strip(" ,")
|
return caption_text_policy.cap_first(text)
|
||||||
return text[:1].upper() + text[1:] if text else ""
|
|
||||||
|
|
||||||
|
|
||||||
def _article(noun_phrase: str) -> str:
|
def _article(noun_phrase: str) -> str:
|
||||||
word = noun_phrase.lstrip().lower()
|
return caption_text_policy.article(noun_phrase)
|
||||||
if word.startswith("hour") or word[:1] in "aeiou":
|
|
||||||
return "an"
|
|
||||||
return "a"
|
|
||||||
|
|
||||||
|
|
||||||
def _sentence(text: str) -> str:
|
def _sentence(text: str) -> str:
|
||||||
text = _clean_text(text).strip(" ,;")
|
return caption_text_policy.sentence(text)
|
||||||
if not text:
|
|
||||||
return ""
|
|
||||||
if text[-1] not in ".!?":
|
|
||||||
text += "."
|
|
||||||
return _cap_first(text)
|
|
||||||
|
|
||||||
|
|
||||||
def _join_sentences(parts: list[str]) -> str:
|
def _join_sentences(parts: list[str]) -> str:
|
||||||
return " ".join(part for part in (_sentence(part) for part in parts) if part)
|
return caption_text_policy.join_sentences(parts)
|
||||||
|
|
||||||
|
|
||||||
def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
|
def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
|
||||||
hints: list[str] = []
|
return caption_text_policy.formatter_hint_parts(row)
|
||||||
if not isinstance(row, dict):
|
|
||||||
return hints
|
|
||||||
for hint in route_metadata_policy.row_formatter_hints(row, "caption"):
|
|
||||||
hint = _clean_text(hint).strip(" .")
|
|
||||||
if hint and hint not in hints:
|
|
||||||
hints.append(hint)
|
|
||||||
return hints
|
|
||||||
|
|
||||||
|
|
||||||
def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
|
def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
|
||||||
hints = _formatter_hint_parts(row)
|
return caption_text_policy.append_formatter_hints(prose, row)
|
||||||
if not hints:
|
|
||||||
return prose
|
|
||||||
return _join_sentences([prose, *hints])
|
|
||||||
|
|
||||||
|
|
||||||
def _human_join(parts: list[str]) -> str:
|
def _human_join(parts: list[str]) -> str:
|
||||||
parts = [part for part in (_clean_text(part) for part in parts) if part]
|
return caption_text_policy.human_join(parts)
|
||||||
if len(parts) <= 1:
|
|
||||||
return "".join(parts)
|
|
||||||
if len(parts) == 2:
|
|
||||||
return f"{parts[0]} and {parts[1]}"
|
|
||||||
return f"{', '.join(parts[:-1])}, and {parts[-1]}"
|
|
||||||
|
|
||||||
|
|
||||||
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
|
def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
|
||||||
return caption_policy.metadata_action_label(row, default)
|
return caption_text_policy.metadata_action_label(row, default)
|
||||||
|
|
||||||
|
|
||||||
def _prompt_cast_descriptors(text: str) -> str:
|
def _prompt_cast_descriptors(text: str) -> str:
|
||||||
return cast_policy.prompt_cast_descriptors(text)
|
return caption_text_policy.prompt_cast_descriptors(text)
|
||||||
|
|
||||||
|
|
||||||
def _cast_entries(text: str) -> list[tuple[str, str]]:
|
def _cast_entries(text: str) -> list[tuple[str, str]]:
|
||||||
return cast_policy.cast_entries(text)
|
return caption_text_policy.cast_entries(text)
|
||||||
|
|
||||||
|
|
||||||
def _natural_cast_descriptor_text(text: str) -> str:
|
def _natural_cast_descriptor_text(text: str) -> str:
|
||||||
return cast_policy.natural_cast_descriptor_text(text)
|
return caption_text_policy.natural_cast_descriptor_text(text)
|
||||||
|
|
||||||
|
|
||||||
def _cast_labels(text: str) -> list[str]:
|
def _cast_labels(text: str) -> list[str]:
|
||||||
return cast_policy.cast_labels(text)
|
return caption_text_policy.cast_labels(text)
|
||||||
|
|
||||||
|
|
||||||
def _natural_label_text(text: Any, labels: list[str]) -> str:
|
def _natural_label_text(text: Any, labels: list[str]) -> str:
|
||||||
return cast_policy.natural_label_text(text, labels, capitalize_sentence_starts=False)
|
return caption_text_policy.natural_label_text(text, labels)
|
||||||
|
|
||||||
|
|
||||||
def _strip_style_tail(text: str) -> str:
|
def _strip_style_tail(text: str) -> str:
|
||||||
return caption_policy.strip_style_tail(text)
|
return caption_text_policy.strip_style_tail(text)
|
||||||
|
|
||||||
|
|
||||||
def _remove_trigger(text: str, trigger: str) -> str:
|
def _remove_trigger(text: str, trigger: str) -> str:
|
||||||
return input_policy.strip_trigger_prefix(
|
return caption_text_policy.remove_trigger(text, trigger)
|
||||||
text,
|
|
||||||
(trigger, OLD_TRIGGER, DEFAULT_TRIGGER),
|
|
||||||
remove_exact=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _with_trigger(text: str, trigger: str, include_trigger: bool) -> str:
|
def _with_trigger(text: str, trigger: str, include_trigger: bool) -> str:
|
||||||
text = _join_sentences([text]) if "." not in text else _clean_text(text)
|
return caption_text_policy.with_trigger(text, trigger, include_trigger)
|
||||||
trigger = _clean_text(trigger or DEFAULT_TRIGGER)
|
|
||||||
if not include_trigger or not trigger:
|
|
||||||
return text
|
|
||||||
if text.lower().startswith(trigger.lower() + "."):
|
|
||||||
return text
|
|
||||||
return f"{trigger}. {text}"
|
|
||||||
|
|
||||||
|
|
||||||
def _maybe_json(text: str) -> dict[str, Any] | None:
|
def _maybe_json(text: str) -> dict[str, Any] | None:
|
||||||
@@ -153,164 +111,59 @@ def _row_from_inputs(source_text: str, metadata_json: str, input_hint: str) -> t
|
|||||||
|
|
||||||
|
|
||||||
def _prompt_field(text: str, label: str) -> str:
|
def _prompt_field(text: str, label: str) -> str:
|
||||||
return input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS)
|
return caption_text_policy.prompt_field(text, label)
|
||||||
|
|
||||||
|
|
||||||
def _row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str:
|
def _row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str:
|
||||||
return input_policy.row_value(row, key, labels, field_labels=PROMPT_FIELD_LABELS)
|
return caption_text_policy.row_value(row, key, labels)
|
||||||
|
|
||||||
|
|
||||||
def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
|
def _field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
|
||||||
for label in labels:
|
return caption_text_policy.field_from_any_prompt(text, labels)
|
||||||
value = input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS)
|
|
||||||
if value:
|
|
||||||
return value
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _normalize_composition(text: str) -> str:
|
def _normalize_composition(text: str) -> str:
|
||||||
return caption_policy.normalize_composition(text)
|
return caption_text_policy.normalize_composition(text)
|
||||||
|
|
||||||
|
|
||||||
def _clean_clothing(text: str) -> str:
|
def _clean_clothing(text: str) -> str:
|
||||||
return caption_policy.clean_clothing(text)
|
return caption_text_policy.clean_clothing(text)
|
||||||
|
|
||||||
|
|
||||||
def _body_phrase(body: Any, figure_note: Any = "") -> str:
|
def _body_phrase(body: Any, figure_note: Any = "") -> str:
|
||||||
body = _clean_text(body)
|
return caption_text_policy.body_phrase(body, figure_note)
|
||||||
figure_note = _clean_text(figure_note)
|
|
||||||
if not body:
|
|
||||||
return figure_note
|
|
||||||
if not figure_note:
|
|
||||||
return f"{body} figure"
|
|
||||||
if "figure" in figure_note.lower():
|
|
||||||
return f"{body} build and {figure_note}"
|
|
||||||
return f"{body} figure with {figure_note}"
|
|
||||||
|
|
||||||
|
|
||||||
def _single_caption_front(row: dict[str, Any]) -> dict[str, str]:
|
def _single_caption_front(row: dict[str, Any]) -> dict[str, str]:
|
||||||
caption = _clean_text(row.get("caption"))
|
return caption_text_policy.single_caption_front(row)
|
||||||
if not caption:
|
|
||||||
return {}
|
|
||||||
caption = _remove_trigger(_strip_style_tail(caption), _clean_text(row.get("trigger")) or DEFAULT_TRIGGER)
|
|
||||||
caption = _remove_trigger(caption, OLD_TRIGGER)
|
|
||||||
subject = _clean_text(row.get("primary_subject"))
|
|
||||||
age = _clean_text(row.get("age_band") or row.get("age"))
|
|
||||||
body_phrase = _clean_text(row.get("body_phrase"))
|
|
||||||
if not body_phrase:
|
|
||||||
body = _clean_text(row.get("body_type") or row.get("body"))
|
|
||||||
figure = _clean_text(row.get("figure"))
|
|
||||||
body_phrase = _body_phrase(body, figure)
|
|
||||||
front = f"{subject}, {age}, {body_phrase}, "
|
|
||||||
if subject in ("woman", "man") and age and body_phrase and caption.startswith(front):
|
|
||||||
try:
|
|
||||||
skin, hair, eyes, _rest = caption[len(front) :].split(", ", 3)
|
|
||||||
except ValueError:
|
|
||||||
return {}
|
|
||||||
else:
|
|
||||||
pieces = [piece.strip() for piece in caption.split(", ", 6)]
|
|
||||||
if len(pieces) < 7:
|
|
||||||
return {}
|
|
||||||
subject, age, body_phrase, skin, hair, eyes, _rest = pieces
|
|
||||||
if subject not in ("woman", "man"):
|
|
||||||
return {}
|
|
||||||
return {
|
|
||||||
"caption_subject": subject,
|
|
||||||
"caption_age": age,
|
|
||||||
"caption_body_phrase": body_phrase,
|
|
||||||
"caption_skin": skin,
|
|
||||||
"caption_hair": hair,
|
|
||||||
"caption_eyes": eyes,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _pose_clause(pose: str) -> str:
|
def _pose_clause(pose: str) -> str:
|
||||||
pose = _clean_text(pose)
|
return caption_text_policy.pose_clause(pose)
|
||||||
if not pose:
|
|
||||||
return ""
|
|
||||||
first = pose.split(None, 1)[0].lower()
|
|
||||||
if first.endswith("ing") or first in ("seated", "reclined", "posed"):
|
|
||||||
return pose
|
|
||||||
return f"posing in {pose}"
|
|
||||||
|
|
||||||
|
|
||||||
def _age_subject(age: str, subject: str) -> str:
|
def _age_subject(age: str, subject: str) -> str:
|
||||||
age = _clean_text(age)
|
return caption_text_policy.age_subject(age, subject)
|
||||||
subject = _clean_text(subject) or "person"
|
|
||||||
if not age:
|
|
||||||
return f"An adult {subject}"
|
|
||||||
clean_age = re.sub(r"\s+adults?$", "", age).strip()
|
|
||||||
if "year-old" in clean_age:
|
|
||||||
return f"A {clean_age} adult {subject}"
|
|
||||||
if re.search(r"\d", clean_age):
|
|
||||||
poss = "her" if subject == "woman" else "his"
|
|
||||||
return f"An adult {subject} in {poss} {clean_age}"
|
|
||||||
return f"An adult {clean_age} {subject}"
|
|
||||||
|
|
||||||
|
|
||||||
def _clean_age_phrase(age: str) -> str:
|
def _clean_age_phrase(age: str) -> str:
|
||||||
age = _clean_text(age)
|
return caption_text_policy.clean_age_phrase(age)
|
||||||
age = re.sub(r"\s+adults?$", "", age).strip()
|
|
||||||
return age.replace("-year-old", " years old")
|
|
||||||
|
|
||||||
|
|
||||||
def _subject_phrase_from_counts(row: dict[str, Any]) -> str:
|
def _subject_phrase_from_counts(row: dict[str, Any]) -> str:
|
||||||
subject = _clean_text(row.get("subject_phrase"))
|
return caption_text_policy.subject_phrase_from_counts(row)
|
||||||
if subject:
|
|
||||||
return subject
|
|
||||||
try:
|
|
||||||
women = int(row.get("women_count") or 0)
|
|
||||||
men = int(row.get("men_count") or 0)
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return _clean_text(row.get("primary_subject")) or "adult scene"
|
|
||||||
parts = []
|
|
||||||
if women:
|
|
||||||
parts.append(f"{women} adult {'woman' if women == 1 else 'women'}")
|
|
||||||
if men:
|
|
||||||
parts.append(f"{men} adult {'man' if men == 1 else 'men'}")
|
|
||||||
if not parts:
|
|
||||||
return _clean_text(row.get("primary_subject")) or "adult scene"
|
|
||||||
return " and ".join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
def _verb_for_row(row: dict[str, Any]) -> str:
|
def _verb_for_row(row: dict[str, Any]) -> str:
|
||||||
try:
|
return caption_text_policy.verb_for_row(row)
|
||||||
return "is" if int(row.get("person_count") or 0) == 1 else "are"
|
|
||||||
except (TypeError, ValueError):
|
|
||||||
return "are"
|
|
||||||
|
|
||||||
|
|
||||||
def _detail_allows(level: str, dense_only: bool = False) -> bool:
|
def _detail_allows(level: str, dense_only: bool = False) -> bool:
|
||||||
return caption_policy.detail_allows(level, dense_only=dense_only)
|
return caption_text_policy.detail_allows(level, dense_only=dense_only)
|
||||||
|
|
||||||
|
|
||||||
def _caption_metadata_route_dependencies() -> caption_metadata_routes.CaptionMetadataRouteDependencies:
|
def _caption_metadata_route_dependencies() -> caption_metadata_routes.CaptionMetadataRouteDependencies:
|
||||||
return caption_metadata_routes.CaptionMetadataRouteDependencies(
|
return caption_text_policy.metadata_route_dependencies(_metadata_to_prose)
|
||||||
item_labels=ITEM_LABELS,
|
|
||||||
clean_text=_clean_text,
|
|
||||||
row_value=_row_value,
|
|
||||||
field_row_value=lambda row, key: _row_value(row, key),
|
|
||||||
clean_clothing=_clean_clothing,
|
|
||||||
normalize_composition=_normalize_composition,
|
|
||||||
expression_disabled=_expression_disabled,
|
|
||||||
detail_allows=_detail_allows,
|
|
||||||
join_sentences=_join_sentences,
|
|
||||||
human_join=_human_join,
|
|
||||||
article=_article,
|
|
||||||
cap_first=_cap_first,
|
|
||||||
body_phrase=_body_phrase,
|
|
||||||
single_caption_front=_single_caption_front,
|
|
||||||
pose_clause=_pose_clause,
|
|
||||||
age_subject=_age_subject,
|
|
||||||
clean_age_phrase=_clean_age_phrase,
|
|
||||||
subject_phrase_from_counts=_subject_phrase_from_counts,
|
|
||||||
verb_for_row=_verb_for_row,
|
|
||||||
metadata_action_label=_metadata_action_label,
|
|
||||||
natural_cast_descriptor_text=_natural_cast_descriptor_text,
|
|
||||||
cast_labels=_cast_labels,
|
|
||||||
natural_label_text=_natural_label_text,
|
|
||||||
metadata_to_prose=_metadata_to_prose,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _caption_metadata_route_request(
|
def _caption_metadata_route_request(
|
||||||
|
|||||||
@@ -0,0 +1,304 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any, Callable
|
||||||
|
|
||||||
|
try:
|
||||||
|
from . import caption_metadata_routes
|
||||||
|
from . import caption_policy
|
||||||
|
from . import formatter_input as input_policy
|
||||||
|
from . import krea_cast as cast_policy
|
||||||
|
from . import route_metadata as route_metadata_policy
|
||||||
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
|
import caption_metadata_routes
|
||||||
|
import caption_policy
|
||||||
|
import formatter_input as input_policy
|
||||||
|
import krea_cast as cast_policy
|
||||||
|
import route_metadata as route_metadata_policy
|
||||||
|
|
||||||
|
|
||||||
|
# Trigger tokens and item labels are re-exported from caption_policy so the
# helpers below can refer to them by bare name.
OLD_TRIGGER = caption_policy.OLD_TRIGGER
DEFAULT_TRIGGER = caption_policy.DEFAULT_TRIGGER
ITEM_LABELS = caption_policy.ITEM_LABELS

# Computed once at import time; passed as ``field_labels`` to the
# input_policy prompt/row lookups in this module.
PROMPT_FIELD_LABELS = input_policy.prompt_field_labels()
|
||||||
|
|
||||||
|
|
||||||
|
def clean_text(value: Any) -> str:
    """Return ``value`` normalised by ``input_policy.clean_text``."""
    cleaned = input_policy.clean_text(value)
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def is_false(value: Any) -> bool:
    """Report whether ``value`` is an explicit "false" marker.

    Only the boolean ``False`` and the strings ``"false"``, ``"0"``, ``"no"``
    and ``"off"`` (case-insensitive, surrounding whitespace ignored) count.
    Every other value, including ``None`` and the integer ``0``, yields
    ``False``.
    """
    if isinstance(value, str):
        return value.strip().lower() in ("false", "0", "no", "off")
    # Non-bool objects are never treated as a false marker.
    return isinstance(value, bool) and not value
|
||||||
|
|
||||||
|
|
||||||
|
def expression_disabled(row: dict[str, Any]) -> bool:
    """Return True when the row switches expression output off.

    The row is disabled when ``expression_disabled`` is truthy, or when
    ``expression_enabled`` (default ``True``) is an explicit false marker
    according to ``is_false``.
    """
    # NOTE(review): a string such as "false" under ``expression_disabled`` is
    # truthy here and therefore disables expressions -- confirm that is intended.
    if row.get("expression_disabled"):
        return True
    return is_false(row.get("expression_enabled", True))
|
||||||
|
|
||||||
|
|
||||||
|
def cap_first(text: str) -> str:
    """Upper-case the first character of ``text``.

    The text is first passed through ``clean_text`` and stripped of leading
    and trailing spaces and commas. An empty result is returned as ``""``.
    The remaining characters are left untouched.
    """
    trimmed = clean_text(text).strip(" ,")
    if not trimmed:
        return ""
    head, tail = trimmed[:1], trimmed[1:]
    return head.upper() + tail
|
||||||
|
|
||||||
|
|
||||||
|
def article(noun_phrase: str) -> str:
    """Pick the indefinite article ("a" or "an") for ``noun_phrase``.

    Leading whitespace and letter case are ignored. Phrases starting with a
    vowel letter, or with "hour" (silent "h"), take "an"; everything else
    takes "a".

    An empty or whitespace-only phrase returns "a". The previous substring
    test (``word[:1] in "aeiou"``) was accidentally true for the empty string
    and produced "an".
    """
    word = noun_phrase.lstrip().lower()
    # str.startswith with a tuple is False for "", unlike `"" in "aeiou"`.
    if word.startswith(("hour", "a", "e", "i", "o", "u")):
        return "an"
    return "a"
|
||||||
|
|
||||||
|
|
||||||
|
def sentence(text: str) -> str:
    """Turn ``text`` into a capitalised sentence with terminal punctuation.

    The text is cleaned with ``clean_text`` and stripped of surrounding
    spaces, commas and semicolons. A "." is appended unless it already ends
    in ".", "!" or "?". Returns ``""`` when nothing is left after cleaning.
    """
    body = clean_text(text).strip(" ,;")
    if not body:
        return ""
    if not body.endswith((".", "!", "?")):
        body = f"{body}."
    return cap_first(body)
|
||||||
|
|
||||||
|
|
||||||
|
def join_sentences(parts: list[str]) -> str:
    """Format every part with ``sentence`` and join the non-empty results with a space."""
    finished = map(sentence, parts)
    # filter(None, ...) drops parts that cleaned down to an empty string.
    return " ".join(filter(None, finished))
|
||||||
|
|
||||||
|
|
||||||
|
def formatter_hint_parts(row: dict[str, Any]) -> list[str]:
    """Collect the de-duplicated "caption" formatter hints for ``row``.

    Hints come from ``route_metadata_policy.row_formatter_hints(row, "caption")``.
    Each one is cleaned and stripped of surrounding spaces and periods. Empty
    hints are dropped and first-seen order is preserved. A non-dict ``row``
    yields an empty list.
    """
    if not isinstance(row, dict):
        return []
    cleaned = (
        clean_text(raw).strip(" .")
        for raw in route_metadata_policy.row_formatter_hints(row, "caption")
    )
    # dict.fromkeys keeps insertion order, so this de-duplicates without reordering.
    return list(dict.fromkeys(hint for hint in cleaned if hint))
|
||||||
|
|
||||||
|
|
||||||
|
def append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
    """Append the row's formatter hints to ``prose`` as extra sentences.

    When the row has no hints, ``prose`` is returned unchanged (it is not
    re-formatted). Otherwise the prose and every hint are passed through
    ``join_sentences``.
    """
    extras = formatter_hint_parts(row)
    return join_sentences([prose, *extras]) if extras else prose
|
||||||
|
|
||||||
|
|
||||||
|
def human_join(parts: list[str]) -> str:
    """Join parts as an English list: "a", "a and b", "a, b, and c".

    Every part is cleaned with ``clean_text``, and parts that clean to an
    empty string are skipped. No parts gives ``""``.
    """
    cleaned = [item for item in map(clean_text, parts) if item]
    if len(cleaned) > 2:
        *leading, final = cleaned
        return f"{', '.join(leading)}, and {final}"
    # Zero, one or two items: " and ".join covers all three cases.
    return " and ".join(cleaned)
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
    """Delegate to ``caption_policy.metadata_action_label`` with the given ``default``."""
    label = caption_policy.metadata_action_label(row, default)
    return label
|
||||||
|
|
||||||
|
|
||||||
|
def prompt_cast_descriptors(text: str) -> str:
    """Delegate to ``cast_policy.prompt_cast_descriptors``."""
    descriptors = cast_policy.prompt_cast_descriptors(text)
    return descriptors
|
||||||
|
|
||||||
|
|
||||||
|
def cast_entries(text: str) -> list[tuple[str, str]]:
    """Delegate to ``cast_policy.cast_entries``."""
    entries = cast_policy.cast_entries(text)
    return entries
|
||||||
|
|
||||||
|
|
||||||
|
def natural_cast_descriptor_text(text: str) -> str:
    """Delegate to ``cast_policy.natural_cast_descriptor_text``."""
    natural = cast_policy.natural_cast_descriptor_text(text)
    return natural
|
||||||
|
|
||||||
|
|
||||||
|
def cast_labels(text: str) -> list[str]:
    """Delegate to ``cast_policy.cast_labels``."""
    labels = cast_policy.cast_labels(text)
    return labels
|
||||||
|
|
||||||
|
|
||||||
|
def natural_label_text(text: Any, labels: list[str]) -> str:
    """Delegate to ``cast_policy.natural_label_text``.

    Caption text always passes ``capitalize_sentence_starts=False``.
    """
    return cast_policy.natural_label_text(
        text,
        labels,
        capitalize_sentence_starts=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def strip_style_tail(text: str) -> str:
    """Delegate to ``caption_policy.strip_style_tail``."""
    stripped = caption_policy.strip_style_tail(text)
    return stripped
|
||||||
|
|
||||||
|
|
||||||
|
def remove_trigger(text: str, trigger: str) -> str:
    """Strip a trigger prefix from ``text`` via ``input_policy.strip_trigger_prefix``.

    The candidates passed are the requested ``trigger`` followed by
    ``OLD_TRIGGER`` and ``DEFAULT_TRIGGER``, with ``remove_exact=True``.
    """
    candidates = (trigger, OLD_TRIGGER, DEFAULT_TRIGGER)
    return input_policy.strip_trigger_prefix(text, candidates, remove_exact=True)
|
||||||
|
|
||||||
|
|
||||||
|
def with_trigger(text: str, trigger: str, include_trigger: bool) -> str:
    """Return ``text`` as caption prose, optionally prefixed with "<trigger>. ".

    Text that contains no "." is formatted with ``join_sentences``. Text that
    already contains one is only cleaned. An empty ``trigger`` falls back to
    ``DEFAULT_TRIGGER``. The prefix is skipped when ``include_trigger`` is
    false, when the cleaned trigger is empty, or when the text already starts
    with "<trigger>." (case-insensitive).
    """
    if "." in text:
        body = clean_text(text)
    else:
        body = join_sentences([text])
    token = clean_text(trigger or DEFAULT_TRIGGER)
    needs_prefix = (
        include_trigger
        and token
        and not body.lower().startswith(f"{token.lower()}.")
    )
    return f"{token}. {body}" if needs_prefix else body
|
||||||
|
|
||||||
|
|
||||||
|
def prompt_field(text: str, label: str) -> str:
    """Delegate to ``input_policy.prompt_field`` using this module's ``PROMPT_FIELD_LABELS``."""
    return input_policy.prompt_field(
        text,
        label,
        field_labels=PROMPT_FIELD_LABELS,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def row_value(row: dict[str, Any], key: str, labels: tuple[str, ...] = ()) -> str:
    """Delegate to ``input_policy.row_value`` using this module's ``PROMPT_FIELD_LABELS``."""
    return input_policy.row_value(
        row,
        key,
        labels,
        field_labels=PROMPT_FIELD_LABELS,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def field_row_value(row: dict[str, Any], key: str) -> str:
    """Two-argument form of ``row_value`` that uses its default (empty) ``labels``."""
    value = row_value(row, key)
    return value
|
||||||
|
|
||||||
|
|
||||||
|
def field_from_any_prompt(text: str, labels: tuple[str, ...]) -> str:
    """Return the first non-empty prompt field found under any of ``labels``.

    Labels are tried in order with ``input_policy.prompt_field``. ``""`` is
    returned when none of them yields a value.
    """
    lookups = (
        input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS)
        for label in labels
    )
    # The generator is lazy, so lookups stop at the first hit as before.
    return next((found for found in lookups if found), "")
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_composition(text: str) -> str:
    """Delegate to ``caption_policy.normalize_composition``."""
    normalized = caption_policy.normalize_composition(text)
    return normalized
|
||||||
|
|
||||||
|
|
||||||
|
def clean_clothing(text: str) -> str:
    """Delegate to ``caption_policy.clean_clothing``."""
    cleaned = caption_policy.clean_clothing(text)
    return cleaned
|
||||||
|
|
||||||
|
|
||||||
|
def body_phrase(body: Any, figure_note: Any = "") -> str:
    """Combine a body type and an optional figure note into one phrase.

    Both inputs are cleaned with ``clean_text`` first. Results:

    * no body                        -> the figure note on its own
    * body, no note                  -> "<body> figure"
    * note already mentions "figure" -> "<body> build and <note>"
    * otherwise                      -> "<body> figure with <note>"
    """
    build = clean_text(body)
    note = clean_text(figure_note)
    if not (build and note):
        return f"{build} figure" if build else note
    # Avoid saying "figure" twice when the note already contains the word.
    joiner = "build and" if "figure" in note.lower() else "figure with"
    return f"{build} {joiner} {note}"
|
||||||
|
|
||||||
|
|
||||||
|
def single_caption_front(row: dict[str, Any]) -> dict[str, str]:
    """Parse the leading comma-separated fields of a single-subject caption.

    The caption is cleaned, its style tail is stripped and trigger prefixes
    are removed. Two parsing strategies follow:

    1. If the row's subject, age and body phrase are all present and the
       caption starts with "<subject>, <age>, <body phrase>, ", only skin,
       hair and eyes are read from the text that follows.
    2. Otherwise all six leading fields are taken positionally from the
       caption itself.

    Both strategies require at least one more ", "-separated segment after
    the eyes field. Returns an empty dict when the caption is missing, too
    short, or the subject is not "woman"/"man". Otherwise returns the
    ``caption_*`` fields.
    """
    subjects = ("woman", "man")

    raw = clean_text(row.get("caption"))
    if not raw:
        return {}
    without_tail = strip_style_tail(raw)
    row_trigger = clean_text(row.get("trigger")) or DEFAULT_TRIGGER
    text = remove_trigger(remove_trigger(without_tail, row_trigger), OLD_TRIGGER)

    subject = clean_text(row.get("primary_subject"))
    age = clean_text(row.get("age_band") or row.get("age"))
    # Prefer an explicit body phrase; otherwise build one from body/figure.
    phrase = clean_text(row.get("body_phrase")) or body_phrase(
        clean_text(row.get("body_type") or row.get("body")),
        clean_text(row.get("figure")),
    )

    prefix = f"{subject}, {age}, {phrase}, "
    metadata_matches = (
        subject in subjects
        and bool(age)
        and bool(phrase)
        and text.startswith(prefix)
    )
    if metadata_matches:
        tail = text[len(prefix):].split(", ", 3)
        if len(tail) != 4:
            return {}
        skin, hair, eyes = tail[:3]
    else:
        fields = [field.strip() for field in text.split(", ", 6)]
        if len(fields) < 7:
            return {}
        subject, age, phrase, skin, hair, eyes = fields[:6]

    if subject not in subjects:
        return {}
    return {
        "caption_subject": subject,
        "caption_age": age,
        "caption_body_phrase": phrase,
        "caption_skin": skin,
        "caption_hair": hair,
        "caption_eyes": eyes,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def pose_clause(pose: str) -> str:
    """Return ``pose`` phrased so it can follow a subject.

    A cleaned pose whose first word ends in "ing", or is "seated",
    "reclined" or "posed", is returned as is. Any other pose becomes
    "posing in <pose>". An empty pose yields ``""``.
    """
    cleaned = clean_text(pose)
    if not cleaned:
        return ""
    lead = cleaned.split(maxsplit=1)[0].lower()
    already_verbal = lead.endswith("ing") or lead in ("seated", "reclined", "posed")
    return cleaned if already_verbal else f"posing in {cleaned}"
|
||||||
|
|
||||||
|
|
||||||
|
def age_subject(age: str, subject: str) -> str:
    """Build the opening noun phrase for an adult subject, e.g. "An adult woman".

    Args:
        age: Free-form age text such as "25-year-old", "30s" or "young adult".
            A trailing " adult"/" adults" is removed before use.
        subject: Subject noun; falls back to "person" when empty.

    Returns:
        * no usable age             -> "An adult <subject>"
        * age contains "year-old"   -> "A/An <age> adult <subject>"
        * age contains a digit      -> "An adult <subject> in her/his <age>"
        * otherwise                 -> "An adult <age> <subject>"

    Fixes over the previous version: the article in the "year-old" form now
    agrees with the number ("An 18-year-old", "An 80-year-old"), and an age
    that is only the word "adult"/"adults" no longer produces
    "An adult adult <subject>".
    """
    age = clean_text(age)
    subject = clean_text(subject) or "person"
    clean_age = re.sub(r"\s+adults?$", "", age).strip()
    # A bare "adult" carries no age information beyond the fixed wording.
    if not clean_age or clean_age.lower() in ("adult", "adults"):
        return f"An adult {subject}"
    if "year-old" in clean_age:
        # Numbers read with a leading vowel sound: 8, 8x, 8xx..., 11 and 18
        # (also when spelled out: eight..., eleven).
        vowel_sound = re.match(
            r"(?:8\d*|11|18)(?!\d)|eight|eleven",
            clean_age,
            re.IGNORECASE,
        )
        lead = "An" if vowel_sound else "A"
        return f"{lead} {clean_age} adult {subject}"
    if re.search(r"\d", clean_age):
        # NOTE(review): every subject other than "woman" gets "his", including
        # the "person" fallback -- confirm this is intended.
        poss = "her" if subject == "woman" else "his"
        return f"An adult {subject} in {poss} {clean_age}"
    return f"An adult {clean_age} {subject}"
|
||||||
|
|
||||||
|
|
||||||
|
def clean_age_phrase(age: str) -> str:
    """Rewrite an age band for running prose.

    The age is cleaned, a trailing " adult"/" adults" is removed, and every
    "-year-old" is replaced with " years old".
    """
    cleaned = clean_text(age)
    without_suffix = re.sub(r"\s+adults?$", "", cleaned).strip()
    return without_suffix.replace("-year-old", " years old")
|
||||||
|
|
||||||
|
|
||||||
|
def subject_phrase_from_counts(row: dict[str, Any]) -> str:
    """Describe the cast of ``row`` as a subject phrase.

    An explicit ``subject_phrase`` wins. Otherwise the phrase is built from
    ``women_count`` and ``men_count``, e.g. "2 adult women and 1 adult man".
    When the counts are unusable or both zero, the cleaned
    ``primary_subject`` is used, falling back to "adult scene".
    """
    explicit = clean_text(row.get("subject_phrase"))
    if explicit:
        return explicit

    def fallback() -> str:
        return clean_text(row.get("primary_subject")) or "adult scene"

    try:
        counts = (
            int(row.get("women_count") or 0),
            int(row.get("men_count") or 0),
        )
    except (TypeError, ValueError):
        return fallback()

    nouns = (("woman", "women"), ("man", "men"))
    groups = [
        f"{count} adult {singular if count == 1 else plural}"
        for count, (singular, plural) in zip(counts, nouns)
        if count
    ]
    return " and ".join(groups) if groups else fallback()
|
||||||
|
|
||||||
|
|
||||||
|
def verb_for_row(row: dict[str, Any]) -> str:
    """Return "is" when ``person_count`` is exactly 1, otherwise "are".

    A missing, falsy or non-numeric ``person_count`` yields "are".
    """
    try:
        headcount = int(row.get("person_count") or 0)
    except (TypeError, ValueError):
        return "are"
    return "is" if headcount == 1 else "are"
|
||||||
|
|
||||||
|
|
||||||
|
def detail_allows(level: str, dense_only: bool = False) -> bool:
    """Delegate to ``caption_policy.detail_allows``, forwarding ``dense_only`` by keyword."""
    allowed = caption_policy.detail_allows(level, dense_only=dense_only)
    return allowed
|
||||||
|
|
||||||
|
|
||||||
|
def metadata_route_dependencies(
    metadata_to_prose: Callable[[dict[str, Any], str, bool], tuple[str, str]],
) -> caption_metadata_routes.CaptionMetadataRouteDependencies:
    """Assemble the helper bundle consumed by ``caption_metadata_routes``.

    Every field except ``metadata_to_prose`` is filled from this module's own
    helpers and constants. ``metadata_to_prose`` is supplied by the caller and
    forwarded unchanged.
    """
    helpers = {
        "item_labels": ITEM_LABELS,
        "clean_text": clean_text,
        "row_value": row_value,
        "field_row_value": field_row_value,
        "clean_clothing": clean_clothing,
        "normalize_composition": normalize_composition,
        "expression_disabled": expression_disabled,
        "detail_allows": detail_allows,
        "join_sentences": join_sentences,
        "human_join": human_join,
        "article": article,
        "cap_first": cap_first,
        "body_phrase": body_phrase,
        "single_caption_front": single_caption_front,
        "pose_clause": pose_clause,
        "age_subject": age_subject,
        "clean_age_phrase": clean_age_phrase,
        "subject_phrase_from_counts": subject_phrase_from_counts,
        "verb_for_row": verb_for_row,
        "metadata_action_label": metadata_action_label,
        "natural_cast_descriptor_text": natural_cast_descriptor_text,
        "cast_labels": cast_labels,
        "natural_label_text": natural_label_text,
        "metadata_to_prose": metadata_to_prose,
    }
    return caption_metadata_routes.CaptionMetadataRouteDependencies(**helpers)
|
||||||
@@ -443,6 +443,10 @@ Already isolated:
|
|||||||
`CaptionMetadataRouteRequest`, `CaptionMetadataRouteDependencies`, and
|
`CaptionMetadataRouteRequest`, `CaptionMetadataRouteDependencies`, and
|
||||||
`CaptionMetadataRoute`; `caption_naturalizer.py` keeps compatibility wrappers,
|
`CaptionMetadataRoute`; `caption_naturalizer.py` keeps compatibility wrappers,
|
||||||
profile handling, trigger behavior, and text fallback.
|
profile handling, trigger behavior, and text fallback.
|
||||||
|
- `caption_text_policy.py` owns caption sentence helpers, trigger wrapping,
|
||||||
|
formatter-hint append, row-value fallback wrappers, cast text wrappers,
|
||||||
|
single-caption front parsing, route dependency assembly, and caption metadata
|
||||||
|
helper callbacks used by `caption_metadata_routes.py`.
|
||||||
- metadata-family action labels from `action_family` and `position_family` via
|
- metadata-family action labels from `action_family` and `position_family` via
|
||||||
`caption_policy.py`.
|
`caption_policy.py`.
|
||||||
- shared row route metadata reads from `route_metadata.py`.
|
- shared row route metadata reads from `route_metadata.py`.
|
||||||
|
|||||||
@@ -21,9 +21,13 @@ When a result is wrong, first identify which layer owns the bad text:
|
|||||||
|
|
||||||
- Raw builder prompt already wrong: edit `prompt_builder.py` or the relevant
|
- Raw builder prompt already wrong: edit `prompt_builder.py` or the relevant
|
||||||
`categories/*.json` pool/template.
|
`categories/*.json` pool/template.
|
||||||
- Raw builder prompt acceptable, Krea2 output wrong: edit `krea_formatter.py`.
|
- Raw builder prompt acceptable, Krea2 output wrong: inspect `krea_formatter.py`
|
||||||
- Raw builder prompt acceptable, SDXL tags wrong: edit `sdxl_formatter.py`.
|
orchestration, then the owning Krea route/policy helper.
|
||||||
- Natural caption/training caption wrong: edit `caption_naturalizer.py`.
|
- Raw builder prompt acceptable, SDXL tags wrong: inspect `sdxl_formatter.py`
|
||||||
|
orchestration, then `sdxl_tag_policy.py` and `sdxl_tag_routes.py`.
|
||||||
|
- Natural caption/training caption wrong: inspect `caption_naturalizer.py`
|
||||||
|
orchestration, then `caption_text_policy.py`, `caption_policy.py`, and
|
||||||
|
`caption_metadata_routes.py`.
|
||||||
- UI/preview/loop behavior wrong: edit `__init__.py`, node family modules such
|
- UI/preview/loop behavior wrong: edit `__init__.py`, node family modules such
|
||||||
as `node_builder.py`, `node_seed_resolution.py`, `node_camera.py`,
|
as `node_builder.py`, `node_seed_resolution.py`, `node_camera.py`,
|
||||||
`node_character.py`, `node_hardcore_position.py`, `node_formatter.py`,
|
`node_character.py`, `node_hardcore_position.py`, `node_formatter.py`,
|
||||||
@@ -125,6 +129,7 @@ Core helper ownership:
|
|||||||
| `sdxl_presets.py` | SDXL formatter profiles, style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
|
| `sdxl_presets.py` | SDXL formatter profiles, style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
|
||||||
| `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. |
|
| `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. |
|
||||||
| `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
|
| `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
|
||||||
|
| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
|
||||||
|
|
||||||
## Node IO Map
|
## Node IO Map
|
||||||
|
|
||||||
@@ -332,7 +337,8 @@ Edit targets:
|
|||||||
- Hardcore-specific expressions: usually `categories/sexual_poses.json` or named
|
- Hardcore-specific expressions: usually `categories/sexual_poses.json` or named
|
||||||
hardcore expression pools.
|
hardcore expression pools.
|
||||||
- Character-level expression settings: slot config and `row_expression.py`.
|
- Character-level expression settings: slot config and `row_expression.py`.
|
||||||
- Formatter expression wording: `krea_formatter.py` or `caption_naturalizer.py`.
|
- Formatter expression wording: Krea route helpers, or `caption_text_policy.py`
|
||||||
|
and `caption_metadata_routes.py` for natural captions.
|
||||||
|
|
||||||
### Pose / Action
|
### Pose / Action
|
||||||
|
|
||||||
@@ -742,9 +748,9 @@ Naturalizer field consumption:
|
|||||||
| Branch | Reads most from | Key functions |
|
| Branch | Reads most from | Key functions |
|
||||||
| --- | --- | --- |
|
| --- | --- | --- |
|
||||||
| Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` |
|
| Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` |
|
||||||
| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `_metadata_action_label` |
|
| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label` |
|
||||||
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `caption_metadata_routes.insta_of_pair_from_row_result` |
|
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `caption_metadata_routes.insta_of_pair_from_row_result` |
|
||||||
| Text fallback | `caption` or `prompt` text | `_text_to_prose` |
|
| Text fallback | `caption` or `prompt` text | `caption_naturalizer._text_to_prose`, with sentence helpers delegated to `caption_text_policy.py` |
|
||||||
|
|
||||||
### Final Text Hygiene
|
### Final Text Hygiene
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,7 @@ if str(ROOT) not in sys.path:
|
|||||||
import caption_naturalizer # noqa: E402
|
import caption_naturalizer # noqa: E402
|
||||||
import caption_metadata_routes # noqa: E402
|
import caption_metadata_routes # noqa: E402
|
||||||
import caption_policy # noqa: E402
|
import caption_policy # noqa: E402
|
||||||
|
import caption_text_policy # noqa: E402
|
||||||
import cast_context # noqa: E402
|
import cast_context # noqa: E402
|
||||||
import category_extensions # noqa: E402
|
import category_extensions # noqa: E402
|
||||||
import category_template_metadata # noqa: E402
|
import category_template_metadata # noqa: E402
|
||||||
@@ -2296,6 +2297,44 @@ def smoke_caption_policy() -> None:
|
|||||||
_expect(browsing_method == "text(fallback)", "Caption browsing profile changed fallback method")
|
_expect(browsing_method == "text(fallback)", "Caption browsing profile changed fallback method")
|
||||||
|
|
||||||
|
|
||||||
|
def smoke_caption_text_policy() -> None:
    """Smoke-check that caption_naturalizer wrappers agree with caption_text_policy.

    Each private wrapper on ``caption_naturalizer`` is called with the same
    arguments as its ``caption_text_policy`` counterpart and the two results
    are compared. The dependency bundle returned by
    ``caption_naturalizer._caption_metadata_route_dependencies()`` is then
    checked by identity against the callables it is expected to carry.
    """
    sample_caption = f"{Trigger}, woman, 25-year-old adult, slim figure, fair skin, blonde hair, blue eyes, studio"
    row = {
        "primary_subject": "woman",
        "age_band": "25-year-old adult",
        "body_phrase": "slim figure",
        "caption": sample_caption,
        "formatter_hints": {"caption": ["caption policy hint"]},
    }

    # Wrapper parity: call the naturalizer wrapper first, then the policy helper.
    wrapped_body = caption_naturalizer._body_phrase("slim", "balanced figure")
    policy_body = caption_text_policy.body_phrase("slim", "balanced figure")
    _expect(
        wrapped_body == policy_body,
        "Caption body phrase wrapper should delegate to caption_text_policy",
    )

    wrapped_front = caption_naturalizer._single_caption_front(row)
    policy_front = caption_text_policy.single_caption_front(row)
    _expect(
        wrapped_front == policy_front,
        "Caption front parser wrapper should delegate to caption_text_policy",
    )

    wrapped_hints = caption_naturalizer._formatter_hint_parts(row)
    policy_hints = caption_text_policy.formatter_hint_parts(row)
    _expect(
        wrapped_hints == policy_hints,
        "Caption formatter hint wrapper should delegate to caption_text_policy",
    )

    wrapped_appended = caption_naturalizer._append_formatter_hints("Base sentence.", row)
    policy_appended = caption_text_policy.append_formatter_hints("Base sentence.", row)
    _expect(
        wrapped_appended == policy_appended,
        "Caption formatter hint append wrapper should delegate to caption_text_policy",
    )

    wrapped_triggered = caption_naturalizer._with_trigger("A caption body", Trigger, True)
    policy_triggered = caption_text_policy.with_trigger("A caption body", Trigger, True)
    _expect(
        wrapped_triggered == policy_triggered,
        "Caption trigger wrapper should delegate to caption_text_policy",
    )

    # Dependency identity: (deps attribute, owning module, attribute on owner, failure message).
    # Attributes are resolved inside the loop so lookups happen in the same
    # order as one explicit check per line would perform them.
    deps = caption_naturalizer._caption_metadata_route_dependencies()
    identity_checks = (
        ("clean_text", caption_text_policy, "clean_text", "Caption route deps lost clean text policy"),
        ("field_row_value", caption_text_policy, "field_row_value", "Caption route deps lost field row-value policy"),
        ("expression_disabled", caption_text_policy, "expression_disabled", "Caption route deps lost expression policy"),
        ("single_caption_front", caption_text_policy, "single_caption_front", "Caption route deps lost front parser"),
        ("metadata_to_prose", caption_naturalizer, "_metadata_to_prose", "Caption route deps lost metadata recursion callback"),
    )
    for dep_attr, owner, owner_attr, failure_message in identity_checks:
        _expect(getattr(deps, dep_attr) is getattr(owner, owner_attr), failure_message)
|
||||||
|
|
||||||
|
|
||||||
def _expect_caption_route_parity(
|
def _expect_caption_route_parity(
|
||||||
name: str,
|
name: str,
|
||||||
row: dict[str, Any],
|
row: dict[str, Any],
|
||||||
@@ -5349,6 +5388,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
|
|||||||
("formatter_input_policy", smoke_formatter_input_policy),
|
("formatter_input_policy", smoke_formatter_input_policy),
|
||||||
("formatter_cast_policy", smoke_formatter_cast_policy),
|
("formatter_cast_policy", smoke_formatter_cast_policy),
|
||||||
("caption_policy", smoke_caption_policy),
|
("caption_policy", smoke_caption_policy),
|
||||||
|
("caption_text_policy", smoke_caption_text_policy),
|
||||||
("caption_metadata_routes", smoke_caption_metadata_routes),
|
("caption_metadata_routes", smoke_caption_metadata_routes),
|
||||||
("sdxl_presets_policy", smoke_sdxl_presets_policy),
|
("sdxl_presets_policy", smoke_sdxl_presets_policy),
|
||||||
("sdxl_tag_policy", smoke_sdxl_tag_policy),
|
("sdxl_tag_policy", smoke_sdxl_tag_policy),
|
||||||
|
|||||||
Reference in New Issue
Block a user