Naturalize caption character expressions

This commit is contained in:
2026-06-27 21:30:23 +02:00
parent 4de00bcc9d
commit 4f97057fc4
3 changed files with 77 additions and 5 deletions
+40
View File
@@ -94,6 +94,26 @@ def couple_subject_sentence(
return f"{subject} are adults"
def expression_detail(expression: Any, clean_text: Callable[[Any], str]) -> tuple[str, bool]:
    """Rewrite labelled "<character> has ..." expression text as "<character> with ...".

    The raw value is first passed through ``clean_text``. A character label is
    ``Woman X`` / ``Man X`` (a single letter) or ``the woman`` / ``the man``,
    matched case-insensitively and immediately followed by the word ``has``.

    Args:
        expression: Raw expression value; handed to ``clean_text`` unchanged.
        clean_text: Callable that converts the raw value into text.

    Returns:
        A ``(text, has_character_labels)`` tuple: the cleaned text with every
        labelled ``has`` replaced by ``with`` (the label keeps its original
        casing), and whether at least one labelled phrase was found. When the
        cleaned text is empty the result is ``("", False)``.
    """
    cleaned = clean_text(expression)
    if not cleaned:
        return "", False
    # A single subn pass both rewrites the labelled phrases and reports how
    # many were found, so the detection rule and the rewrite rule share one
    # pattern and cannot drift apart.
    rewritten, label_count = re.subn(
        r"\b((?:Woman|Man) [A-Z]|the (?:woman|man)) has\b",
        r"\1 with",
        cleaned,
        flags=re.IGNORECASE,
    )
    return rewritten, label_count > 0
def single_from_row_result(
request: CaptionMetadataRouteRequest,
deps: CaptionMetadataRouteDependencies,
@@ -148,6 +168,10 @@ def single_from_row_result(
if pose:
parts.append(f"{pronoun(subject)} is {deps.pose_clause(pose)}")
if expression:
expression, labeled_expression = expression_detail(expression, deps.clean_text)
if labeled_expression:
parts.append(f"The expression detail shows {expression}")
else:
parts.append(f"{possessive_pronoun(subject)} expression is {expression}")
if scene:
parts.append(f"The setting is {scene}")
@@ -204,6 +228,10 @@ def couple_from_row_result(
if deps.detail_allows(detail_level) and camera_scene:
parts.append(camera_scene)
if expression:
expression, labeled_expression = expression_detail(expression, deps.clean_text)
if labeled_expression:
parts.append(f"The expression details show {expression}")
else:
parts.append(f"Their expressions are {expression}")
if deps.detail_allows(detail_level) and composition:
parts.append(f"The composition is {composition}")
@@ -258,6 +286,10 @@ def configured_cast_from_row_result(
if scene:
scene_bits.append(f"set in {scene}")
if expression:
expression, labeled_expression = expression_detail(expression, deps.clean_text)
if labeled_expression:
scene_bits.append(f"with expression details showing {expression}")
else:
scene_bits.append(f"with {expression}")
if composition:
scene_bits.append(f"framed as {composition}")
@@ -299,6 +331,10 @@ def group_or_layout_from_row_result(
if primary == "layout scene":
parts = [f"{deps.cap_first(subject)} is arranged as an adults-only designed illustration layout"]
if expression:
expression, labeled_expression = expression_detail(expression, deps.clean_text)
if labeled_expression:
parts.append(f"The featured expression details show {expression}")
else:
parts.append(f"The featured expression is {expression}")
else:
parts = [f"{deps.cap_first(subject)} includes adults"]
@@ -307,6 +343,10 @@ def group_or_layout_from_row_result(
if item:
parts.append(f"They wear {item}")
if expression:
expression, labeled_expression = expression_detail(expression, deps.clean_text)
if labeled_expression:
parts.append(f"Their expressions show {expression}")
else:
parts.append(f"They show {expression}")
if scene:
parts.append(f"The setting is {scene}")
+11
View File
@@ -526,6 +526,16 @@ def _caption_cast_descriptor_issues(name: str, row: dict[str, Any] | None, capti
return []
def _caption_expression_grammar_issues(name: str, caption_text: str) -> list[str]:
if re.search(
r"\b(?:with|are|show|shows|is|include|includes)\s+(?:woman|man) [a-z]\s+has\b",
caption_text,
flags=re.IGNORECASE,
):
return [f"{name}.caption: character_expression_has_grammar"]
return []
def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str, Any], str]:
trace_text = str(payload.get("route_trace_json") or "")
if not trace_text:
@@ -659,6 +669,7 @@ def _formatter_issues(
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
issues.extend(_caption_cast_descriptor_issues(name, row, caption_text))
issues.extend(_caption_expression_grammar_issues(name, caption_text))
for label, value in (
(f"{name}.krea_negative", krea.get("negative_prompt")),
+21
View File
@@ -4262,6 +4262,27 @@ def smoke_caption_metadata_routes() -> None:
caption_naturalizer._configured_cast_from_row,
"metadata(configured_cast)",
)
configured_character_expression = _fixture_hardcore_row(
character_expression_text="Woman A has flushed focus; Man A has concentrated stare",
)
character_expression_route = caption_metadata_routes.configured_cast_from_row_result(
caption_naturalizer._caption_metadata_route_request(configured_character_expression, "balanced", False),
caption_naturalizer._caption_metadata_route_dependencies(),
)
_expect(character_expression_route is not None, "Caption configured-cast character expression row did not match")
assert character_expression_route is not None
_expect(
"with Woman A has" not in character_expression_route.prose,
"Caption configured-cast prose kept old character-expression grammar",
)
_expect(
"Woman A with flushed focus" in character_expression_route.prose,
"Caption configured-cast prose did not naturalize Woman A expression",
)
_expect(
"Man A with concentrated stare" in character_expression_route.prose,
"Caption configured-cast prose did not naturalize Man A expression",
)
configured_axis_only = _fixture_hardcore_row(
item="generic configured adult action",
role_graph="",