From 4f97057fc48308d596cacfe6ce905296dd49168b Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Sat, 27 Jun 2026 21:30:23 +0200
Subject: [PATCH] Naturalize caption character expressions

---
Notes (review):
    Line breaks and indentation were restored from a whitespace-collapsed
    copy of this patch. Blank lines were placed so that every hunk matches
    the line counts in its @@ header; the indentation of hunk lines is
    inferred from Python block structure, so confirm it against the target
    tree or apply with `git apply --ignore-whitespace`.

    NOTE(review): _caption_expression_grammar_issues only flags the
    "Woman X has" / "Man X has" label form, while expression_detail also
    rewrites "the woman has" / "the man has" - confirm that gap is intended.

 caption_metadata_routes.py       | 50 ++++++++++++++++++++++++++++----
 tools/prompt_route_simulation.py | 11 +++++++
 tools/prompt_smoke.py            | 21 ++++++++++++++
 3 files changed, 77 insertions(+), 5 deletions(-)

diff --git a/caption_metadata_routes.py b/caption_metadata_routes.py
index c716c86..b0dc4fb 100644
--- a/caption_metadata_routes.py
+++ b/caption_metadata_routes.py
@@ -94,6 +94,26 @@ def couple_subject_sentence(
     return f"{subject} are adults"
 
 
+def expression_detail(expression: Any, clean_text: Callable[[Any], str]) -> tuple[str, bool]:
+    text = clean_text(expression)
+    if not text:
+        return "", False
+    has_character_labels = bool(
+        re.search(
+            r"\b(?:Woman|Man) [A-Z] has\b|\bthe (?:woman|man) has\b",
+            text,
+            flags=re.IGNORECASE,
+        )
+    )
+    text = re.sub(
+        r"\b((?:Woman|Man) [A-Z]|the (?:woman|man)) has\b",
+        r"\1 with",
+        text,
+        flags=re.IGNORECASE,
+    )
+    return text, has_character_labels
+
+
 def single_from_row_result(
     request: CaptionMetadataRouteRequest,
     deps: CaptionMetadataRouteDependencies,
@@ -148,7 +168,11 @@ def single_from_row_result(
     if pose:
         parts.append(f"{pronoun(subject)} is {deps.pose_clause(pose)}")
     if expression:
-        parts.append(f"{possessive_pronoun(subject)} expression is {expression}")
+        expression, labeled_expression = expression_detail(expression, deps.clean_text)
+        if labeled_expression:
+            parts.append(f"The expression detail shows {expression}")
+        else:
+            parts.append(f"{possessive_pronoun(subject)} expression is {expression}")
     if scene:
         parts.append(f"The setting is {scene}")
     if deps.detail_allows(detail_level) and camera_scene:
@@ -204,7 +228,11 @@ def couple_from_row_result(
     if deps.detail_allows(detail_level) and camera_scene:
         parts.append(camera_scene)
     if expression:
-        parts.append(f"Their expressions are {expression}")
+        expression, labeled_expression = expression_detail(expression, deps.clean_text)
+        if labeled_expression:
+            parts.append(f"The expression details show {expression}")
+        else:
+            parts.append(f"Their expressions are {expression}")
     if deps.detail_allows(detail_level) and composition:
         parts.append(f"The composition is {composition}")
     if keep_style and style:
@@ -258,7 +286,11 @@ def configured_cast_from_row_result(
     if scene:
         scene_bits.append(f"set in {scene}")
     if expression:
-        scene_bits.append(f"with {expression}")
+        expression, labeled_expression = expression_detail(expression, deps.clean_text)
+        if labeled_expression:
+            scene_bits.append(f"with expression details showing {expression}")
+        else:
+            scene_bits.append(f"with {expression}")
     if composition:
         scene_bits.append(f"framed as {composition}")
     if scene_bits and deps.detail_allows(detail_level):
@@ -299,7 +331,11 @@ def group_or_layout_from_row_result(
     if primary == "layout scene":
         parts = [f"{deps.cap_first(subject)} is arranged as an adults-only designed illustration layout"]
         if expression:
-            parts.append(f"The featured expression is {expression}")
+            expression, labeled_expression = expression_detail(expression, deps.clean_text)
+            if labeled_expression:
+                parts.append(f"The featured expression details show {expression}")
+            else:
+                parts.append(f"The featured expression is {expression}")
     else:
         parts = [f"{deps.cap_first(subject)} includes adults"]
         if age:
@@ -307,7 +343,11 @@ def group_or_layout_from_row_result(
         if item:
             parts.append(f"They wear {item}")
         if expression:
-            parts.append(f"They show {expression}")
+            expression, labeled_expression = expression_detail(expression, deps.clean_text)
+            if labeled_expression:
+                parts.append(f"Their expressions show {expression}")
+            else:
+                parts.append(f"They show {expression}")
     if scene:
         parts.append(f"The setting is {scene}")
     if deps.detail_allows(detail_level) and camera_scene:
diff --git a/tools/prompt_route_simulation.py b/tools/prompt_route_simulation.py
index 55a0baa..549cf8c 100644
--- a/tools/prompt_route_simulation.py
+++ b/tools/prompt_route_simulation.py
@@ -526,6 +526,16 @@ def _caption_cast_descriptor_issues(name: str, row: dict[str, Any] | None, capti
     return []
 
 
+def _caption_expression_grammar_issues(name: str, caption_text: str) -> list[str]:
+    if re.search(
+        r"\b(?:with|are|show|shows|is|include|includes)\s+(?:woman|man) [a-z]\s+has\b",
+        caption_text,
+        flags=re.IGNORECASE,
+    ):
+        return [f"{name}.caption: character_expression_has_grammar"]
+    return []
+
+
 def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str, Any], str]:
     trace_text = str(payload.get("route_trace_json") or "")
     if not trace_text:
@@ -659,6 +669,7 @@ def _formatter_issues(
             issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
     issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
     issues.extend(_caption_cast_descriptor_issues(name, row, caption_text))
+    issues.extend(_caption_expression_grammar_issues(name, caption_text))
 
     for label, value in (
         (f"{name}.krea_negative", krea.get("negative_prompt")),
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index f64b660..529b94a 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -4262,6 +4262,27 @@ def smoke_caption_metadata_routes() -> None:
         caption_naturalizer._configured_cast_from_row,
         "metadata(configured_cast)",
     )
+    configured_character_expression = _fixture_hardcore_row(
+        character_expression_text="Woman A has flushed focus; Man A has concentrated stare",
+    )
+    character_expression_route = caption_metadata_routes.configured_cast_from_row_result(
+        caption_naturalizer._caption_metadata_route_request(configured_character_expression, "balanced", False),
+        caption_naturalizer._caption_metadata_route_dependencies(),
+    )
+    _expect(character_expression_route is not None, "Caption configured-cast character expression row did not match")
+    assert character_expression_route is not None
+    _expect(
+        "with Woman A has" not in character_expression_route.prose,
+        "Caption configured-cast prose kept old character-expression grammar",
+    )
+    _expect(
+        "Woman A with flushed focus" in character_expression_route.prose,
+        "Caption configured-cast prose did not naturalize Woman A expression",
+    )
+    _expect(
+        "Man A with concentrated stare" in character_expression_route.prose,
+        "Caption configured-cast prose did not naturalize Man A expression",
+    )
     configured_axis_only = _fixture_hardcore_row(
         item="generic configured adult action",
         role_graph="",