From 8ae689f0e7701032fc1991f7d65ca98ab8ce7b1b Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Sat, 27 Jun 2026 18:07:21 +0200
Subject: [PATCH] Use item axis details in captions

---

Notes:
    Reviewer summary. Text between the "---" separator and the first
    "diff --git" line is not part of the commit message.

    * caption_text_policy.item_axis_detail_text(row, existing_text) reads
      row["item_axis_values"] and returns human_join() of the texts it
      collects. It returns "" when row or the axis mapping is not a dict.
    * Skipped: the keys action_family, position_family, position_key and
      position_keys; the placeholder strings any/auto/random/none; values
      already collected; and values found as a substring of existing_text
      (both sides lower-cased, existing_text passed through clean_text()).
    * Only str, list and dict axis values contribute text. For a dict the
      first non-empty of text/prompt/template/value/name wins, otherwise
      all values are flattened. Numbers, bools, None and any other type
      (for example tuples or sets) contribute nothing.
    * configured_cast_from_row_result appends
      "Selected action details include <detail>" after the item sentence.

 caption_metadata_routes.py      |  4 +++
 caption_text_policy.py          | 47 +++++++++++++++++++++++++++++++++
 docs/prompt-pool-routing-map.md |  4 +--
 tools/prompt_smoke.py           | 42 +++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/caption_metadata_routes.py b/caption_metadata_routes.py
index 4d888d9..405fc35 100644
--- a/caption_metadata_routes.py
+++ b/caption_metadata_routes.py
@@ -51,6 +51,7 @@ class CaptionMetadataRouteDependencies:
     subject_phrase_from_counts: Callable[[dict[str, Any]], str]
     verb_for_row: Callable[[dict[str, Any]], str]
     metadata_action_label: Callable[[dict[str, Any]], str]
+    item_axis_detail_text: Callable[[dict[str, Any], str], str]
     natural_cast_descriptor_text: Callable[[str], str]
     cast_labels: Callable[[str], list[str]]
     natural_label_text: Callable[[Any, list[str]], str]
@@ -227,6 +228,7 @@ def configured_cast_from_row_result(
     cast = deps.row_value(row, "cast_summary", ("Cast",))
     role_graph = deps.row_value(row, "role_graph", ("Role graph",))
     item = deps.row_value(row, "item", deps.item_labels)
+    axis_detail = deps.item_axis_detail_text(row, " ".join(part for part in (role_graph, item) if part))
     scene = deps.row_value(row, "scene_text", ("Setting", "Scene"))
     expression = ""
     if not deps.expression_disabled(row):
@@ -250,6 +252,8 @@ def configured_cast_from_row_result(
         parts.append(role_graph)
     if item:
         parts.append(f"The {deps.metadata_action_label(row)} is {item}")
+    if axis_detail:
+        parts.append(f"Selected action details include {axis_detail}")
     scene_bits = []
     if scene:
         scene_bits.append(f"set in {scene}")
diff --git a/caption_text_policy.py b/caption_text_policy.py
index ca6cd39..60dfd66 100644
--- a/caption_text_policy.py
+++ b/caption_text_policy.py
@@ -97,6 +97,52 @@ def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
     return caption_policy.metadata_action_label(row, default)
 
 
+def _axis_value_texts(value: Any) -> list[str]:
+    if isinstance(value, str):
+        text = clean_text(value).strip(" .")
+        return [text] if text and text.lower() not in ("any", "auto", "random", "none") else []
+    if isinstance(value, (int, float, bool)) or value is None:
+        return []
+    if isinstance(value, list):
+        texts: list[str] = []
+        for item in value:
+            texts.extend(_axis_value_texts(item))
+        return texts
+    if isinstance(value, dict):
+        for preferred in ("text", "prompt", "template", "value", "name"):
+            preferred_texts = _axis_value_texts(value.get(preferred))
+            if preferred_texts:
+                return preferred_texts
+        texts: list[str] = []
+        for item in value.values():
+            texts.extend(_axis_value_texts(item))
+        return texts
+    return []
+
+
+def item_axis_detail_text(row: dict[str, Any], existing_text: str = "") -> str:
+    if not isinstance(row, dict):
+        return ""
+    axis_values = row.get("item_axis_values")
+    if not isinstance(axis_values, dict):
+        return ""
+    existing = clean_text(existing_text).lower()
+    details: list[str] = []
+    seen: set[str] = set()
+    skipped_keys = {"action_family", "position_family", "position_key", "position_keys"}
+    for key, value in axis_values.items():
+        if str(key) in skipped_keys:
+            continue
+        for text in _axis_value_texts(value):
+            normalized = clean_text(text).strip(" .")
+            lower = normalized.lower()
+            if not normalized or lower in seen or lower in existing:
+                continue
+            details.append(normalized)
+            seen.add(lower)
+    return human_join(details)
+
+
 def prompt_cast_descriptors(text: str) -> str:
     return cast_policy.prompt_cast_descriptors(text)
 
@@ -299,6 +345,7 @@ def metadata_route_dependencies(
         subject_phrase_from_counts=subject_phrase_from_counts,
         verb_for_row=verb_for_row,
         metadata_action_label=metadata_action_label,
+        item_axis_detail_text=item_axis_detail_text,
         natural_cast_descriptor_text=natural_cast_descriptor_text,
         cast_labels=cast_labels,
         natural_label_text=natural_label_text,
diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 532f4ea..ce62066 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -143,7 +143,7 @@ Core helper ownership:
 | `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, item-axis tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. |
 | `caption_format_route.py` | Top-level caption dispatch, input-hint and target normalization, caption profile application, metadata-vs-text branching, trigger wrapping, final prose hygiene, and method/output shape. |
 | `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
-| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
+| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, item-axis detail prose, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
 
 ## Node IO Map
 
@@ -838,7 +838,7 @@ Naturalizer field consumption:
 | Branch | Reads most from | Key functions |
 | --- | --- | --- |
 | Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` |
-| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label` |
+| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `item_axis_values`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label`, `caption_text_policy.item_axis_detail_text` |
 | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity, target | `caption_metadata_routes.insta_of_pair_from_row_result` |
 | Text fallback | `caption` or `prompt` text | `caption_naturalizer._text_to_prose`, with sentence helpers delegated to `caption_text_policy.py` |
 
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index 8dff169..b7174c6 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -4104,11 +4104,30 @@ def smoke_caption_text_policy() -> None:
         == caption_text_policy.with_trigger("A caption body", Trigger, True),
         "Caption trigger wrapper should delegate to caption_text_policy",
     )
+    axis_detail_row = {
+        "item_axis_values": {
+            "position": "standing oral position",
+            "contact_detail": "mouth contact at hip height",
+            "duplicate": "standing oral position",
+            "ignored": "random",
+        }
+    }
+    _expect(
+        caption_text_policy.item_axis_detail_text(axis_detail_row, "generic action")
+        == "standing oral position and mouth contact at hip height",
+        "Caption axis detail text should flatten selected item axes",
+    )
+    _expect(
+        caption_text_policy.item_axis_detail_text(axis_detail_row, "standing oral position already appears")
+        == "mouth contact at hip height",
+        "Caption axis detail text should skip details already present in item prose",
+    )
     deps = caption_naturalizer._caption_metadata_route_dependencies()
     _expect(deps.clean_text is caption_text_policy.clean_text, "Caption route deps lost clean text policy")
     _expect(deps.field_row_value is caption_text_policy.field_row_value, "Caption route deps lost field row-value policy")
     _expect(deps.expression_disabled is caption_text_policy.expression_disabled, "Caption route deps lost expression policy")
     _expect(deps.single_caption_front is caption_text_policy.single_caption_front, "Caption route deps lost front parser")
+    _expect(deps.item_axis_detail_text is caption_text_policy.item_axis_detail_text, "Caption route deps lost item-axis detail policy")
     _expect(deps.metadata_to_prose is caption_naturalizer._metadata_to_prose, "Caption route deps lost metadata recursion callback")
 
 
@@ -4185,6 +4204,29 @@ def smoke_caption_metadata_routes() -> None:
         caption_naturalizer._configured_cast_from_row,
         "metadata(configured_cast)",
     )
+    configured_axis_only = _fixture_hardcore_row(
+        item="generic configured adult action",
+        role_graph="",
+        source_role_graph="",
+        item_axis_values={
+            "position": "standing oral position",
+            "contact_detail": "mouth contact at hip height, hands on hips",
+        },
+        action_family="oral",
+        position_family="oral",
+        position_key="standing",
+        position_keys=["standing"],
+    )
+    axis_route = caption_metadata_routes.configured_cast_from_row_result(
+        caption_naturalizer._caption_metadata_route_request(configured_axis_only, "balanced", False),
+        caption_naturalizer._caption_metadata_route_dependencies(),
+    )
+    _expect(axis_route is not None, "Caption configured-cast axis-only row did not match")
+    assert axis_route is not None
+    _expect("Selected action details include" in axis_route.prose, "Caption route did not emit selected axis details")
+    _expect("standing oral position" in axis_route.prose, "Caption route lost item-axis position detail")
+    _expect("mouth contact at hip height" in axis_route.prose, "Caption route lost item-axis contact detail")
+    _expect("hands on hips" in axis_route.prose, "Caption route lost item-axis split detail")
 
     group = {
         "primary_subject": "group scene",