From 8ae689f0e7701032fc1991f7d65ca98ab8ce7b1b Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Sat, 27 Jun 2026 18:07:21 +0200
Subject: [PATCH] Use item axis details in captions

---
 caption_metadata_routes.py      |  4 +++
 caption_text_policy.py          | 47 +++++++++++++++++++++++++++++++++
 docs/prompt-pool-routing-map.md |  4 +--
 tools/prompt_smoke.py           | 42 +++++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/caption_metadata_routes.py b/caption_metadata_routes.py
index 4d888d9..405fc35 100644
--- a/caption_metadata_routes.py
+++ b/caption_metadata_routes.py
@@ -51,6 +51,7 @@ class CaptionMetadataRouteDependencies:
     subject_phrase_from_counts: Callable[[dict[str, Any]], str]
     verb_for_row: Callable[[dict[str, Any]], str]
     metadata_action_label: Callable[[dict[str, Any]], str]
+    item_axis_detail_text: Callable[[dict[str, Any], str], str]
     natural_cast_descriptor_text: Callable[[str], str]
     cast_labels: Callable[[str], list[str]]
     natural_label_text: Callable[[Any, list[str]], str]
@@ -227,6 +228,7 @@ def configured_cast_from_row_result(
     cast = deps.row_value(row, "cast_summary", ("Cast",))
     role_graph = deps.row_value(row, "role_graph", ("Role graph",))
     item = deps.row_value(row, "item", deps.item_labels)
+    axis_detail = deps.item_axis_detail_text(row, " ".join(part for part in (role_graph, item) if part))
     scene = deps.row_value(row, "scene_text", ("Setting", "Scene"))
     expression = ""
     if not deps.expression_disabled(row):
@@ -250,6 +252,8 @@ def configured_cast_from_row_result(
         parts.append(role_graph)
     if item:
         parts.append(f"The {deps.metadata_action_label(row)} is {item}")
+    if axis_detail:
+        parts.append(f"Selected action details include {axis_detail}")
     scene_bits = []
     if scene:
         scene_bits.append(f"set in {scene}")
diff --git a/caption_text_policy.py b/caption_text_policy.py
index ca6cd39..60dfd66 100644
--- a/caption_text_policy.py
+++ b/caption_text_policy.py
@@ -97,6 +97,52 @@ def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
     return caption_policy.metadata_action_label(row, default)
 
 
+def _axis_value_texts(value: Any) -> list[str]:
+    """Flatten one axis value into cleaned, non-placeholder text fragments.
+
+    Strings are cleaned and dropped when empty or a placeholder keyword; lists
+    and tuples are flattened in order; dicts yield the first non-empty preferred
+    field, else every value flattened. Anything else produces no text.
+    """
+    if isinstance(value, str):
+        text = clean_text(value).strip(" .")
+        if not text or text.lower() in ("any", "auto", "random", "none"):
+            return []
+        return [text]
+    if isinstance(value, dict):
+        for preferred in ("text", "prompt", "template", "value", "name"):
+            preferred_texts = _axis_value_texts(value.get(preferred))
+            if preferred_texts:
+                return preferred_texts
+        value = list(value.values())  # no preferred field: fall back to all values
+    if isinstance(value, (list, tuple)):
+        return [text for item in value for text in _axis_value_texts(item)]
+    return []
+
+
+def item_axis_detail_text(row: dict[str, Any], existing_text: str = "") -> str:
+    """Join the row's ``item_axis_values`` texts that ``existing_text`` lacks.
+
+    Family/position keys are skipped and duplicates are dropped
+    case-insensitively; a row without an axis mapping yields "".
+    """
+    axes = row.get("item_axis_values") if isinstance(row, dict) else None
+    if not isinstance(axes, dict):
+        return ""
+    already_said = clean_text(existing_text).lower()
+    ignored = {"action_family", "position_family", "position_key", "position_keys"}
+    emitted: dict[str, str] = {}  # lowercase key -> display text, insertion-ordered
+    for axis_name, axis_value in axes.items():
+        if str(axis_name) in ignored:
+            continue
+        for raw in _axis_value_texts(axis_value):
+            detail = clean_text(raw).strip(" .")
+            folded = detail.lower()
+            if detail and folded not in emitted and folded not in already_said:
+                emitted[folded] = detail
+    return human_join(list(emitted.values()))
+
+
 def prompt_cast_descriptors(text: str) -> str:
     return cast_policy.prompt_cast_descriptors(text)
 
@@ -299,6 +345,7 @@ def metadata_route_dependencies(
         subject_phrase_from_counts=subject_phrase_from_counts,
         verb_for_row=verb_for_row,
         metadata_action_label=metadata_action_label,
+        item_axis_detail_text=item_axis_detail_text,
         natural_cast_descriptor_text=natural_cast_descriptor_text,
         cast_labels=cast_labels,
         natural_label_text=natural_label_text,
diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 532f4ea..ce62066 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -143,7 +143,7 @@ Core helper ownership:
 | `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, item-axis tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. |
 | `caption_format_route.py` | Top-level caption dispatch, input-hint and target normalization, caption profile application, metadata-vs-text branching, trigger wrapping, final prose hygiene, and method/output shape. |
 | `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
-| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
+| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, item-axis detail prose, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
 
 ## Node IO Map
 
@@ -838,7 +838,7 @@ Naturalizer field consumption:
 | Branch | Reads most from | Key functions |
 | --- | --- | --- |
 | Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` |
-| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label` |
+| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `item_axis_values`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label`, `caption_text_policy.item_axis_detail_text` |
 | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity, target | `caption_metadata_routes.insta_of_pair_from_row_result` |
 | Text fallback | `caption` or `prompt` text | `caption_naturalizer._text_to_prose`, with sentence helpers delegated to `caption_text_policy.py` |
 
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index 8dff169..b7174c6 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -4104,11 +4104,30 @@ def smoke_caption_text_policy() -> None:
         == caption_text_policy.with_trigger("A caption body", Trigger, True),
         "Caption trigger wrapper should delegate to caption_text_policy",
     )
+    axis_detail_row = {
+        "item_axis_values": {
+            "position": "standing oral position",
+            "contact_detail": "mouth contact at hip height",
+            "duplicate": "standing oral position",
+            "ignored": "random",
+        }
+    }
+    _expect(
+        caption_text_policy.item_axis_detail_text(axis_detail_row, "generic action")
+        == "standing oral position and mouth contact at hip height",
+        "Caption axis detail text should flatten selected item axes",
+    )
+    _expect(
+        caption_text_policy.item_axis_detail_text(axis_detail_row, "standing oral position already appears")
+        == "mouth contact at hip height",
+        "Caption axis detail text should skip details already present in item prose",
+    )
     deps = caption_naturalizer._caption_metadata_route_dependencies()
     _expect(deps.clean_text is caption_text_policy.clean_text, "Caption route deps lost clean text policy")
     _expect(deps.field_row_value is caption_text_policy.field_row_value, "Caption route deps lost field row-value policy")
     _expect(deps.expression_disabled is caption_text_policy.expression_disabled, "Caption route deps lost expression policy")
     _expect(deps.single_caption_front is caption_text_policy.single_caption_front, "Caption route deps lost front parser")
+    _expect(deps.item_axis_detail_text is caption_text_policy.item_axis_detail_text, "Caption route deps lost item-axis detail policy")
     _expect(deps.metadata_to_prose is caption_naturalizer._metadata_to_prose, "Caption route deps lost metadata recursion callback")
 
 
@@ -4185,6 +4204,29 @@ def smoke_caption_metadata_routes() -> None:
         caption_naturalizer._configured_cast_from_row,
         "metadata(configured_cast)",
     )
+    configured_axis_only = _fixture_hardcore_row(
+        item="generic configured adult action",
+        role_graph="",
+        source_role_graph="",
+        item_axis_values={
+            "position": "standing oral position",
+            "contact_detail": "mouth contact at hip height, hands on hips",
+        },
+        action_family="oral",
+        position_family="oral",
+        position_key="standing",
+        position_keys=["standing"],
+    )
+    axis_route = caption_metadata_routes.configured_cast_from_row_result(
+        caption_naturalizer._caption_metadata_route_request(configured_axis_only, "balanced", False),
+        caption_naturalizer._caption_metadata_route_dependencies(),
+    )
+    _expect(axis_route is not None, "Caption configured-cast axis-only row did not match")
+    assert axis_route is not None
+    _expect("Selected action details include" in axis_route.prose, "Caption route did not emit selected axis details")
+    _expect("standing oral position" in axis_route.prose, "Caption route lost item-axis position detail")
+    _expect("mouth contact at hip height" in axis_route.prose, "Caption route lost item-axis contact detail")
+    _expect("hands on hips" in axis_route.prose, "Caption route lost item-axis split detail")
 
     group = {
         "primary_subject": "group scene",