Use item axis details in captions

2026-06-27 18:07:21 +02:00
parent a94cb9f8f1
commit 8ae689f0e7
4 changed files with 95 additions and 2 deletions
@@ -51,6 +51,7 @@ class CaptionMetadataRouteDependencies:
    subject_phrase_from_counts: Callable[[dict[str, Any]], str]
    verb_for_row: Callable[[dict[str, Any]], str]
    metadata_action_label: Callable[[dict[str, Any]], str]
    item_axis_detail_text: Callable[[dict[str, Any], str], str]
    natural_cast_descriptor_text: Callable[[str], str]
    cast_labels: Callable[[str], list[str]]
    natural_label_text: Callable[[Any, list[str]], str]
@@ -227,6 +228,7 @@ def configured_cast_from_row_result(
    cast = deps.row_value(row, "cast_summary", ("Cast",))
    role_graph = deps.row_value(row, "role_graph", ("Role graph",))
    item = deps.row_value(row, "item", deps.item_labels)
    axis_detail = deps.item_axis_detail_text(row, " ".join(part for part in (role_graph, item) if part))
    scene = deps.row_value(row, "scene_text", ("Setting", "Scene"))
    expression = ""
    if not deps.expression_disabled(row):
@@ -250,6 +252,8 @@ def configured_cast_from_row_result(
        parts.append(role_graph)
    if item:
        parts.append(f"The {deps.metadata_action_label(row)} is {item}")
    if axis_detail:
        parts.append(f"Selected action details include {axis_detail}")
    scene_bits = []
    if scene:
        scene_bits.append(f"set in {scene}")
@@ -97,6 +97,52 @@ def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
    return caption_policy.metadata_action_label(row, default)
 def _axis_value_texts(value: Any) -> list[str]:
     """Flatten an item-axis value into a list of cleaned text fragments.

     Strings are passed through ``clean_text`` and stripped of surrounding
     spaces and periods; empty results and the placeholder words
     ``any``/``auto``/``random``/``none`` (case-insensitive) are dropped.
     Lists are flattened recursively in order. For dicts, the first of the
     keys ``text``, ``prompt``, ``template``, ``value``, ``name`` that yields
     any text wins; otherwise every dict value is flattened in order.
     Every other kind of value (numbers, booleans, ``None``, unknown objects)
     contributes nothing.
     """
     if isinstance(value, str):
         cleaned = clean_text(value).strip(" .")
         if not cleaned or cleaned.lower() in ("any", "auto", "random", "none"):
             return []
         return [cleaned]

     if isinstance(value, list):
         return [text for entry in value for text in _axis_value_texts(entry)]

     if isinstance(value, dict):
         # A recognised "display" key short-circuits the generic walk so that
         # bookkeeping fields stored next to it do not end up in the output.
         for preferred_key in ("text", "prompt", "template", "value", "name"):
             chosen = _axis_value_texts(value.get(preferred_key))
             if chosen:
                 return chosen
         return [text for entry in value.values() for text in _axis_value_texts(entry)]

     # Scalars (int, float, bool), None and any unsupported type carry no text.
     return []
 def item_axis_detail_text(row: dict[str, Any], existing_text: str = "") -> str:
     """Build caption prose from a row's ``item_axis_values`` mapping.

     Args:
         row: Metadata row; only its ``item_axis_values`` entry is read.
         existing_text: Prose that is already part of the caption. Any detail
             whose lowercase form occurs as a substring of this text (after
             ``clean_text`` and lowercasing) is left out.

     Returns:
         The remaining unique details combined by ``human_join``, or ``""``
         when ``row`` is not a dict or ``item_axis_values`` is not a dict.
     """
     axis_values = row.get("item_axis_values") if isinstance(row, dict) else None
     if not isinstance(axis_values, dict):
         return ""

     already_written = clean_text(existing_text).lower()
     ignored_axes = {"action_family", "position_family", "position_key", "position_keys"}

     # Maps lowercase form -> display form. Dict insertion order keeps the
     # first-seen ordering while giving case-insensitive de-duplication.
     collected: dict[str, str] = {}
     for axis_name, axis_value in axis_values.items():
         if str(axis_name) in ignored_axes:
             continue
         for candidate in _axis_value_texts(axis_value):
             phrase = clean_text(candidate).strip(" .")
             folded = phrase.lower()
             if phrase and folded not in collected and folded not in already_written:
                 collected[folded] = phrase
     return human_join(list(collected.values()))
 def prompt_cast_descriptors(text: str) -> str:
     """Return ``cast_policy.prompt_cast_descriptors(text)`` unchanged.

     Thin pass-through so callers of this module do not need to reach into
     ``cast_policy`` directly.
     """
     descriptors = cast_policy.prompt_cast_descriptors(text)
     return descriptors
@@ -299,6 +345,7 @@ def metadata_route_dependencies(
        subject_phrase_from_counts=subject_phrase_from_counts,
        verb_for_row=verb_for_row,
        metadata_action_label=metadata_action_label,
        item_axis_detail_text=item_axis_detail_text,
        natural_cast_descriptor_text=natural_cast_descriptor_text,
        cast_labels=cast_labels,
        natural_label_text=natural_label_text,
@@ -143,7 +143,7 @@ Core helper ownership:
 | `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, item-axis tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. |
 | `caption_format_route.py` | Top-level caption dispatch, input-hint and target normalization, caption profile application, metadata-vs-text branching, trigger wrapping, final prose hygiene, and method/output shape. |
 | `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
-| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
+| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, item-axis detail prose, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
 ## Node IO Map
@@ -838,7 +838,7 @@ Naturalizer field consumption:
 | Branch | Reads most from | Key functions |
 | --- | --- | --- |
 | Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` |
-| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label` |
+| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `item_axis_values`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label`, `caption_text_policy.item_axis_detail_text` |
 | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity, target | `caption_metadata_routes.insta_of_pair_from_row_result` |
 | Text fallback | `caption` or `prompt` text | `caption_naturalizer._text_to_prose`, with sentence helpers delegated to `caption_text_policy.py` |
@@ -4104,11 +4104,30 @@ def smoke_caption_text_policy() -> None:
        == caption_text_policy.with_trigger("A caption body", Trigger, True),
        "Caption trigger wrapper should delegate to caption_text_policy",
    )
    axis_detail_row = {
        "item_axis_values": {
            "position": "standing oral position",
            "contact_detail": "mouth contact at hip height",
            "duplicate": "standing oral position",
            "ignored": "random",
        }
    }
    _expect(
        caption_text_policy.item_axis_detail_text(axis_detail_row, "generic action")
        == "standing oral position and mouth contact at hip height",
        "Caption axis detail text should flatten selected item axes",
    )
    _expect(
        caption_text_policy.item_axis_detail_text(axis_detail_row, "standing oral position already appears")
        == "mouth contact at hip height",
        "Caption axis detail text should skip details already present in item prose",
    )
    deps = caption_naturalizer._caption_metadata_route_dependencies()
    _expect(deps.clean_text is caption_text_policy.clean_text, "Caption route deps lost clean text policy")
    _expect(deps.field_row_value is caption_text_policy.field_row_value, "Caption route deps lost field row-value policy")
    _expect(deps.expression_disabled is caption_text_policy.expression_disabled, "Caption route deps lost expression policy")
    _expect(deps.single_caption_front is caption_text_policy.single_caption_front, "Caption route deps lost front parser")
    _expect(deps.item_axis_detail_text is caption_text_policy.item_axis_detail_text, "Caption route deps lost item-axis detail policy")
    _expect(deps.metadata_to_prose is caption_naturalizer._metadata_to_prose, "Caption route deps lost metadata recursion callback")
@@ -4185,6 +4204,29 @@ def smoke_caption_metadata_routes() -> None:
        caption_naturalizer._configured_cast_from_row,
        "metadata(configured_cast)",
    )
    configured_axis_only = _fixture_hardcore_row(
        item="generic configured adult action",
        role_graph="",
        source_role_graph="",
        item_axis_values={
            "position": "standing oral position",
            "contact_detail": "mouth contact at hip height, hands on hips",
        },
        action_family="oral",
        position_family="oral",
        position_key="standing",
        position_keys=["standing"],
    )
    axis_route = caption_metadata_routes.configured_cast_from_row_result(
        caption_naturalizer._caption_metadata_route_request(configured_axis_only, "balanced", False),
        caption_naturalizer._caption_metadata_route_dependencies(),
    )
    _expect(axis_route is not None, "Caption configured-cast axis-only row did not match")
    assert axis_route is not None
    _expect("Selected action details include" in axis_route.prose, "Caption route did not emit selected axis details")
    _expect("standing oral position" in axis_route.prose, "Caption route lost item-axis position detail")
    _expect("mouth contact at hip height" in axis_route.prose, "Caption route lost item-axis contact detail")
    _expect("hands on hips" in axis_route.prose, "Caption route lost item-axis split detail")
    group = {
        "primary_subject": "group scene",