Use item axis details in captions

This commit is contained in:
2026-06-27 18:07:21 +02:00
parent a94cb9f8f1
commit 8ae689f0e7
4 changed files with 95 additions and 2 deletions
+4
View File
@@ -51,6 +51,7 @@ class CaptionMetadataRouteDependencies:
subject_phrase_from_counts: Callable[[dict[str, Any]], str] subject_phrase_from_counts: Callable[[dict[str, Any]], str]
verb_for_row: Callable[[dict[str, Any]], str] verb_for_row: Callable[[dict[str, Any]], str]
metadata_action_label: Callable[[dict[str, Any]], str] metadata_action_label: Callable[[dict[str, Any]], str]
item_axis_detail_text: Callable[[dict[str, Any], str], str]
natural_cast_descriptor_text: Callable[[str], str] natural_cast_descriptor_text: Callable[[str], str]
cast_labels: Callable[[str], list[str]] cast_labels: Callable[[str], list[str]]
natural_label_text: Callable[[Any, list[str]], str] natural_label_text: Callable[[Any, list[str]], str]
@@ -227,6 +228,7 @@ def configured_cast_from_row_result(
cast = deps.row_value(row, "cast_summary", ("Cast",)) cast = deps.row_value(row, "cast_summary", ("Cast",))
role_graph = deps.row_value(row, "role_graph", ("Role graph",)) role_graph = deps.row_value(row, "role_graph", ("Role graph",))
item = deps.row_value(row, "item", deps.item_labels) item = deps.row_value(row, "item", deps.item_labels)
axis_detail = deps.item_axis_detail_text(row, " ".join(part for part in (role_graph, item) if part))
scene = deps.row_value(row, "scene_text", ("Setting", "Scene")) scene = deps.row_value(row, "scene_text", ("Setting", "Scene"))
expression = "" expression = ""
if not deps.expression_disabled(row): if not deps.expression_disabled(row):
@@ -250,6 +252,8 @@ def configured_cast_from_row_result(
parts.append(role_graph) parts.append(role_graph)
if item: if item:
parts.append(f"The {deps.metadata_action_label(row)} is {item}") parts.append(f"The {deps.metadata_action_label(row)} is {item}")
if axis_detail:
parts.append(f"Selected action details include {axis_detail}")
scene_bits = [] scene_bits = []
if scene: if scene:
scene_bits.append(f"set in {scene}") scene_bits.append(f"set in {scene}")
+47
View File
@@ -97,6 +97,52 @@ def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") ->
return caption_policy.metadata_action_label(row, default) return caption_policy.metadata_action_label(row, default)
def _axis_value_texts(value: Any) -> list[str]:
    """Flatten an item-axis value into a list of usable text fragments.

    Strings are passed through ``clean_text`` and stripped of surrounding
    spaces and periods; empty results and the placeholder words ``any``,
    ``auto``, ``random`` and ``none`` (case-insensitive) are discarded.
    Lists are flattened recursively in order.  For dicts, the first of the
    keys ``text``, ``prompt``, ``template``, ``value``, ``name`` that yields
    any text wins; if none does, every dict value is flattened in order.
    Anything else (numbers, booleans, ``None``, other types) contributes
    nothing.
    """
    if isinstance(value, str):
        cleaned = clean_text(value).strip(" .")
        if not cleaned or cleaned.lower() in ("any", "auto", "random", "none"):
            return []
        return [cleaned]

    if isinstance(value, list):
        return [text for entry in value for text in _axis_value_texts(entry)]

    if isinstance(value, dict):
        # Prefer a single well-known field over dumping the whole mapping.
        for field in ("text", "prompt", "template", "value", "name"):
            found = _axis_value_texts(value.get(field))
            if found:
                return found
        return [text for entry in value.values() for text in _axis_value_texts(entry)]

    # Scalars, None and unsupported containers carry no caption text.
    return []
def item_axis_detail_text(row: dict[str, Any], existing_text: str = "") -> str:
    """Build a prose fragment from the row's ``item_axis_values`` mapping.

    Returns ``""`` when ``row`` is not a dict or ``item_axis_values`` is not
    a dict.  The axes ``action_family``, ``position_family``, ``position_key``
    and ``position_keys`` are skipped.  Each remaining value is flattened with
    ``_axis_value_texts``; a fragment is kept only if it is non-empty, has not
    been emitted already (case-insensitive), and does not occur as a
    case-insensitive substring of the cleaned ``existing_text``.  The kept
    fragments are combined, in first-seen order, with ``human_join``.
    """
    axis_map = row.get("item_axis_values") if isinstance(row, dict) else None
    if not isinstance(axis_map, dict):
        return ""

    already_said = clean_text(existing_text).lower()
    ignored_axes = {"action_family", "position_family", "position_key", "position_keys"}

    # Maps lower-cased fragment -> display fragment; dict order keeps the
    # first-seen ordering while the keys double as the duplicate filter.
    emitted: dict[str, str] = {}
    for axis, raw in axis_map.items():
        if str(axis) in ignored_axes:
            continue
        for candidate in _axis_value_texts(raw):
            phrase = clean_text(candidate).strip(" .")
            folded = phrase.lower()
            if phrase and folded not in emitted and folded not in already_said:
                emitted[folded] = phrase
    return human_join(list(emitted.values()))
def prompt_cast_descriptors(text: str) -> str: def prompt_cast_descriptors(text: str) -> str:
return cast_policy.prompt_cast_descriptors(text) return cast_policy.prompt_cast_descriptors(text)
@@ -299,6 +345,7 @@ def metadata_route_dependencies(
subject_phrase_from_counts=subject_phrase_from_counts, subject_phrase_from_counts=subject_phrase_from_counts,
verb_for_row=verb_for_row, verb_for_row=verb_for_row,
metadata_action_label=metadata_action_label, metadata_action_label=metadata_action_label,
item_axis_detail_text=item_axis_detail_text,
natural_cast_descriptor_text=natural_cast_descriptor_text, natural_cast_descriptor_text=natural_cast_descriptor_text,
cast_labels=cast_labels, cast_labels=cast_labels,
natural_label_text=natural_label_text, natural_label_text=natural_label_text,
+2 -2
View File
@@ -143,7 +143,7 @@ Core helper ownership:
| `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, item-axis tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. | | `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, item-axis tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. |
| `caption_format_route.py` | Top-level caption dispatch, input-hint and target normalization, caption profile application, metadata-vs-text branching, trigger wrapping, final prose hygiene, and method/output shape. | | `caption_format_route.py` | Top-level caption dispatch, input-hint and target normalization, caption profile application, metadata-vs-text branching, trigger wrapping, final prose hygiene, and method/output shape. |
| `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. | | `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
| `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. | | `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, item-axis detail prose, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. |
## Node IO Map ## Node IO Map
@@ -838,7 +838,7 @@ Naturalizer field consumption:
| Branch | Reads most from | Key functions | | Branch | Reads most from | Key functions |
| --- | --- | --- | | --- | --- | --- |
| Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` | | Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `caption_metadata_routes.single_from_row_result`, `caption_metadata_routes.couple_from_row_result`, `caption_metadata_routes.group_or_layout_from_row_result` |
| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label` | | Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `item_axis_values`, `scene_text`, expression, composition | `caption_metadata_routes.configured_cast_from_row_result`, `caption_text_policy.metadata_action_label`, `caption_text_policy.item_axis_detail_text` |
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity, target | `caption_metadata_routes.insta_of_pair_from_row_result` | | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity, target | `caption_metadata_routes.insta_of_pair_from_row_result` |
| Text fallback | `caption` or `prompt` text | `caption_naturalizer._text_to_prose`, with sentence helpers delegated to `caption_text_policy.py` | | Text fallback | `caption` or `prompt` text | `caption_naturalizer._text_to_prose`, with sentence helpers delegated to `caption_text_policy.py` |
+42
View File
@@ -4104,11 +4104,30 @@ def smoke_caption_text_policy() -> None:
== caption_text_policy.with_trigger("A caption body", Trigger, True), == caption_text_policy.with_trigger("A caption body", Trigger, True),
"Caption trigger wrapper should delegate to caption_text_policy", "Caption trigger wrapper should delegate to caption_text_policy",
) )
axis_detail_row = {
"item_axis_values": {
"position": "standing oral position",
"contact_detail": "mouth contact at hip height",
"duplicate": "standing oral position",
"ignored": "random",
}
}
_expect(
caption_text_policy.item_axis_detail_text(axis_detail_row, "generic action")
== "standing oral position and mouth contact at hip height",
"Caption axis detail text should flatten selected item axes",
)
_expect(
caption_text_policy.item_axis_detail_text(axis_detail_row, "standing oral position already appears")
== "mouth contact at hip height",
"Caption axis detail text should skip details already present in item prose",
)
deps = caption_naturalizer._caption_metadata_route_dependencies() deps = caption_naturalizer._caption_metadata_route_dependencies()
_expect(deps.clean_text is caption_text_policy.clean_text, "Caption route deps lost clean text policy") _expect(deps.clean_text is caption_text_policy.clean_text, "Caption route deps lost clean text policy")
_expect(deps.field_row_value is caption_text_policy.field_row_value, "Caption route deps lost field row-value policy") _expect(deps.field_row_value is caption_text_policy.field_row_value, "Caption route deps lost field row-value policy")
_expect(deps.expression_disabled is caption_text_policy.expression_disabled, "Caption route deps lost expression policy") _expect(deps.expression_disabled is caption_text_policy.expression_disabled, "Caption route deps lost expression policy")
_expect(deps.single_caption_front is caption_text_policy.single_caption_front, "Caption route deps lost front parser") _expect(deps.single_caption_front is caption_text_policy.single_caption_front, "Caption route deps lost front parser")
_expect(deps.item_axis_detail_text is caption_text_policy.item_axis_detail_text, "Caption route deps lost item-axis detail policy")
_expect(deps.metadata_to_prose is caption_naturalizer._metadata_to_prose, "Caption route deps lost metadata recursion callback") _expect(deps.metadata_to_prose is caption_naturalizer._metadata_to_prose, "Caption route deps lost metadata recursion callback")
@@ -4185,6 +4204,29 @@ def smoke_caption_metadata_routes() -> None:
caption_naturalizer._configured_cast_from_row, caption_naturalizer._configured_cast_from_row,
"metadata(configured_cast)", "metadata(configured_cast)",
) )
configured_axis_only = _fixture_hardcore_row(
item="generic configured adult action",
role_graph="",
source_role_graph="",
item_axis_values={
"position": "standing oral position",
"contact_detail": "mouth contact at hip height, hands on hips",
},
action_family="oral",
position_family="oral",
position_key="standing",
position_keys=["standing"],
)
axis_route = caption_metadata_routes.configured_cast_from_row_result(
caption_naturalizer._caption_metadata_route_request(configured_axis_only, "balanced", False),
caption_naturalizer._caption_metadata_route_dependencies(),
)
_expect(axis_route is not None, "Caption configured-cast axis-only row did not match")
assert axis_route is not None
_expect("Selected action details include" in axis_route.prose, "Caption route did not emit selected axis details")
_expect("standing oral position" in axis_route.prose, "Caption route lost item-axis position detail")
_expect("mouth contact at hip height" in axis_route.prose, "Caption route lost item-axis contact detail")
_expect("hands on hips" in axis_route.prose, "Caption route lost item-axis split detail")
group = { group = {
"primary_subject": "group scene", "primary_subject": "group scene",