From 2f7c359fab3b52adab420c8986f743187c5711f8 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Fri, 26 Jun 2026 16:43:31 +0200 Subject: [PATCH] Use hardcore family metadata in SDXL and captions --- caption_naturalizer.py | 36 +++++++++++++- docs/prompt-architecture-improvement-plan.md | 12 +++-- docs/prompt-pool-routing-map.md | 8 ++-- sdxl_formatter.py | 50 ++++++++++++++++++++ tools/prompt_smoke.py | 18 ++++--- 5 files changed, 108 insertions(+), 16 deletions(-) diff --git a/caption_naturalizer.py b/caption_naturalizer.py index bb00021..50bc163 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -5,8 +5,10 @@ import re from typing import Any try: + from .hardcore_action_metadata import normalize_hardcore_action_family from .prompt_hygiene import sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. + from hardcore_action_metadata import normalize_hardcore_action_family from prompt_hygiene import sanitize_prose_text @@ -45,6 +47,28 @@ ITEM_LABELS = ( "Clothing", ) +ACTION_FAMILY_CAPTION_LABELS = { + "foreplay": "foreplay action", + "outercourse": "non-penetrative action", + "oral": "oral action", + "penetration": "penetrative action", + "toy_double": "toy-assisted double-contact action", + "climax": "climax action", +} + +POSITION_FAMILY_CAPTION_LABELS = { + "penetrative": "penetrative action", + "foreplay": "foreplay action", + "interaction": "interaction beat", + "manual": "manual action", + "oral": "oral action", + "outercourse": "non-penetrative action", + "anal": "anal action", + "climax": "climax action", + "threesome": "three-person action", + "group": "group action", +} + def _clean_text(value: Any) -> str: text = "" if value is None else str(value) @@ -100,6 +124,16 @@ def _human_join(parts: list[str]) -> str: return f"{', '.join(parts[:-1])}, and {parts[-1]}" +def _metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str: + position_family = _clean_text(row.get("position_family")).lower() + if position_family in POSITION_FAMILY_CAPTION_LABELS: + return POSITION_FAMILY_CAPTION_LABELS[position_family] + action_family = normalize_hardcore_action_family(row.get("action_family")) + if action_family in ACTION_FAMILY_CAPTION_LABELS: + return ACTION_FAMILY_CAPTION_LABELS[action_family] + return default + + def _prompt_cast_descriptors(text: str) -> str: return _clean_text(text).replace("Woman A / primary creator:", "Woman A:") @@ -508,7 +542,7 @@ def _configured_cast_from_row(row: dict[str, Any], detail_level: str, keep_style if role_graph: parts.append(role_graph) if item: - parts.append(f"The sexual pose is {item}") + parts.append(f"The {_metadata_action_label(row)} is {item}") scene_bits = [] if scene: scene_bits.append(f"set in {scene}") diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 1d12b05..9b8a5f5 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -110,6 +110,7 @@ Already isolated: - shared hardcore action metadata lives in `hardcore_action_metadata.py`; custom rows now emit `action_family`, `position_family`, `position_key`, and `position_keys` so formatter routing and debugging do less keyword guessing. + Krea, SDXL, and training-caption routes consume these fields when present. ### Pair / Adapter Layer @@ -186,6 +187,8 @@ Keep here: - tag ordering; - weighted explicit tags; - negative-prompt assembly. +- metadata-family tag hints from `action_family`, `position_family`, and + `position_keys`. Improve later: @@ -203,6 +206,7 @@ Keep here: - natural sentence caption assembly; - training-caption trigger behavior; - style-tail policy. +- metadata-family action labels from `action_family` and `position_family`. Improve later: @@ -351,9 +355,9 @@ Medium-term: ## Recommended Next Passes -1. Extend SDXL and caption routes to optionally consume `action_family` / - `position_family` when ordering tags or caption clauses. -2. Split `__init__.py` node classes by family after behavior is covered by smoke +1. Split `__init__.py` node classes by family after behavior is covered by smoke checks. -3. Add route-level smoke fixtures for representative Krea/SDXL/caption metadata +2. Extract hardcore role graph generation from `prompt_builder.py` into a + dedicated `hardcore_role_graphs.py` module. +3. Add route-level smoke fixtures for more representative Krea/SDXL/caption metadata rows. diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index fc93cd8..a13183a 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -397,7 +397,7 @@ plain prompt text. When debugging, inspect these fields before editing pools. | `content_seed_axis` | `_build_custom_row` | Debug | Shows whether the item/action was driven by `content` or `pose`. Critical for hardcore pose categories. | | `item` | `_compose_item` or Insta override | Krea/SDXL/Naturalizer | Clothing item, category item, or sexual scene/action text. | | `item_axis_values` | `_compose_item` | Krea hardcore rewrite, SDXL tags | Filled template axes such as position/action/detail values. | -| `action_family` | `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. | +| `action_family` | `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, SDXL tags, natural captions, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. | | `position_family` | `_hardcore_source_position_family` | Debug/filtering | Source/UI hardcore family selected by subcategory, such as `manual`, `interaction`, `oral`, `anal`, or `climax`. | | `position_key`, `position_keys` | `_hardcore_position_keys` | Debug/future filters | Concrete position tokens inferred from axes and role text, such as `kneeling`, `doggy`, `boobjob`, or `open_thighs`. | | `custom_item`, `item_label` | Category/pair route | Formatters and debug | Label/name for item route. | @@ -608,9 +608,9 @@ SDXL field consumption: | Branch | Reads most from | Key functions | | --- | --- | --- | -| Normal metadata | cast descriptors, age/body/skin/hair/eyes, item, role graph, scene, camera config/directive | `_row_core_tags`, `_appearance_tags`, `_camera_tags` | +| Normal metadata | cast descriptors, age/body/skin/hair/eyes, `action_family`, `position_family`, `position_keys`, item, role graph, scene, camera config/directive | `_row_core_tags`, `_metadata_family_tags`, `_camera_tags` | | Pair softcore | `softcore_row`, pair partner styling, root soft camera config | `_soft_tags` | -| Pair hardcore | `hardcore_row`, `hardcore_clothing_state`, hard camera fields, hard prompt text | `_hard_tags` | +| Pair hardcore | `hardcore_row`, `action_family`, `position_family`, `position_keys`, `hardcore_clothing_state`, hard camera fields, hard prompt text | `_hard_tags`, `_metadata_family_tags` | | Text fallback | `source_text`, preserve-trigger setting | `_fallback_text_to_sdxl` | SDXL is the right place for model trigger handling, tag ordering, weight syntax, @@ -630,7 +630,7 @@ Naturalizer field consumption: | Branch | Reads most from | Key functions | | --- | --- | --- | | Normal single/couple/group | subject fields, age/body, item, scene, expression, composition, camera scene | `_single_from_row`, `_couple_from_row`, `_group_or_layout_from_row` | -| Configured cast/hardcore | `cast_descriptor_text`, `role_graph`, `item`, `scene_text`, expression, composition | `_configured_cast_from_row` | +| Configured cast/hardcore | `cast_descriptor_text`, `action_family`, `position_family`, `role_graph`, `item`, `scene_text`, expression, composition | `_configured_cast_from_row`, `_metadata_action_label` | | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` | | Text fallback | `caption` or `prompt` text | `_text_to_prose` | diff --git a/sdxl_formatter.py b/sdxl_formatter.py index 598c6fb..e2c8476 100644 --- a/sdxl_formatter.py +++ b/sdxl_formatter.py @@ -5,8 +5,10 @@ import re from typing import Any try: + from .hardcore_action_metadata import normalize_hardcore_action_family from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt except ImportError: # Allows local smoke tests with `python -c`. + from hardcore_action_metadata import normalize_hardcore_action_family from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt @@ -39,6 +41,28 @@ SDXL_DEFAULT_NEGATIVE = ( "watermark, signature, text, logo, blurry, jpeg artifacts, censored, mosaic censor" ) +SDXL_ACTION_FAMILY_TAGS = { + "foreplay": ("foreplay", "body contact"), + "outercourse": ("outercourse", "non-penetrative sex"), + "oral": ("oral sex",), + "penetration": ("penetrative sex", "penetration"), + "toy_double": ("double penetration", "toy-assisted sex"), + "climax": ("climax", "semen"), +} + +SDXL_POSITION_FAMILY_TAGS = { + "penetrative": ("penetrative sex",), + "foreplay": ("foreplay",), + "interaction": ("interaction",), + "manual": ("manual stimulation",), + "oral": ("oral sex",), + "outercourse": ("outercourse",), + "anal": ("anal sex",), + "climax": ("climax",), + "threesome": ("threesome",), + "group": ("group sex",), +} + PROMPT_FIELD_LABELS = ( "Ages", "Body types", @@ -183,6 +207,26 @@ def _add_one(tags: list[str], seen: set[str], tag: str) -> None: seen.add(key) +def _metadata_family_tags(row: dict[str, Any]) -> list[str]: + tags: list[str] = [] + action_family = normalize_hardcore_action_family(row.get("action_family")) + tags.extend(SDXL_ACTION_FAMILY_TAGS.get(action_family, ())) + + position_family = _clean(row.get("position_family")).lower() + tags.extend(SDXL_POSITION_FAMILY_TAGS.get(position_family, ())) + + position_keys = row.get("position_keys") + if isinstance(position_keys, list): + keys = position_keys + else: + keys = [row.get("position_key")] + for key in keys: + key_text = _clean(key) + if key_text: + tags.append(key_text.replace("_", " ")) + return tags + + def _combine_tags(*parts: Any) -> str: tags: list[str] = [] seen: set[str] = set() @@ -332,6 +376,9 @@ def _row_core_tags(row: dict[str, Any], nude_weight: float) -> list[str]: for tag in _normal_character_tags(row): _add_one(tags, seen, tag) + for tag in _metadata_family_tags(row): + _add_one(tags, seen, tag) + item = _row_value(row, "item", ("Sexual scene", "Sexual pose", "Erotic outfit", "Clothing")) or _clean(row.get("custom_item")) pose = _row_value(row, "pose", ("Sexual pose", "Pose")) role_graph = _clean(row.get("source_role_graph") or row.get("role_graph")) @@ -404,6 +451,9 @@ def _hard_tags(row: dict[str, Any], root: dict[str, Any], nude_weight: float) -> for tag in _normal_character_tags(row): _add_one(tags, seen, tag) + for tag in _metadata_family_tags(row): + _add_one(tags, seen, tag) + hard_scene = _clean(row.get("scene_text")) hard_item = _clean(row.get("item")) hard_role = _clean(row.get("source_role_graph") or row.get("role_graph")) diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index f0799a3..dd781e1 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -364,14 +364,14 @@ def smoke_config_route_location_theme() -> None: def smoke_hardcore_category_routes() -> None: cast = _character_cast() cases = [ - ("hardcore_penetration", "Penetrative sex", "penetration_only", "penetrative", {"penetration", "default"}), - ("hardcore_oral", "Oral sex", "oral_only", "oral", {"oral"}), - ("hardcore_manual", "Manual stimulation", "manual_only", "manual", {"foreplay", "outercourse"}), - ("hardcore_outercourse", "Outercourse and genital teasing", "outercourse_only", "outercourse", {"outercourse"}), - ("hardcore_foreplay", "Foreplay and teasing", "foreplay_only", "foreplay", {"foreplay"}), - ("hardcore_aftercare", "Aftercare and cleanup", "interaction_only", "interaction", {"foreplay"}), + ("hardcore_penetration", "Penetrative sex", "penetration_only", "penetrative", {"penetration", "default"}, "penetrative sex", "penetrative action"), + ("hardcore_oral", "Oral sex", "oral_only", "oral", {"oral"}, "oral sex", "oral action"), + ("hardcore_manual", "Manual stimulation", "manual_only", "manual", {"foreplay", "outercourse"}, "manual stimulation", "manual action"), + ("hardcore_outercourse", "Outercourse and genital teasing", "outercourse_only", "outercourse", {"outercourse"}, "outercourse", "non-penetrative action"), + ("hardcore_foreplay", "Foreplay and teasing", "foreplay_only", "foreplay", {"foreplay"}, "foreplay", "foreplay action"), + ("hardcore_aftercare", "Aftercare and cleanup", "interaction_only", "interaction", {"foreplay"}, "interaction", "interaction beat"), ] - for index, (name, subcategory, focus, position_family, action_families) in enumerate(cases, start=1101): + for index, (name, subcategory, focus, position_family, action_families, sdxl_tag, caption_label) in enumerate(cases, start=1101): row = _prompt_row( name=name, category="Hardcore sexual poses", @@ -388,6 +388,10 @@ def smoke_hardcore_category_routes() -> None: _expect(row.get("action_family") in action_families, f"{name} action_family mismatch: {row.get('action_family')}") _expect(isinstance(row.get("position_keys"), list), f"{name} position_keys missing") _expect_formatter_outputs(row, name, target="single") + sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=_json(row), target="single", trigger=SdxlTrigger, prepend_trigger=True) + _expect(sdxl_tag in (sdxl.get("sdxl_prompt") or "").lower(), f"{name} SDXL prompt did not include family tag {sdxl_tag!r}") + caption, _method = caption_naturalizer.naturalize_caption("", metadata_json=_json(row), trigger=Trigger, include_trigger=True) + _expect(caption_label in caption.lower(), f"{name} caption did not include family label {caption_label!r}") def smoke_krea_close_foreplay_route() -> None: