diff --git a/caption_naturalizer.py b/caption_naturalizer.py index fd11b1e..235a2cf 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -5,15 +5,15 @@ from typing import Any try: from . import caption_policy - from . import category_template_metadata as template_metadata_policy from . import formatter_input as input_policy from . import krea_cast as cast_policy + from . import route_metadata as route_metadata_policy from .prompt_hygiene import sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. import caption_policy - import category_template_metadata as template_metadata_policy import formatter_input as input_policy import krea_cast as cast_policy + import route_metadata as route_metadata_policy from prompt_hygiene import sanitize_prose_text @@ -73,7 +73,7 @@ def _formatter_hint_parts(row: dict[str, Any]) -> list[str]: hints: list[str] = [] if not isinstance(row, dict): return hints - for hint in template_metadata_policy.formatter_hints_for_route(row, "caption"): + for hint in route_metadata_policy.row_formatter_hints(row, "caption"): hint = _clean_text(hint).strip(" .") if hint and hint not in hints: hints.append(hint) diff --git a/caption_policy.py b/caption_policy.py index 6ef65c3..b80d1c7 100644 --- a/caption_policy.py +++ b/caption_policy.py @@ -5,10 +5,10 @@ from typing import Any try: from . import formatter_input as input_policy - from .hardcore_action_metadata import normalize_hardcore_action_family + from . import route_metadata as route_metadata_policy except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`. import formatter_input as input_policy - from hardcore_action_metadata import normalize_hardcore_action_family + import route_metadata as route_metadata_policy OLD_TRIGGER = "sxcpinup_coloredpencil" @@ -122,10 +122,10 @@ def strip_style_tail(text: str) -> str: def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str: - position_family = input_policy.clean_text(row.get("position_family")).lower() + position_family = route_metadata_policy.row_position_family(row) if position_family in POSITION_FAMILY_CAPTION_LABELS: return POSITION_FAMILY_CAPTION_LABELS[position_family] - action_family = normalize_hardcore_action_family(row.get("action_family")) + action_family = route_metadata_policy.row_action_family(row) if action_family in ACTION_FAMILY_CAPTION_LABELS: return ACTION_FAMILY_CAPTION_LABELS[action_family] return default diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index d004ef0..e067e21 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -188,6 +188,10 @@ Already isolated: rows now emit `action_family`, `position_family`, `position_key`, and `position_keys` so formatter routing and debugging do less keyword guessing. Krea, SDXL, and training-caption routes consume these fields when present. +- shared row route metadata readers live in `route_metadata.py`, covering + normalized action family, position family/keys, and route-specific formatter + hints for Krea, SDXL, and training-caption routes. Position keys are strict + by default, while SDXL can opt into legacy unknown key tags for compatibility. - final row and pair text normalization lives in `row_normalization.py`, covering trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation before metadata @@ -270,12 +274,14 @@ Already isolated: stripping, the shared prompt field-label inventory, prompt-field extraction, `Avoid:` splitting, and row-value fallback for Krea, SDXL, and caption routes. +- `route_metadata.py` owns shared row-level action-family, position-family, + position-key, and formatter-hint reads so formatter routes do not normalize + these fields independently. Improve later: -- extend SDXL and caption routes to optionally consume `action_family` / - `position_family` when ordering tags or caption clauses; -- add route-level smoke fixtures for representative metadata rows; +- keep adding route-level smoke fixtures when new metadata fields start + influencing formatter output; ### SDXL Formatter Path @@ -290,6 +296,7 @@ Keep here: - negative-prompt assembly. - metadata-family tag hints from `action_family`, `position_family`, and `position_keys`. +- shared row route metadata reads from `route_metadata.py`. - shared formatter input parsing from `formatter_input.py`. - style presets, quality presets, default negative prompt, and action/position family tag hints from `sdxl_presets.py`. @@ -313,6 +320,7 @@ Keep here: - style-tail policy from `caption_policy.py`. - metadata-family action labels from `action_family` and `position_family` via `caption_policy.py`. +- shared row route metadata reads from `route_metadata.py`. - shared formatter input parsing from `formatter_input.py`. - shared cast descriptor parsing and label replacement from `krea_cast.py`. - caption detail-level/style-policy normalization, clothing cleanup, and diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 8884de5..dec13a1 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -92,6 +92,7 @@ Core helper ownership: | `hardcore_role_anal.py` | Anal and double-contact role graph wording for rear-entry, raised-edge, kneeling, side-lying, and front/back double-position geometry. | | `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. | | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. | +| `route_metadata.py` | Shared row-level route metadata readers for normalized action family, position family/keys, and formatter hints used by Krea2, SDXL, and caption routes. | | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. | | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | diff --git a/krea_formatter.py b/krea_formatter.py index db0a7e6..0c57293 100644 --- a/krea_formatter.py +++ b/krea_formatter.py @@ -5,7 +5,7 @@ from typing import Any try: from . import formatter_input as input_policy - from . import category_template_metadata as template_metadata_policy + from . import route_metadata as route_metadata_policy from .krea_action_context import ( is_close_foreplay_text as _is_close_foreplay_text, is_outercourse_text as _is_outercourse_text, @@ -36,7 +36,7 @@ try: from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. import formatter_input as input_policy - import category_template_metadata as template_metadata_policy + import route_metadata as route_metadata_policy from krea_action_context import ( is_close_foreplay_text as _is_close_foreplay_text, is_outercourse_text as _is_outercourse_text, @@ -109,7 +109,7 @@ def _formatter_hint_parts(*rows: dict[str, Any]) -> list[str]: for row in rows: if not isinstance(row, dict): continue - for hint in template_metadata_policy.formatter_hints_for_route(row, "krea"): + for hint in route_metadata_policy.row_formatter_hints(row, "krea"): hint = _clean(hint).strip(" .") if hint and hint not in hints: hints.append(hint) @@ -465,7 +465,7 @@ def _normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) source_composition, axis_values, detail_density, - row.get("action_family"), + route_metadata_policy.row_action_family(row), ) action = _pov_action_phrase(action, pov_labels, role_graph, item, source_composition, axis_values, detail_density) output_composition = _pov_composition_text(composition, pov_labels) @@ -597,7 +597,7 @@ def _insta_pair_to_krea(row: dict[str, Any], detail_level: str, style_mode: str) hard_source_composition, hard_axis_values, hard_detail_density, - hard.get("action_family") or row.get("action_family"), + route_metadata_policy.row_action_family(hard) or route_metadata_policy.row_action_family(row), ) hard_action = _pov_action_phrase( hard_action, diff --git a/route_metadata.py b/route_metadata.py new file mode 100644 index 0000000..787130d --- /dev/null +++ b/route_metadata.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +import re +from typing import Any + +try: + from . import category_template_metadata as template_metadata_policy + from .hardcore_action_metadata import normalize_hardcore_action_family + from .hardcore_position_config import normalize_hardcore_position_family, normalize_hardcore_position_values +except ImportError: # Allows local smoke tests from the repository root. + import category_template_metadata as template_metadata_policy + from hardcore_action_metadata import normalize_hardcore_action_family + from hardcore_position_config import normalize_hardcore_position_family, normalize_hardcore_position_values + + +def row_action_family(row: Any, default: str = "") -> str: + if not isinstance(row, dict): + return default + return normalize_hardcore_action_family(row.get("action_family"), default) + + +def row_position_family(row: Any, default: str = "") -> str: + if not isinstance(row, dict): + return default + return normalize_hardcore_position_family(str(row.get("position_family") or "").strip().lower(), default) + + +def _raw_position_key_values(row: dict[str, Any]) -> list[Any]: + values: list[Any] = [] + position_keys = row.get("position_keys") + if isinstance(position_keys, list): + values.extend(position_keys) + elif position_keys is not None: + values.append(position_keys) + if row.get("position_key") is not None: + values.append(row.get("position_key")) + return values + + +def _position_key_slug(value: Any) -> str: + text = str(value or "").strip() + if not text or text == "any": + return "" + return re.sub(r"[^a-z0-9]+", "_", text.lower()).strip("_") + + +def row_position_keys(row: Any, *, include_unknown: bool = False) -> list[str]: + if not isinstance(row, dict): + return [] + values = _raw_position_key_values(row) + selected = normalize_hardcore_position_values(values) + if not include_unknown: + return selected + for value in values: + normalized = _position_key_slug(value) + if normalized and normalized not in selected: + selected.append(normalized) + return selected + + +def row_formatter_hints(row: Any, route: str) -> list[str]: + return template_metadata_policy.formatter_hints_for_route(row, route) diff --git a/sdxl_formatter.py b/sdxl_formatter.py index 112228d..fe3bc76 100644 --- a/sdxl_formatter.py +++ b/sdxl_formatter.py @@ -5,15 +5,13 @@ from typing import Any try: from . import formatter_input as input_policy - from . import category_template_metadata as template_metadata_policy + from . import route_metadata as route_metadata_policy from . import sdxl_presets as sdxl_policy - from .hardcore_action_metadata import normalize_hardcore_action_family from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt except ImportError: # Allows local smoke tests with `python -c`. import formatter_input as input_policy - import category_template_metadata as template_metadata_policy + import route_metadata as route_metadata_policy import sdxl_presets as sdxl_policy - from hardcore_action_metadata import normalize_hardcore_action_family from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt @@ -121,18 +119,13 @@ def _add_one(tags: list[str], seen: set[str], tag: str) -> None: def _metadata_family_tags(row: dict[str, Any]) -> list[str]: tags: list[str] = [] - action_family = normalize_hardcore_action_family(row.get("action_family")) + action_family = route_metadata_policy.row_action_family(row) tags.extend(SDXL_ACTION_FAMILY_TAGS.get(action_family, ())) - position_family = _clean(row.get("position_family")).lower() + position_family = route_metadata_policy.row_position_family(row) tags.extend(SDXL_POSITION_FAMILY_TAGS.get(position_family, ())) - position_keys = row.get("position_keys") - if isinstance(position_keys, list): - keys = position_keys - else: - keys = [row.get("position_key")] - for key in keys: + for key in route_metadata_policy.row_position_keys(row, include_unknown=True): key_text = _clean(key) if key_text: tags.append(key_text.replace("_", " ")) @@ -144,7 +137,7 @@ def _formatter_hint_tags(*rows: dict[str, Any]) -> list[str]: for row in rows: if not isinstance(row, dict): continue - for hint in template_metadata_policy.formatter_hints_for_route(row, "sdxl"): + for hint in route_metadata_policy.row_formatter_hints(row, "sdxl"): hint = _clean(hint).strip(" ,.") if hint and hint not in tags: tags.append(hint) diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 53b73ef..2f99f99 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -40,6 +40,7 @@ import krea_formatter # noqa: E402 import location_config # noqa: E402 import prompt_builder as pb # noqa: E402 import row_normalization # noqa: E402 +import route_metadata # noqa: E402 import sdxl_formatter # noqa: E402 import sdxl_presets # noqa: E402 import seed_config # noqa: E402 @@ -1041,7 +1042,7 @@ def smoke_caption_policy() -> None: ) row = {"action_family": "oral", "position_family": ""} _expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed") - row = {"action_family": "oral", "position_family": "anal"} + row = {"action_family": "oral", "position_family": "Anal"} _expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed") browsing_caption, browsing_method = caption_naturalizer.naturalize_caption( "woman, red dress, studio", @@ -1225,6 +1226,27 @@ def smoke_hardcore_position_config_policy() -> None: _expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint") _expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints") _expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint") + route_row = { + "action_family": "penetrative", + "position_family": "Oral", + "position_keys": ["spread leg oral", "bad key"], + "position_key": "open thighs", + "formatter_hints": {"all": ["shared formatter cue"], "training_caption": ["caption formatter cue"]}, + } + _expect(route_metadata.row_action_family(route_row) == "penetration", "Route metadata action normalization changed") + _expect(route_metadata.row_position_family(route_row) == "oral", "Route metadata position-family normalization changed") + _expect( + route_metadata.row_position_keys(route_row) == ["spread_leg_oral", "open_thighs"], + "Route metadata position-key normalization changed", + ) + _expect( + route_metadata.row_position_keys({"position_keys": ["kneeling_oral"]}, include_unknown=True) == ["kneeling_oral"], + "Route metadata legacy position-key passthrough changed", + ) + _expect( + route_metadata.row_formatter_hints(route_row, "caption") == ["shared formatter cue", "caption formatter cue"], + "Route metadata formatter hint routing changed", + ) route_hints = category_template_metadata.formatter_hints_for_route( {"formatter_hints": {"all": ["shared formatter cue"], "krea2": ["krea formatter cue"]}}, "krea2",