Centralize formatter route metadata

This commit is contained in:
2026-06-27 02:24:30 +02:00
parent 7d112c0f98
commit c0c2fb2b40
8 changed files with 115 additions and 29 deletions
+3 -3
View File
@@ -5,15 +5,15 @@ from typing import Any
try: try:
from . import caption_policy from . import caption_policy
from . import category_template_metadata as template_metadata_policy
from . import formatter_input as input_policy from . import formatter_input as input_policy
from . import krea_cast as cast_policy from . import krea_cast as cast_policy
from . import route_metadata as route_metadata_policy
from .prompt_hygiene import sanitize_prose_text from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
import caption_policy import caption_policy
import category_template_metadata as template_metadata_policy
import formatter_input as input_policy import formatter_input as input_policy
import krea_cast as cast_policy import krea_cast as cast_policy
import route_metadata as route_metadata_policy
from prompt_hygiene import sanitize_prose_text from prompt_hygiene import sanitize_prose_text
@@ -73,7 +73,7 @@ def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
hints: list[str] = [] hints: list[str] = []
if not isinstance(row, dict): if not isinstance(row, dict):
return hints return hints
for hint in template_metadata_policy.formatter_hints_for_route(row, "caption"): for hint in route_metadata_policy.row_formatter_hints(row, "caption"):
hint = _clean_text(hint).strip(" .") hint = _clean_text(hint).strip(" .")
if hint and hint not in hints: if hint and hint not in hints:
hints.append(hint) hints.append(hint)
+4 -4
View File
@@ -5,10 +5,10 @@ from typing import Any
try: try:
from . import formatter_input as input_policy from . import formatter_input as input_policy
from .hardcore_action_metadata import normalize_hardcore_action_family from . import route_metadata as route_metadata_policy
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`. except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
import formatter_input as input_policy import formatter_input as input_policy
from hardcore_action_metadata import normalize_hardcore_action_family import route_metadata as route_metadata_policy
OLD_TRIGGER = "sxcpinup_coloredpencil" OLD_TRIGGER = "sxcpinup_coloredpencil"
@@ -122,10 +122,10 @@ def strip_style_tail(text: str) -> str:
def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str: def metadata_action_label(row: dict[str, Any], default: str = "sexual pose") -> str:
position_family = input_policy.clean_text(row.get("position_family")).lower() position_family = route_metadata_policy.row_position_family(row)
if position_family in POSITION_FAMILY_CAPTION_LABELS: if position_family in POSITION_FAMILY_CAPTION_LABELS:
return POSITION_FAMILY_CAPTION_LABELS[position_family] return POSITION_FAMILY_CAPTION_LABELS[position_family]
action_family = normalize_hardcore_action_family(row.get("action_family")) action_family = route_metadata_policy.row_action_family(row)
if action_family in ACTION_FAMILY_CAPTION_LABELS: if action_family in ACTION_FAMILY_CAPTION_LABELS:
return ACTION_FAMILY_CAPTION_LABELS[action_family] return ACTION_FAMILY_CAPTION_LABELS[action_family]
return default return default
+11 -3
View File
@@ -188,6 +188,10 @@ Already isolated:
rows now emit `action_family`, `position_family`, `position_key`, and rows now emit `action_family`, `position_family`, `position_key`, and
`position_keys` so formatter routing and debugging do less keyword guessing. `position_keys` so formatter routing and debugging do less keyword guessing.
Krea, SDXL, and training-caption routes consume these fields when present. Krea, SDXL, and training-caption routes consume these fields when present.
- shared row route metadata readers live in `route_metadata.py`, covering
normalized action family, position family/keys, and route-specific formatter
hints for Krea, SDXL, and training-caption routes. Position keys are strict
by default, while SDXL can opt into legacy unknown key tags for compatibility.
- final row and pair text normalization lives in `row_normalization.py`, - final row and pair text normalization lives in `row_normalization.py`,
covering trigger prepending, extra-positive append, negative merge/dedupe, covering trigger prepending, extra-positive append, negative merge/dedupe,
caption-part joining, and embedded soft/hard row sanitation before metadata caption-part joining, and embedded soft/hard row sanitation before metadata
@@ -270,12 +274,14 @@ Already isolated:
stripping, the shared prompt field-label inventory, prompt-field extraction, stripping, the shared prompt field-label inventory, prompt-field extraction,
`Avoid:` splitting, and row-value fallback for Krea, SDXL, and caption `Avoid:` splitting, and row-value fallback for Krea, SDXL, and caption
routes. routes.
- `route_metadata.py` owns shared row-level action-family, position-family,
position-key, and formatter-hint reads so formatter routes do not normalize
these fields independently.
Improve later: Improve later:
- extend SDXL and caption routes to optionally consume `action_family` / - keep adding route-level smoke fixtures when new metadata fields start
`position_family` when ordering tags or caption clauses; influencing formatter output;
- add route-level smoke fixtures for representative metadata rows;
### SDXL Formatter Path ### SDXL Formatter Path
@@ -290,6 +296,7 @@ Keep here:
- negative-prompt assembly. - negative-prompt assembly.
- metadata-family tag hints from `action_family`, `position_family`, and - metadata-family tag hints from `action_family`, `position_family`, and
`position_keys`. `position_keys`.
- shared row route metadata reads from `route_metadata.py`.
- shared formatter input parsing from `formatter_input.py`. - shared formatter input parsing from `formatter_input.py`.
- style presets, quality presets, default negative prompt, and action/position - style presets, quality presets, default negative prompt, and action/position
family tag hints from `sdxl_presets.py`. family tag hints from `sdxl_presets.py`.
@@ -313,6 +320,7 @@ Keep here:
- style-tail policy from `caption_policy.py`. - style-tail policy from `caption_policy.py`.
- metadata-family action labels from `action_family` and `position_family` via - metadata-family action labels from `action_family` and `position_family` via
`caption_policy.py`. `caption_policy.py`.
- shared row route metadata reads from `route_metadata.py`.
- shared formatter input parsing from `formatter_input.py`. - shared formatter input parsing from `formatter_input.py`.
- shared cast descriptor parsing and label replacement from `krea_cast.py`. - shared cast descriptor parsing and label replacement from `krea_cast.py`.
- caption detail-level/style-policy normalization, clothing cleanup, and - caption detail-level/style-policy normalization, clothing cleanup, and
+1
View File
@@ -92,6 +92,7 @@ Core helper ownership:
| `hardcore_role_anal.py` | Anal and double-contact role graph wording for rear-entry, raised-edge, kneeling, side-lying, and front/back double-position geometry. | | `hardcore_role_anal.py` | Anal and double-contact role graph wording for rear-entry, raised-edge, kneeling, side-lying, and front/back double-position geometry. |
| `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. | | `hardcore_role_climax.py` | Climax and ejaculation aftermath role graph wording for face/body/ass, lap, open-thigh, side-lying, and group front/back placement. |
| `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. | | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
| `route_metadata.py` | Shared row-level route metadata readers for normalized action family, position family/keys, and formatter hints used by Krea2, SDXL, and caption routes. |
| `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. | | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. | | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
+5 -5
View File
@@ -5,7 +5,7 @@ from typing import Any
try: try:
from . import formatter_input as input_policy from . import formatter_input as input_policy
from . import category_template_metadata as template_metadata_policy from . import route_metadata as route_metadata_policy
from .krea_action_context import ( from .krea_action_context import (
is_close_foreplay_text as _is_close_foreplay_text, is_close_foreplay_text as _is_close_foreplay_text,
is_outercourse_text as _is_outercourse_text, is_outercourse_text as _is_outercourse_text,
@@ -36,7 +36,7 @@ try:
from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy import formatter_input as input_policy
import category_template_metadata as template_metadata_policy import route_metadata as route_metadata_policy
from krea_action_context import ( from krea_action_context import (
is_close_foreplay_text as _is_close_foreplay_text, is_close_foreplay_text as _is_close_foreplay_text,
is_outercourse_text as _is_outercourse_text, is_outercourse_text as _is_outercourse_text,
@@ -109,7 +109,7 @@ def _formatter_hint_parts(*rows: dict[str, Any]) -> list[str]:
for row in rows: for row in rows:
if not isinstance(row, dict): if not isinstance(row, dict):
continue continue
for hint in template_metadata_policy.formatter_hints_for_route(row, "krea"): for hint in route_metadata_policy.row_formatter_hints(row, "krea"):
hint = _clean(hint).strip(" .") hint = _clean(hint).strip(" .")
if hint and hint not in hints: if hint and hint not in hints:
hints.append(hint) hints.append(hint)
@@ -465,7 +465,7 @@ def _normal_row_to_krea(row: dict[str, Any], detail_level: str, style_mode: str)
source_composition, source_composition,
axis_values, axis_values,
detail_density, detail_density,
row.get("action_family"), route_metadata_policy.row_action_family(row),
) )
action = _pov_action_phrase(action, pov_labels, role_graph, item, source_composition, axis_values, detail_density) action = _pov_action_phrase(action, pov_labels, role_graph, item, source_composition, axis_values, detail_density)
output_composition = _pov_composition_text(composition, pov_labels) output_composition = _pov_composition_text(composition, pov_labels)
@@ -597,7 +597,7 @@ def _insta_pair_to_krea(row: dict[str, Any], detail_level: str, style_mode: str)
hard_source_composition, hard_source_composition,
hard_axis_values, hard_axis_values,
hard_detail_density, hard_detail_density,
hard.get("action_family") or row.get("action_family"), route_metadata_policy.row_action_family(hard) or route_metadata_policy.row_action_family(row),
) )
hard_action = _pov_action_phrase( hard_action = _pov_action_phrase(
hard_action, hard_action,
+62
View File
@@ -0,0 +1,62 @@
from __future__ import annotations
import re
from typing import Any
try:
from . import category_template_metadata as template_metadata_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from .hardcore_position_config import normalize_hardcore_position_family, normalize_hardcore_position_values
except ImportError: # Allows local smoke tests from the repository root.
import category_template_metadata as template_metadata_policy
from hardcore_action_metadata import normalize_hardcore_action_family
from hardcore_position_config import normalize_hardcore_position_family, normalize_hardcore_position_values
def row_action_family(row: Any, default: str = "") -> str:
    """Return the normalized action family stored on *row*.

    Anything that is not a ``dict`` yields *default* untouched. For dict rows
    the raw ``action_family`` value and *default* are both handed to
    ``normalize_hardcore_action_family``, which decides the final value.
    """
    if isinstance(row, dict):
        raw_family = row.get("action_family")
        return normalize_hardcore_action_family(raw_family, default)
    return default
def row_position_family(row: Any, default: str = "") -> str:
    """Return the normalized position family stored on *row*.

    Non-dict rows yield *default*. For dict rows the ``position_family``
    value is coerced to text (falsy values become an empty string), trimmed
    and lowercased before ``normalize_hardcore_position_family`` sees it, so
    callers may pass values such as ``"Oral"``.
    """
    if isinstance(row, dict):
        raw_family = row.get("position_family") or ""
        cleaned = str(raw_family).strip().lower()
        return normalize_hardcore_position_family(cleaned, default)
    return default
def _raw_position_key_values(row: dict[str, Any]) -> list[Any]:
values: list[Any] = []
position_keys = row.get("position_keys")
if isinstance(position_keys, list):
values.extend(position_keys)
elif position_keys is not None:
values.append(position_keys)
if row.get("position_key") is not None:
values.append(row.get("position_key"))
return values
def _position_key_slug(value: Any) -> str:
text = str(value or "").strip()
if not text or text == "any":
return ""
return re.sub(r"[^a-z0-9]+", "_", text.lower()).strip("_")
def row_position_keys(row: Any, *, include_unknown: bool = False) -> list[str]:
    """Return the position keys carried by *row*.

    Non-dict rows yield an empty list. Otherwise the raw ``position_keys`` /
    ``position_key`` values are passed through
    ``normalize_hardcore_position_values`` and that result is returned.

    With ``include_unknown=True`` every raw value is additionally slugged and
    appended after the normalized keys unless it is empty or already present,
    which lets legacy keys survive as tags.
    """
    if not isinstance(row, dict):
        return []

    raw_values = _raw_position_key_values(row)
    keys = normalize_hardcore_position_values(raw_values)
    if include_unknown:
        for raw in raw_values:
            slug = _position_key_slug(raw)
            if slug and slug not in keys:
                keys.append(slug)
    return keys
def row_formatter_hints(row: Any, route: str) -> list[str]:
    """Return the formatter hints on *row* that apply to *route*.

    Delegates entirely to ``category_template_metadata.formatter_hints_for_route``;
    *row* is forwarded as-is, so any type checking happens there. Formatter
    modules call this with route names such as ``"krea"``, ``"sdxl"`` and
    ``"caption"``.
    """
    hints = template_metadata_policy.formatter_hints_for_route(row, route)
    return hints
+6 -13
View File
@@ -5,15 +5,13 @@ from typing import Any
try: try:
from . import formatter_input as input_policy from . import formatter_input as input_policy
from . import category_template_metadata as template_metadata_policy from . import route_metadata as route_metadata_policy
from . import sdxl_presets as sdxl_policy from . import sdxl_presets as sdxl_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy import formatter_input as input_policy
import category_template_metadata as template_metadata_policy import route_metadata as route_metadata_policy
import sdxl_presets as sdxl_policy import sdxl_presets as sdxl_policy
from hardcore_action_metadata import normalize_hardcore_action_family
from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
@@ -121,18 +119,13 @@ def _add_one(tags: list[str], seen: set[str], tag: str) -> None:
def _metadata_family_tags(row: dict[str, Any]) -> list[str]: def _metadata_family_tags(row: dict[str, Any]) -> list[str]:
tags: list[str] = [] tags: list[str] = []
action_family = normalize_hardcore_action_family(row.get("action_family")) action_family = route_metadata_policy.row_action_family(row)
tags.extend(SDXL_ACTION_FAMILY_TAGS.get(action_family, ())) tags.extend(SDXL_ACTION_FAMILY_TAGS.get(action_family, ()))
position_family = _clean(row.get("position_family")).lower() position_family = route_metadata_policy.row_position_family(row)
tags.extend(SDXL_POSITION_FAMILY_TAGS.get(position_family, ())) tags.extend(SDXL_POSITION_FAMILY_TAGS.get(position_family, ()))
position_keys = row.get("position_keys") for key in route_metadata_policy.row_position_keys(row, include_unknown=True):
if isinstance(position_keys, list):
keys = position_keys
else:
keys = [row.get("position_key")]
for key in keys:
key_text = _clean(key) key_text = _clean(key)
if key_text: if key_text:
tags.append(key_text.replace("_", " ")) tags.append(key_text.replace("_", " "))
@@ -144,7 +137,7 @@ def _formatter_hint_tags(*rows: dict[str, Any]) -> list[str]:
for row in rows: for row in rows:
if not isinstance(row, dict): if not isinstance(row, dict):
continue continue
for hint in template_metadata_policy.formatter_hints_for_route(row, "sdxl"): for hint in route_metadata_policy.row_formatter_hints(row, "sdxl"):
hint = _clean(hint).strip(" ,.") hint = _clean(hint).strip(" ,.")
if hint and hint not in tags: if hint and hint not in tags:
tags.append(hint) tags.append(hint)
+23 -1
View File
@@ -40,6 +40,7 @@ import krea_formatter # noqa: E402
import location_config # noqa: E402 import location_config # noqa: E402
import prompt_builder as pb # noqa: E402 import prompt_builder as pb # noqa: E402
import row_normalization # noqa: E402 import row_normalization # noqa: E402
import route_metadata # noqa: E402
import sdxl_formatter # noqa: E402 import sdxl_formatter # noqa: E402
import sdxl_presets # noqa: E402 import sdxl_presets # noqa: E402
import seed_config # noqa: E402 import seed_config # noqa: E402
@@ -1041,7 +1042,7 @@ def smoke_caption_policy() -> None:
) )
row = {"action_family": "oral", "position_family": ""} row = {"action_family": "oral", "position_family": ""}
_expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed") _expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed")
row = {"action_family": "oral", "position_family": "anal"} row = {"action_family": "oral", "position_family": "Anal"}
_expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed") _expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed")
browsing_caption, browsing_method = caption_naturalizer.naturalize_caption( browsing_caption, browsing_method = caption_naturalizer.naturalize_caption(
"woman, red dress, studio", "woman, red dress, studio",
@@ -1225,6 +1226,27 @@ def smoke_hardcore_position_config_policy() -> None:
_expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint") _expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint")
_expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints") _expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints")
_expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint") _expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint")
route_row = {
"action_family": "penetrative",
"position_family": "Oral",
"position_keys": ["spread leg oral", "bad key"],
"position_key": "open thighs",
"formatter_hints": {"all": ["shared formatter cue"], "training_caption": ["caption formatter cue"]},
}
_expect(route_metadata.row_action_family(route_row) == "penetration", "Route metadata action normalization changed")
_expect(route_metadata.row_position_family(route_row) == "oral", "Route metadata position-family normalization changed")
_expect(
route_metadata.row_position_keys(route_row) == ["spread_leg_oral", "open_thighs"],
"Route metadata position-key normalization changed",
)
_expect(
route_metadata.row_position_keys({"position_keys": ["kneeling_oral"]}, include_unknown=True) == ["kneeling_oral"],
"Route metadata legacy position-key passthrough changed",
)
_expect(
route_metadata.row_formatter_hints(route_row, "caption") == ["shared formatter cue", "caption formatter cue"],
"Route metadata formatter hint routing changed",
)
route_hints = category_template_metadata.formatter_hints_for_route( route_hints = category_template_metadata.formatter_hints_for_route(
{"formatter_hints": {"all": ["shared formatter cue"], "krea2": ["krea formatter cue"]}}, {"formatter_hints": {"all": ["shared formatter cue"], "krea2": ["krea formatter cue"]}},
"krea2", "krea2",