def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
    """Collect the caption-route formatter hints attached to ``row``.

    Hints are resolved through
    ``template_metadata_policy.formatter_hints_for_route(row, "caption")``,
    passed through ``_clean_text``, and stripped of surrounding spaces and
    periods.  Empty results and repeats are dropped; first-seen order is kept.

    Returns an empty list when ``row`` is not a dict.
    """
    if not isinstance(row, dict):
        return []

    collected: list[str] = []
    resolved = template_metadata_policy.formatter_hints_for_route(row, "caption")
    for raw_hint in resolved:
        cleaned = _clean_text(raw_hint).strip(" .")
        if not cleaned or cleaned in collected:
            continue
        collected.append(cleaned)
    return collected


def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
    """Return ``prose`` followed by the caption formatter hints of ``row``.

    When ``row`` yields no hints the prose is returned untouched; otherwise
    the prose and every hint are handed to ``_join_sentences`` as separate
    parts.
    """
    extra_parts = _formatter_hint_parts(row)
    return _join_sentences([prose, *extra_parts]) if extra_parts else prose
def formatter_hints_for_route(row_or_hints: Any, route: str) -> list[str]:
    """Return the formatter hints that apply to ``route``.

    ``row_or_hints`` may be one of:

    * a row carrying a ``formatter_hints`` mapping of route -> hint(s);
    * a metadata dict carrying a raw ``formatter_hint`` entry, which is
      normalized through ``formatter_hints``;
    * a bare route -> hint(s) mapping.

    The bare-mapping form is only accepted when *every* key normalizes to a
    formatter route.  Without that guard an ordinary row that has no hint
    metadata would be scanned as if it were a hint map, and any field whose
    name happens to be a route name would be returned as a hint.
    NOTE(review): this relies on ``normalize_formatter_route`` returning a
    falsy value for keys that are not routes, as the per-key check below
    already assumes.

    Args:
        row_or_hints: Row, metadata dict, or route -> hint(s) mapping.
            Anything that is not a dict yields an empty result.
        route: Formatter route name; it is normalized with
            ``normalize_formatter_route`` before lookup.

    Returns:
        Hints stored under ``all`` followed by the hints stored under the
        normalized ``route``, cleaned with ``_clean_hint`` and de-duplicated
        in first-seen order.  Empty when the route does not normalize or no
        hints are found.
    """
    route = normalize_formatter_route(route)
    if not route or not isinstance(row_or_hints, dict):
        return []

    embedded_hints = row_or_hints.get("formatter_hints")
    if isinstance(embedded_hints, dict):
        raw_hints = embedded_hints
    elif "formatter_hint" in row_or_hints:
        raw_hints = formatter_hints(row_or_hints)
    elif all(normalize_formatter_route(key) for key in row_or_hints):
        # Every key is a route name, so this is a bare hint mapping.
        raw_hints = row_or_hints
    else:
        # A plain row without hint metadata: its fields are not hints.
        raw_hints = {}

    normalized: dict[str, list[str]] = {}
    if isinstance(raw_hints, dict):
        for raw_route, values in raw_hints.items():
            normalized_route = normalize_formatter_route(raw_route)
            if not normalized_route:
                continue
            # Aliases can collapse onto one route, so merge into one bucket.
            bucket = normalized.setdefault(normalized_route, [])
            for value in _list_from(values):
                hint = _clean_hint(value)
                if hint and hint not in bucket:
                    bucket.append(hint)

    # Shared hints come first, then the route-specific ones.
    hints: list[str] = []
    for route_key in ("all", route):
        for hint in normalized.get(route_key, []):
            if hint not in hints:
                hints.append(hint)
    return hints
`position_key`, `position_keys`, and `formatter_hint`; string templates remain valid and fall - back to Python inference. + back to Python inference. Normalized formatter hints are routed into Krea, + SDXL, and caption naturalization through `all` plus the matching formatter + route only. Improve later: diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index eb2006a..8884de5 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -222,7 +222,8 @@ Important JSON keys: `position_family`, `family`, `position_key`, `position_keys`, and `formatter_hint`. Formatter hints may be a string/list for all routes or a map keyed by `krea`, `sdxl`, or `caption`; aliases such as `krea2` and - `training_caption` are normalized by `category_template_metadata.py`. + `training_caption` are normalized by `category_template_metadata.py` and + consumed only by the matching formatter route plus the shared `all` route. - `axes`: values used to fill `item_templates`. - `scene_pool` / `scene_pools` or direct `scenes`: location road. - `expression_pool` / `expression_pools` or direct `expressions`: expression road. @@ -452,7 +453,7 @@ plain prompt text. When debugging, inspect these fields before editing pools. | `item` | `_compose_item` or Insta override | Krea/SDXL/Naturalizer | Clothing item, category item, or sexual scene/action text. | | `item_axis_values` | `_compose_item` | Krea hardcore rewrite, SDXL tags | Filled template axes such as position/action/detail values. | | `item_template_metadata` | `_compose_item` | Debug, Krea/SDXL/Naturalizer route metadata | Optional metadata from object-style item templates; currently used to prefer explicit action/position families and keys before inference. | -| `formatter_hints` | `category_template_metadata.formatter_hints` | Debug/future formatter specialization | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`. 
| +| `formatter_hints` | `category_template_metadata.formatter_hints` | Krea/SDXL/Naturalizer route specialization, debug | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`; each formatter consumes `all` plus its own route only. | | `action_family` | `item_template_metadata` or `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, SDXL tags, natural captions, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. | | `position_family` | `item_template_metadata` or `_hardcore_source_position_family` | Debug/filtering | Source/UI hardcore family selected by template metadata or subcategory, such as `manual`, `interaction`, `oral`, `anal`, or `climax`. | | `position_key`, `position_keys` | `item_template_metadata` plus `_hardcore_position_keys` | Debug/future filters | Concrete position tokens from object-template metadata and inferred axes/role text, such as `kneeling`, `doggy`, `boobjob`, or `open_thighs`. | diff --git a/krea_formatter.py b/krea_formatter.py index 3e9abf7..db0a7e6 100644 --- a/krea_formatter.py +++ b/krea_formatter.py @@ -5,6 +5,7 @@ from typing import Any try: from . import formatter_input as input_policy + from . import category_template_metadata as template_metadata_policy from .krea_action_context import ( is_close_foreplay_text as _is_close_foreplay_text, is_outercourse_text as _is_outercourse_text, @@ -35,6 +36,7 @@ try: from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. 
def _formatter_hint_parts(*rows: dict[str, Any]) -> list[str]:
    """Gather the Krea-route formatter hints from each of ``rows``.

    Non-dict arguments are ignored.  Each hint resolved by
    ``template_metadata_policy.formatter_hints_for_route(row, "krea")`` is
    passed through ``_clean`` and stripped of surrounding spaces and periods;
    empty results and repeats (across all rows) are dropped, keeping
    first-seen order.
    """
    collected: list[str] = []
    dict_rows = (entry for entry in rows if isinstance(entry, dict))
    for entry in dict_rows:
        resolved = template_metadata_policy.formatter_hints_for_route(entry, "krea")
        for raw_hint in resolved:
            cleaned = _clean(raw_hint).strip(" .")
            if not cleaned or cleaned in collected:
                continue
            collected.append(cleaned)
    return collected


def _append_formatter_hints(prompt: str, *rows: dict[str, Any]) -> str:
    """Return ``prompt`` followed by the Krea formatter hints of ``rows``.

    When no hints are found the prompt is returned untouched; otherwise the
    prompt and every hint are handed to ``_paragraph`` as separate parts.
    """
    extra_parts = _formatter_hint_parts(*rows)
    return _paragraph([prompt, *extra_parts]) if extra_parts else prompt
def _formatter_hint_tags(*rows: dict[str, Any]) -> list[str]:
    """Gather the SDXL-route formatter hints from each of ``rows`` as tags.

    Non-dict arguments are ignored.  Each hint resolved by
    ``template_metadata_policy.formatter_hints_for_route(row, "sdxl")`` is
    passed through ``_clean`` and stripped of surrounding spaces, commas and
    periods; empty results and repeats (across all rows) are dropped, keeping
    first-seen order.
    """
    collected: list[str] = []
    dict_rows = (entry for entry in rows if isinstance(entry, dict))
    for entry in dict_rows:
        resolved = template_metadata_policy.formatter_hints_for_route(entry, "sdxl")
        for raw_hint in resolved:
            cleaned = _clean(raw_hint).strip(" ,.")
            if not cleaned or cleaned in collected:
                continue
            collected.append(cleaned)
    return collected
+ _add(tags, seen, tag) descriptor = _clean(root.get("shared_descriptor")) if descriptor and not any("woman" in _tag_key(tag) for tag in tags): for tag in _character_tags_from_descriptor(descriptor): @@ -369,6 +387,8 @@ def _hard_tags(row: dict[str, Any], root: dict[str, Any], nude_weight: float) -> for tag in _metadata_family_tags(row): _add_one(tags, seen, tag) + for tag in _formatter_hint_tags(row, root): + _add(tags, seen, tag) hard_scene = _clean(row.get("scene_text")) hard_item = _clean(row.get("item")) diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index ccd1805..53b73ef 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -1225,6 +1225,11 @@ def smoke_hardcore_position_config_policy() -> None: _expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint") _expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints") _expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint") + route_hints = category_template_metadata.formatter_hints_for_route( + {"formatter_hints": {"all": ["shared formatter cue"], "krea2": ["krea formatter cue"]}}, + "krea2", + ) + _expect(route_hints == ["shared formatter cue", "krea formatter cue"], "Formatter hint route resolver changed") _expect( pb._template_action_family(template_metadata) == category_template_metadata.template_action_family(template_metadata), "Prompt builder template action policy should delegate", @@ -2486,6 +2491,39 @@ def smoke_formatter_metadata_fixtures() -> None: for term in case["caption_terms"]: _expect(term in caption_text, f"{name}.caption missing {term!r}") + route_row = _fixture_hardcore_row( + formatter_hints={ + "all": ["shared route anchor"], + "krea": ["krea readable anchor"], + "sdxl": ["sdxl route tag"], + "caption": ["caption route phrase"], + } + ) + 
_expect_custom_row(route_row, "fixture_formatter_hints") + metadata = _json(route_row) + + krea = krea_formatter.format_krea2_prompt("", metadata_json=metadata, target="single") + krea_prompt = _expect_text("fixture_formatter_hints.krea_prompt", krea.get("krea_prompt"), 40).lower() + _expect("shared route anchor" in krea_prompt, "Krea formatter missed shared formatter hint") + _expect("krea readable anchor" in krea_prompt, "Krea formatter missed Krea formatter hint") + _expect("sdxl route tag" not in krea_prompt, "Krea formatter leaked SDXL formatter hint") + _expect("caption route phrase" not in krea_prompt, "Krea formatter leaked caption formatter hint") + + sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=metadata, target="single", trigger=SdxlTrigger, prepend_trigger=True) + sdxl_prompt = _expect_text("fixture_formatter_hints.sdxl_prompt", sdxl.get("sdxl_prompt"), 40).lower() + _expect("shared route anchor" in sdxl_prompt, "SDXL formatter missed shared formatter hint") + _expect("sdxl route tag" in sdxl_prompt, "SDXL formatter missed SDXL formatter hint") + _expect("krea readable anchor" not in sdxl_prompt, "SDXL formatter leaked Krea formatter hint") + _expect("caption route phrase" not in sdxl_prompt, "SDXL formatter leaked caption formatter hint") + + caption, method = caption_naturalizer.naturalize_caption("", metadata_json=metadata, trigger=Trigger, include_trigger=True) + caption_text = _expect_text("fixture_formatter_hints.caption", caption, 40).lower() + _expect("metadata" in method, "Caption formatter hints fixture did not use metadata") + _expect("shared route anchor" in caption_text, "Caption naturalizer missed shared formatter hint") + _expect("caption route phrase" in caption_text, "Caption naturalizer missed caption formatter hint") + _expect("krea readable anchor" not in caption_text, "Caption naturalizer leaked Krea formatter hint") + _expect("sdxl route tag" not in caption_text, "Caption naturalizer leaked SDXL formatter hint") + def 
smoke_node_utility_registration() -> None: required_nodes = [