Consume formatter hints

This commit is contained in:
2026-06-27 02:17:04 +02:00
parent dfdfff953b
commit 7d112c0f98
7 changed files with 145 additions and 5 deletions
+24 -2
View File
@@ -5,11 +5,13 @@ from typing import Any
try:
from . import caption_policy
from . import category_template_metadata as template_metadata_policy
from . import formatter_input as input_policy
from . import krea_cast as cast_policy
from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
import caption_policy
import category_template_metadata as template_metadata_policy
import formatter_input as input_policy
import krea_cast as cast_policy
from prompt_hygiene import sanitize_prose_text
@@ -67,6 +69,24 @@ def _join_sentences(parts: list[str]) -> str:
return " ".join(part for part in (_sentence(part) for part in parts) if part)
def _formatter_hint_parts(row: dict[str, Any]) -> list[str]:
    """Collect the cleaned, de-duplicated caption-route hints for ``row``.

    Hints are resolved through
    ``template_metadata_policy.formatter_hints_for_route`` with the
    ``"caption"`` route. Each hint is passed through ``_clean_text`` and has
    surrounding spaces and periods stripped. Anything that is not a dict
    yields an empty list.
    """
    collected: list[str] = []
    if isinstance(row, dict):
        for raw in template_metadata_policy.formatter_hints_for_route(row, "caption"):
            cleaned = _clean_text(raw).strip(" .")
            # Skip blanks and repeats so each hint is emitted at most once,
            # in first-seen order.
            if not cleaned or cleaned in collected:
                continue
            collected.append(cleaned)
    return collected
def _append_formatter_hints(prose: str, row: dict[str, Any]) -> str:
    """Return ``prose`` followed by the caption formatter hints of ``row``.

    When ``row`` contributes no hints the prose is returned untouched;
    otherwise the prose and every hint are combined by ``_join_sentences``.
    """
    extra = _formatter_hint_parts(row)
    return _join_sentences([prose, *extra]) if extra else prose
def _human_join(parts: list[str]) -> str:
parts = [part for part in (_clean_text(part) for part in parts) if part]
if len(parts) <= 1:
@@ -538,8 +558,10 @@ def _metadata_to_prose(row: dict[str, Any], detail_level: str, keep_style: bool)
):
result = builder(row, detail_level, keep_style)
if result:
return result
return _text_to_prose(_clean_text(row.get("caption") or row.get("prompt")), detail_level, keep_style)
prose, method = result
return _append_formatter_hints(prose, row), method
prose, method = _text_to_prose(_clean_text(row.get("caption") or row.get("prompt")), detail_level, keep_style)
return _append_formatter_hints(prose, row), method
def _prompt_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, str] | None:
+31
View File
@@ -96,6 +96,37 @@ def formatter_hints(metadata: dict[str, Any]) -> dict[str, list[str]]:
return {route: hints for route, hints in normalized.items() if hints}
def formatter_hints_for_route(row_or_hints: Any, route: str) -> list[str]:
    """Return the formatter hints that apply to a single formatter route.

    ``row_or_hints`` is inspected in this order:

    1. a dict whose ``formatter_hints`` entry is itself a dict -> that map;
    2. a dict with a ``formatter_hint`` key -> resolved via ``formatter_hints``;
    3. a dict whose keys *all* normalize to a formatter route -> read as a
       bare route-keyed hint map;
    4. anything else -> no hints.

    Step 3 is deliberately strict. Callers hand over full rows, and a row
    may own a field whose name matches a route (for example ``caption``).
    Reading such a row as a hint map would feed the row's own text back in
    as a hint. A dict that contains nothing but route-named keys is still
    read as a hint map, because it cannot be told apart from one.

    Args:
        row_or_hints: Row, template metadata, or bare hint map. Non-dict
            values produce an empty result.
        route: Route name or alias; it is passed through
            ``normalize_formatter_route`` before lookup.

    Returns:
        Hints stored under ``all`` followed by the hints stored under the
        normalized ``route``, de-duplicated in first-seen order.
    """
    route = normalize_formatter_route(route)
    if not route or not isinstance(row_or_hints, dict):
        return []

    nested = row_or_hints.get("formatter_hints")
    if isinstance(nested, dict):
        raw_hints = nested
    elif "formatter_hint" in row_or_hints:
        raw_hints = formatter_hints(row_or_hints)
    elif all(normalize_formatter_route(key) for key in row_or_hints):
        # Only route-named keys: safe to interpret as a bare hint map.
        raw_hints = row_or_hints
    else:
        # A row without hint metadata; never mine its own fields for hints.
        raw_hints = {}

    normalized: dict[str, list[str]] = {}
    if isinstance(raw_hints, dict):
        for raw_route, values in raw_hints.items():
            normalized_route = normalize_formatter_route(raw_route)
            if not normalized_route:
                continue
            for value in _list_from(values):
                hint = _clean_hint(value)
                # Aliases can collapse onto the same route, so de-duplicate
                # per normalized route rather than per raw key.
                if hint and hint not in normalized.setdefault(normalized_route, []):
                    normalized[normalized_route].append(hint)

    hints: list[str] = []
    # Shared hints come first, then the route-specific ones.
    for wanted_route in ("all", route):
        for hint in normalized.get(wanted_route, []):
            if hint not in hints:
                hints.append(hint)
    return hints
def merge_position_keys(primary: list[str], fallback: list[str]) -> list[str]:
merged: list[str] = []
for key in [*primary, *fallback]:
+3 -1
View File
@@ -338,7 +338,9 @@ Keep here:
- optional object-style item templates with route metadata such as
`action_family`, `action_type`, `position_family`, `family`, `position_key`,
`position_keys`, and `formatter_hint`; string templates remain valid and fall
back to Python inference.
back to Python inference. Normalized formatter hints are routed into Krea,
SDXL, and caption naturalization through `all` plus the matching formatter
route only.
Improve later:
+3 -2
View File
@@ -222,7 +222,8 @@ Important JSON keys:
`position_family`, `family`, `position_key`, `position_keys`, and
`formatter_hint`. Formatter hints may be a string/list for all routes or a
map keyed by `krea`, `sdxl`, or `caption`; aliases such as `krea2` and
`training_caption` are normalized by `category_template_metadata.py`.
`training_caption` are normalized by `category_template_metadata.py` and
consumed only by the matching formatter route plus the shared `all` route.
- `axes`: values used to fill `item_templates`.
- `scene_pool` / `scene_pools` or direct `scenes`: location road.
- `expression_pool` / `expression_pools` or direct `expressions`: expression road.
@@ -452,7 +453,7 @@ plain prompt text. When debugging, inspect these fields before editing pools.
| `item` | `_compose_item` or Insta override | Krea/SDXL/Naturalizer | Clothing item, category item, or sexual scene/action text. |
| `item_axis_values` | `_compose_item` | Krea hardcore rewrite, SDXL tags | Filled template axes such as position/action/detail values. |
| `item_template_metadata` | `_compose_item` | Debug, Krea/SDXL/Naturalizer route metadata | Optional metadata from object-style item templates; currently used to prefer explicit action/position families and keys before inference. |
| `formatter_hints` | `category_template_metadata.formatter_hints` | Debug/future formatter specialization | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`. |
| `formatter_hints` | `category_template_metadata.formatter_hints` | Krea/SDXL/Naturalizer route specialization, debug | Normalized route-specific hints from object-style item templates, keyed by `all`, `krea`, `sdxl`, or `caption`; each formatter consumes `all` plus its own route only. |
| `action_family` | `item_template_metadata` or `hardcore_action_metadata.source_hardcore_action_family` | Krea hardcore rewrite, SDXL tags, natural captions, debug | Source-aware formatter semantic family such as `foreplay`, `outercourse`, `oral`, `penetration`, `toy_double`, or `climax`. |
| `position_family` | `item_template_metadata` or `_hardcore_source_position_family` | Debug/filtering | Source/UI hardcore family selected by template metadata or subcategory, such as `manual`, `interaction`, `oral`, `anal`, or `climax`. |
| `position_key`, `position_keys` | `item_template_metadata` plus `_hardcore_position_keys` | Debug/future filters | Concrete position tokens from object-template metadata and inferred axes/role text, such as `kneeling`, `doggy`, `boobjob`, or `open_thighs`. |
+26
View File
@@ -5,6 +5,7 @@ from typing import Any
try:
from . import formatter_input as input_policy
from . import category_template_metadata as template_metadata_policy
from .krea_action_context import (
is_close_foreplay_text as _is_close_foreplay_text,
is_outercourse_text as _is_outercourse_text,
@@ -35,6 +36,7 @@ try:
from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy
import category_template_metadata as template_metadata_policy
from krea_action_context import (
is_close_foreplay_text as _is_close_foreplay_text,
is_outercourse_text as _is_outercourse_text,
@@ -102,6 +104,25 @@ def _paragraph(parts: list[str]) -> str:
return " ".join(part for part in (_sentence(part) for part in parts) if part)
def _formatter_hint_parts(*rows: dict[str, Any]) -> list[str]:
    """Collect the cleaned, de-duplicated Krea-route hints across ``rows``.

    Rows are visited in the order given and non-dict entries are ignored.
    Each hint comes from
    ``template_metadata_policy.formatter_hints_for_route`` with the
    ``"krea"`` route, is passed through ``_clean`` and has surrounding
    spaces and periods stripped.
    """
    collected: list[str] = []
    for candidate in (row for row in rows if isinstance(row, dict)):
        for raw in template_metadata_policy.formatter_hints_for_route(candidate, "krea"):
            cleaned = _clean(raw).strip(" .")
            # A hint shared by several rows is only kept the first time.
            if cleaned and cleaned not in collected:
                collected.append(cleaned)
    return collected
def _append_formatter_hints(prompt: str, *rows: dict[str, Any]) -> str:
    """Return ``prompt`` followed by the Krea formatter hints of ``rows``.

    When the rows contribute no hints the prompt is returned untouched;
    otherwise the prompt and every hint are combined by ``_paragraph``.
    """
    extra = _formatter_hint_parts(*rows)
    return _paragraph([prompt, *extra]) if extra else prompt
def _with_indefinite_article(text: str) -> str:
text = _clean(text)
if not text or text.lower().startswith(("a ", "an ")):
@@ -715,6 +736,10 @@ def format_krea2_prompt(
if row and row.get("mode") == "Insta/OF":
soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
soft_row = row.get("softcore_row") if isinstance(row.get("softcore_row"), dict) else {}
hard_row = row.get("hardcore_row") if isinstance(row.get("hardcore_row"), dict) else {}
soft_prompt = _append_formatter_hints(soft_prompt, row, soft_row)
hard_prompt = _append_formatter_hints(hard_prompt, row, hard_row)
if extra_positive.strip():
soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
@@ -735,6 +760,7 @@ def format_krea2_prompt(
if row:
prompt, kind = _normal_row_to_krea(row, detail_level, style_mode)
prompt = _append_formatter_hints(prompt, row)
extracted_negative = _clean(row.get("negative_prompt"))
method = f"{method}:krea2({kind})"
else:
+20
View File
@@ -5,11 +5,13 @@ from typing import Any
try:
from . import formatter_input as input_policy
from . import category_template_metadata as template_metadata_policy
from . import sdxl_presets as sdxl_policy
from .hardcore_action_metadata import normalize_hardcore_action_family
from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
except ImportError: # Allows local smoke tests with `python -c`.
import formatter_input as input_policy
import category_template_metadata as template_metadata_policy
import sdxl_presets as sdxl_policy
from hardcore_action_metadata import normalize_hardcore_action_family
from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
@@ -137,6 +139,18 @@ def _metadata_family_tags(row: dict[str, Any]) -> list[str]:
return tags
def _formatter_hint_tags(*rows: dict[str, Any]) -> list[str]:
    """Collect the cleaned, de-duplicated SDXL-route hint tags across ``rows``.

    Rows are visited in the order given and non-dict entries are ignored.
    Each hint comes from
    ``template_metadata_policy.formatter_hints_for_route`` with the
    ``"sdxl"`` route, is passed through ``_clean`` and has surrounding
    spaces, commas and periods stripped.
    """
    collected: list[str] = []
    for candidate in (row for row in rows if isinstance(row, dict)):
        for raw in template_metadata_policy.formatter_hints_for_route(candidate, "sdxl"):
            cleaned = _clean(raw).strip(" ,.")
            # A hint shared by several rows is only kept the first time.
            if cleaned and cleaned not in collected:
                collected.append(cleaned)
    return collected
def _combine_tags(*parts: Any) -> str:
tags: list[str] = []
seen: set[str] = set()
@@ -288,6 +302,8 @@ def _row_core_tags(row: dict[str, Any], nude_weight: float) -> list[str]:
for tag in _metadata_family_tags(row):
_add_one(tags, seen, tag)
for tag in _formatter_hint_tags(row):
_add(tags, seen, tag)
item = _row_value(row, "item", ("Sexual scene", "Sexual pose", "Erotic outfit", "Clothing")) or _clean(row.get("custom_item"))
pose = _row_value(row, "pose", ("Sexual pose", "Pose"))
@@ -334,6 +350,8 @@ def _quality_tail(quality_preset: str, custom_quality: str) -> str:
def _soft_tags(row: dict[str, Any], root: dict[str, Any], nude_weight: float) -> str:
tags = _row_core_tags(row, nude_weight)
seen = {_tag_key(tag) for tag in tags}
for tag in _formatter_hint_tags(root):
_add(tags, seen, tag)
descriptor = _clean(root.get("shared_descriptor"))
if descriptor and not any("woman" in _tag_key(tag) for tag in tags):
for tag in _character_tags_from_descriptor(descriptor):
@@ -369,6 +387,8 @@ def _hard_tags(row: dict[str, Any], root: dict[str, Any], nude_weight: float) ->
for tag in _metadata_family_tags(row):
_add_one(tags, seen, tag)
for tag in _formatter_hint_tags(row, root):
_add(tags, seen, tag)
hard_scene = _clean(row.get("scene_text"))
hard_item = _clean(row.get("item"))
+38
View File
@@ -1225,6 +1225,11 @@ def smoke_hardcore_position_config_policy() -> None:
_expect(formatter_hints.get("krea") == ["keep mouth contact readable"], "Template metadata route lost Krea formatter hint")
_expect(formatter_hints.get("sdxl") == ["oral contact", "kneeling oral"], "Template metadata route lost SDXL formatter hints")
_expect(formatter_hints.get("caption") == ["oral contact caption detail"], "Template metadata route lost caption formatter hint")
route_hints = category_template_metadata.formatter_hints_for_route(
{"formatter_hints": {"all": ["shared formatter cue"], "krea2": ["krea formatter cue"]}},
"krea2",
)
_expect(route_hints == ["shared formatter cue", "krea formatter cue"], "Formatter hint route resolver changed")
_expect(
pb._template_action_family(template_metadata) == category_template_metadata.template_action_family(template_metadata),
"Prompt builder template action policy should delegate",
@@ -2486,6 +2491,39 @@ def smoke_formatter_metadata_fixtures() -> None:
for term in case["caption_terms"]:
_expect(term in caption_text, f"{name}.caption missing {term!r}")
route_row = _fixture_hardcore_row(
formatter_hints={
"all": ["shared route anchor"],
"krea": ["krea readable anchor"],
"sdxl": ["sdxl route tag"],
"caption": ["caption route phrase"],
}
)
_expect_custom_row(route_row, "fixture_formatter_hints")
metadata = _json(route_row)
krea = krea_formatter.format_krea2_prompt("", metadata_json=metadata, target="single")
krea_prompt = _expect_text("fixture_formatter_hints.krea_prompt", krea.get("krea_prompt"), 40).lower()
_expect("shared route anchor" in krea_prompt, "Krea formatter missed shared formatter hint")
_expect("krea readable anchor" in krea_prompt, "Krea formatter missed Krea formatter hint")
_expect("sdxl route tag" not in krea_prompt, "Krea formatter leaked SDXL formatter hint")
_expect("caption route phrase" not in krea_prompt, "Krea formatter leaked caption formatter hint")
sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=metadata, target="single", trigger=SdxlTrigger, prepend_trigger=True)
sdxl_prompt = _expect_text("fixture_formatter_hints.sdxl_prompt", sdxl.get("sdxl_prompt"), 40).lower()
_expect("shared route anchor" in sdxl_prompt, "SDXL formatter missed shared formatter hint")
_expect("sdxl route tag" in sdxl_prompt, "SDXL formatter missed SDXL formatter hint")
_expect("krea readable anchor" not in sdxl_prompt, "SDXL formatter leaked Krea formatter hint")
_expect("caption route phrase" not in sdxl_prompt, "SDXL formatter leaked caption formatter hint")
caption, method = caption_naturalizer.naturalize_caption("", metadata_json=metadata, trigger=Trigger, include_trigger=True)
caption_text = _expect_text("fixture_formatter_hints.caption", caption, 40).lower()
_expect("metadata" in method, "Caption formatter hints fixture did not use metadata")
_expect("shared route anchor" in caption_text, "Caption naturalizer missed shared formatter hint")
_expect("caption route phrase" in caption_text, "Caption naturalizer missed caption formatter hint")
_expect("krea readable anchor" not in caption_text, "Caption naturalizer leaked Krea formatter hint")
_expect("sdxl route tag" not in caption_text, "Caption naturalizer leaked SDXL formatter hint")
def smoke_node_utility_registration() -> None:
required_nodes = [