Extract row normalization policy

This commit is contained in:
2026-06-27 01:15:24 +02:00
parent 2165e9fc16
commit b54b8b9421
6 changed files with 237 additions and 62 deletions
+5 -1
View File
@@ -165,6 +165,10 @@ Already isolated:
rows now emit `action_family`, `position_family`, `position_key`, and rows now emit `action_family`, `position_family`, `position_key`, and
`position_keys` so formatter routing and debugging do less keyword guessing. `position_keys` so formatter routing and debugging do less keyword guessing.
Krea, SDXL, and training-caption routes consume these fields when present. Krea, SDXL, and training-caption routes consume these fields when present.
- final row and pair text normalization lives in `row_normalization.py`,
covering trigger prepending, extra-positive append, negative merge/dedupe,
caption-part joining, and embedded soft/hard row sanitation before metadata
leaves generation.
### Pair / Adapter Layer ### Pair / Adapter Layer
@@ -198,7 +202,7 @@ Already isolated:
root clothing-state assembly. root clothing-state assembly.
- final pair output assembly lives in `pair_output.py`, including soft/hard - final pair output assembly lives in `pair_output.py`, including soft/hard
prompt strings, trigger preservation, negatives, captions, and root metadata prompt strings, trigger preservation, negatives, captions, and root metadata
shape. shape; the final cleanup step is delegated to `row_normalization.py`.
### Krea2 Formatter Path ### Krea2 Formatter Path
+1
View File
@@ -93,6 +93,7 @@ Core helper ownership:
| `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. | | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
| `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. | | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
## Node IO Map ## Node IO Map
+23 -35
View File
@@ -3,9 +3,9 @@ from __future__ import annotations
from typing import Any, Callable from typing import Any, Callable
try: try:
from .prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text from . import row_normalization as row_policy
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`. except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
from prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text import row_normalization as row_policy
def _labeled_expression_sentence(label: str, expression: Any) -> str: def _labeled_expression_sentence(label: str, expression: Any) -> str:
@@ -16,17 +16,11 @@ def _labeled_expression_sentence(label: str, expression: Any) -> str:
def _prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str: def _prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
trigger = trigger.strip() return row_policy.prepend_trigger(prompt, trigger, enabled)
if not enabled or not trigger:
return prompt
if prompt.lower().startswith(trigger.lower()):
return prompt
return f"{trigger}, {prompt}"
def _combined_negative(base: str, extra: str) -> str: def _combined_negative(base: str, extra: str) -> str:
parts = [part.strip() for part in (base, extra) if part and part.strip()] return row_policy.combined_negative(base, extra)
return ", ".join(parts)
def assemble_insta_pair_metadata( def assemble_insta_pair_metadata(
@@ -109,17 +103,6 @@ def assemble_insta_pair_metadata(
f"{hard_camera_sentence}" f"{hard_camera_sentence}"
f"{hard_row['positive_suffix']}." f"{hard_row['positive_suffix']}."
) )
if extra_positive.strip():
soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
soft_prompt = _prepend_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
hard_prompt = _prepend_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,))
hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,))
soft_negative = sanitize_negative_text(_combined_negative(soft_negative_base, extra_negative))
hard_negative = sanitize_negative_text(_combined_negative(hard_negative_base, extra_negative))
soft_caption_parts = [ soft_caption_parts = [
active_trigger, active_trigger,
"Insta/OF softcore mode", "Insta/OF softcore mode",
@@ -134,10 +117,6 @@ def assemble_insta_pair_metadata(
soft_row["composition"], soft_row["composition"],
camera_caption_text(soft_camera_config) if soft_camera_directive else "", camera_caption_text(soft_camera_config) if soft_camera_directive else "",
] ]
soft_caption = sanitize_caption_text(
", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()),
triggers=(active_trigger,),
)
hard_caption_parts = [ hard_caption_parts = [
active_trigger, active_trigger,
"Insta/OF hardcore mode", "Insta/OF hardcore mode",
@@ -151,12 +130,20 @@ def assemble_insta_pair_metadata(
hard_composition, hard_composition,
camera_caption_text(hard_camera_config) if hard_camera_directive else "", camera_caption_text(hard_camera_config) if hard_camera_directive else "",
] ]
hard_caption = sanitize_caption_text( normalized_text = row_policy.normalize_pair_text_outputs(
", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()), active_trigger=active_trigger,
triggers=(active_trigger,), prepend_trigger_to_prompt=bool(prepend_trigger_to_prompt),
extra_positive=extra_positive,
extra_negative=extra_negative,
soft_prompt=soft_prompt,
hard_prompt=hard_prompt,
soft_negative_base=soft_negative_base,
hard_negative_base=hard_negative_base,
soft_caption_parts=soft_caption_parts,
hard_caption_parts=hard_caption_parts,
) )
return { pair = {
"mode": "Insta/OF", "mode": "Insta/OF",
"options": options, "options": options,
"shared_descriptor": descriptor, "shared_descriptor": descriptor,
@@ -169,12 +156,12 @@ def assemble_insta_pair_metadata(
"hardcore_clothing_state": hard_clothing_state, "hardcore_clothing_state": hard_clothing_state,
"hardcore_detail_density": hard_detail_density, "hardcore_detail_density": hard_detail_density,
"hardcore_position_config": hard_row.get("hardcore_position_config", {}), "hardcore_position_config": hard_row.get("hardcore_position_config", {}),
"softcore_prompt": soft_prompt, "softcore_prompt": normalized_text["soft_prompt"],
"hardcore_prompt": hard_prompt, "hardcore_prompt": normalized_text["hard_prompt"],
"softcore_negative_prompt": soft_negative, "softcore_negative_prompt": normalized_text["soft_negative"],
"hardcore_negative_prompt": hard_negative, "hardcore_negative_prompt": normalized_text["hard_negative"],
"softcore_caption": soft_caption, "softcore_caption": normalized_text["soft_caption"],
"hardcore_caption": hard_caption, "hardcore_caption": normalized_text["hard_caption"],
"softcore_row": soft_row, "softcore_row": soft_row,
"hardcore_row": hard_row, "hardcore_row": hard_row,
"hardcore_women_count": hard_women_count, "hardcore_women_count": hard_women_count,
@@ -188,3 +175,4 @@ def assemble_insta_pair_metadata(
"softcore_camera_scene_directive": soft_camera_scene_directive, "softcore_camera_scene_directive": soft_camera_scene_directive,
"hardcore_camera_scene_directive": hard_camera_scene_directive, "hardcore_camera_scene_directive": hard_camera_scene_directive,
} }
return row_policy.normalize_pair_metadata(pair, active_trigger=active_trigger)
+11 -26
View File
@@ -38,6 +38,7 @@ try:
from . import pair_output from . import pair_output
from . import pair_rows from . import pair_rows
from . import pair_options from . import pair_options
from . import row_normalization as row_policy
from . import scene_camera_adapters from . import scene_camera_adapters
from . import seed_config as seed_policy from . import seed_config as seed_policy
from .hardcore_text_cleanup import ( from .hardcore_text_cleanup import (
@@ -46,11 +47,6 @@ try:
) )
from .hardcore_action_metadata import source_hardcore_action_family from .hardcore_action_metadata import source_hardcore_action_family
from .hardcore_role_graphs import build_hardcore_role_graph from .hardcore_role_graphs import build_hardcore_role_graph
from .prompt_hygiene import (
sanitize_caption_text,
sanitize_negative_text,
sanitize_prompt_text,
)
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
from category_library import ( from category_library import (
category_json_files as _json_files, category_json_files as _json_files,
@@ -82,6 +78,7 @@ except ImportError: # Allows local smoke tests with `python -c`.
import pair_output import pair_output
import pair_rows import pair_rows
import pair_options import pair_options
import row_normalization as row_policy
import scene_camera_adapters import scene_camera_adapters
import seed_config as seed_policy import seed_config as seed_policy
from hardcore_text_cleanup import ( from hardcore_text_cleanup import (
@@ -90,11 +87,6 @@ except ImportError: # Allows local smoke tests with `python -c`.
) )
from hardcore_action_metadata import source_hardcore_action_family from hardcore_action_metadata import source_hardcore_action_family
from hardcore_role_graphs import build_hardcore_role_graph from hardcore_role_graphs import build_hardcore_role_graph
from prompt_hygiene import (
sanitize_caption_text,
sanitize_negative_text,
sanitize_prompt_text,
)
ROOT_DIR = Path(__file__).resolve().parent ROOT_DIR = Path(__file__).resolve().parent
@@ -1377,17 +1369,11 @@ def _disable_row_expression(row: dict[str, Any], source: str = "disabled") -> di
def _prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str: def _prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
trigger = trigger.strip() return row_policy.prepend_trigger(prompt, trigger, enabled)
if not enabled or not trigger:
return prompt
if prompt.lower().startswith(trigger.lower()):
return prompt
return f"{trigger}, {prompt}"
def _combined_negative(base: str, extra: str) -> str: def _combined_negative(base: str, extra: str) -> str:
parts = [part.strip() for part in (base, extra) if part and part.strip()] return row_policy.combined_negative(base, extra)
return ", ".join(parts)
def camera_mode_choices() -> list[str]: def camera_mode_choices() -> list[str]:
@@ -4190,17 +4176,16 @@ def build_prompt(
) )
if not expression_enabled: if not expression_enabled:
row = _disable_row_expression(row, "disabled") row = _disable_row_expression(row, "disabled")
if extra_positive.strip():
row["prompt"] = f"{row['prompt'].rstrip()} {extra_positive.strip()}"
row = _apply_camera_config(row, camera_config) row = _apply_camera_config(row, camera_config)
active_trigger = trigger.strip() or g.TRIGGER active_trigger = trigger.strip() or g.TRIGGER
row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt)) row = row_policy.normalize_prompt_row(
row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,)) row,
row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,)) active_trigger=active_trigger,
row["negative_prompt"] = sanitize_negative_text( prepend_trigger_to_prompt=bool(prepend_trigger_to_prompt),
_combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative) extra_positive=extra_positive,
extra_negative=extra_negative,
default_negative=g.NEGATIVE_PROMPT,
) )
row["trigger"] = active_trigger
row.setdefault("expression_intensity", expression_intensity) row.setdefault("expression_intensity", expression_intensity)
row.setdefault("expression_intensity_source", expression_intensity_source) row.setdefault("expression_intensity_source", expression_intensity_source)
return row return row
+119
View File
@@ -0,0 +1,119 @@
from __future__ import annotations
from typing import Any
try:
from .prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
from prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
def _trigger_tuple(active_trigger: str) -> tuple[str, ...]:
trigger = str(active_trigger or "").strip()
return (trigger,) if trigger else ()
def prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
    """Return ``prompt`` prefixed with ``"<trigger>, "`` when that is required.

    Args:
        prompt: Prompt text; falsy values are treated as an empty string.
        trigger: Trigger token; surrounding whitespace is ignored.
        enabled: When false the prompt is returned without a prefix.

    Returns:
        The prompt unchanged when prepending is disabled, the trigger is
        blank, or the prompt already begins with the trigger
        (case-insensitive) as a complete token; otherwise
        ``f"{trigger}, {prompt}"``.
    """
    trigger = str(trigger or "").strip()
    prompt = str(prompt or "")
    if not enabled or not trigger:
        return prompt

    lowered_prompt = prompt.lower()
    lowered_trigger = trigger.lower()
    if lowered_prompt.startswith(lowered_trigger):
        # A bare prefix match is not enough: trigger "abc" must not be treated
        # as already present in "abcdef photo". Require the match to end at a
        # token boundary (end of text, or a non-word character on either side
        # of the seam).
        tail = lowered_prompt[len(lowered_trigger):]
        last = lowered_trigger[-1]
        trigger_self_delimited = not (last.isalnum() or last == "_")
        tail_starts_new_token = not tail or not (tail[0].isalnum() or tail[0] == "_")
        if trigger_self_delimited or tail_starts_new_token:
            return prompt
    return f"{trigger}, {prompt}"
def combined_negative(base: str, extra: str) -> str:
    """Join the base and extra negative prompts with ``", "``.

    Falsy or whitespace-only inputs are dropped, and each kept piece is
    stripped. No de-duplication happens here.
    """
    pieces = []
    for candidate in (base, extra):
        if not candidate:
            continue
        text = str(candidate).strip()
        if text:
            pieces.append(text)
    return ", ".join(pieces)
def caption_from_parts(parts: list[Any] | tuple[Any, ...], *, active_trigger: str = "") -> str:
    """Join caption fragments with ``", "`` and run the caption sanitizer.

    Args:
        parts: Caption fragments. Each is converted with ``str`` and stripped;
            ``None`` and fragments that are blank after stripping are skipped.
        active_trigger: Trigger forwarded to ``sanitize_caption_text`` through
            ``_trigger_tuple``.

    Returns:
        Whatever ``sanitize_caption_text`` returns for the joined text.
    """
    fragments = []
    for part in parts:
        # str(None) is the truthy text "None"; without this guard a missing
        # fragment would show up verbatim in the caption.
        if part is None:
            continue
        text = str(part).strip()
        if text:
            fragments.append(text)
    return sanitize_caption_text(", ".join(fragments), triggers=_trigger_tuple(active_trigger))
def normalize_prompt_row(
    row: dict[str, Any],
    *,
    active_trigger: str,
    prepend_trigger_to_prompt: bool,
    extra_positive: str = "",
    extra_negative: str = "",
    default_negative: str = "",
) -> dict[str, Any]:
    """Finalize the text fields of a single prompt row, in place.

    Steps, in order: append ``extra_positive`` to the prompt, optionally
    prepend the trigger, sanitize prompt and caption, merge ``extra_negative``
    into the negative prompt and sanitize it, then record the trigger.

    Args:
        row: Row dict; ``prompt``, ``caption``, ``negative_prompt`` and
            ``trigger`` are overwritten.
        active_trigger: Trigger text (stripped before use).
        prepend_trigger_to_prompt: Whether the trigger is prefixed to the prompt.
        extra_positive: Text appended to the prompt when non-blank.
        extra_negative: Text merged after the row's negative prompt.
        default_negative: Used only when the row has no ``negative_prompt`` key.

    Returns:
        The same ``row`` object that was passed in.
    """
    clean_trigger = str(active_trigger or "").strip()
    trigger_set = _trigger_tuple(clean_trigger)

    prompt_text = str(row.get("prompt", "") or "")
    addition = str(extra_positive or "").strip()
    if addition:
        prompt_text = f"{prompt_text.rstrip()} {addition}".strip()
    prompt_text = prepend_trigger(prompt_text, clean_trigger, bool(prepend_trigger_to_prompt))

    row["prompt"] = sanitize_prompt_text(prompt_text, triggers=trigger_set)
    row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=trigger_set)

    # A present-but-falsy negative prompt becomes "", not the default.
    negative_base = str(row.get("negative_prompt", default_negative) or "")
    row["negative_prompt"] = sanitize_negative_text(combined_negative(negative_base, extra_negative))

    row["trigger"] = clean_trigger
    return row
def normalize_pair_text_outputs(
    *,
    active_trigger: str,
    prepend_trigger_to_prompt: bool,
    extra_positive: str = "",
    extra_negative: str = "",
    soft_prompt: str,
    hard_prompt: str,
    soft_negative_base: str,
    hard_negative_base: str,
    soft_caption_parts: list[Any] | tuple[Any, ...],
    hard_caption_parts: list[Any] | tuple[Any, ...],
) -> dict[str, str]:
    """Build the final soft/hard prompt, negative and caption strings for a pair.

    Both prompts get ``extra_positive`` appended (when non-blank), the trigger
    optionally prepended, and are then sanitized. Each negative base is merged
    with ``extra_negative`` and sanitized. Each caption is built from its
    parts via ``caption_from_parts``.

    Returns:
        A dict with keys ``soft_prompt``, ``hard_prompt``, ``soft_negative``,
        ``hard_negative``, ``soft_caption`` and ``hard_caption``.
    """
    clean_trigger = str(active_trigger or "").strip()
    trigger_set = _trigger_tuple(clean_trigger)
    addition = str(extra_positive or "").strip()
    prepend_enabled = bool(prepend_trigger_to_prompt)

    prompts = {"soft_prompt": soft_prompt, "hard_prompt": hard_prompt}
    if addition:
        prompts = {
            key: f"{str(text or '').rstrip()} {addition}"
            for key, text in prompts.items()
        }
    prompts = {
        key: prepend_trigger(text, clean_trigger, prepend_enabled)
        for key, text in prompts.items()
    }

    outputs = {
        key: sanitize_prompt_text(text, triggers=trigger_set)
        for key, text in prompts.items()
    }
    outputs["soft_negative"] = sanitize_negative_text(
        combined_negative(soft_negative_base, extra_negative)
    )
    outputs["hard_negative"] = sanitize_negative_text(
        combined_negative(hard_negative_base, extra_negative)
    )
    outputs["soft_caption"] = caption_from_parts(soft_caption_parts, active_trigger=clean_trigger)
    outputs["hard_caption"] = caption_from_parts(hard_caption_parts, active_trigger=clean_trigger)
    return outputs
def sanitize_metadata_row_text(row: dict[str, Any], *, active_trigger: str = "") -> dict[str, Any]:
    """Sanitize the text fields already present on an embedded row, in place.

    Only keys that exist on ``row`` are touched. The trigger comes from
    ``active_trigger`` or, failing that, the row's own ``trigger`` value; it
    is written back to ``row["trigger"]`` only when the row has no truthy
    trigger of its own.

    Returns:
        The same ``row`` object that was passed in.
    """
    resolved = str(active_trigger or row.get("trigger") or "").strip()
    trigger_set = _trigger_tuple(resolved)

    # Prompt and caption cleaners both take the trigger tuple.
    trigger_aware = (
        ("prompt", sanitize_prompt_text),
        ("caption", sanitize_caption_text),
    )
    for field, cleaner in trigger_aware:
        if field in row:
            row[field] = cleaner(row.get(field, ""), triggers=trigger_set)

    if "negative_prompt" in row:
        row["negative_prompt"] = sanitize_negative_text(row.get("negative_prompt", ""))

    if resolved and not row.get("trigger"):
        row["trigger"] = resolved
    return row
def normalize_pair_metadata(pair: dict[str, Any], *, active_trigger: str = "") -> dict[str, Any]:
    """Sanitize the text fields of an assembled pair dict, in place.

    Top-level soft/hard prompts, captions and negative prompts are sanitized
    when present. ``softcore_row`` / ``hardcore_row`` are passed through
    ``sanitize_metadata_row_text`` when they are dicts.

    Returns:
        The same ``pair`` object that was passed in.
    """
    clean_trigger = str(active_trigger or "").strip()
    trigger_set = _trigger_tuple(clean_trigger)

    def _clean_prompt(value: Any) -> Any:
        return sanitize_prompt_text(value, triggers=trigger_set)

    def _clean_caption(value: Any) -> Any:
        return sanitize_caption_text(value, triggers=trigger_set)

    # Same visiting order as the field groups: prompts, captions, negatives.
    field_cleaners = (
        ("softcore_prompt", _clean_prompt),
        ("hardcore_prompt", _clean_prompt),
        ("softcore_caption", _clean_caption),
        ("hardcore_caption", _clean_caption),
        ("softcore_negative_prompt", sanitize_negative_text),
        ("hardcore_negative_prompt", sanitize_negative_text),
    )
    for field, cleaner in field_cleaners:
        if field in pair:
            pair[field] = cleaner(pair.get(field, ""))

    for row_key in ("softcore_row", "hardcore_row"):
        embedded = pair.get(row_key)
        if isinstance(embedded, dict):
            pair[row_key] = sanitize_metadata_row_text(embedded, active_trigger=clean_trigger)
    return pair
+78
View File
@@ -35,6 +35,7 @@ import generation_profile_config # noqa: E402
import krea_formatter # noqa: E402 import krea_formatter # noqa: E402
import location_config # noqa: E402 import location_config # noqa: E402
import prompt_builder as pb # noqa: E402 import prompt_builder as pb # noqa: E402
import row_normalization # noqa: E402
import sdxl_formatter # noqa: E402 import sdxl_formatter # noqa: E402
import seed_config # noqa: E402 import seed_config # noqa: E402
@@ -770,6 +771,82 @@ def smoke_character_profile_policy() -> None:
_expect(applied_profile.get("profile_type") == "character", "Profile context returned wrong profile") _expect(applied_profile.get("profile_type") == "character", "Profile context returned wrong profile")
def smoke_row_normalization_policy() -> None:
    """Smoke-check the row normalization helpers.

    Covers four things: the prompt-builder wrappers agree with
    ``row_normalization``; ``normalize_prompt_row`` on a single row;
    ``normalize_pair_text_outputs`` on raw pair strings; and
    ``normalize_pair_metadata`` on an assembled pair with embedded rows.

    NOTE(review): ``Trigger``, ``_expect``, ``_expect_trigger_once`` and
    ``_expect_no_duplicate_comma_items`` are defined outside this view; their
    names suggest "assert", "trigger appears exactly once" and "no repeated
    comma-separated item" — confirm against their definitions.
    """
    # 1) The private prompt-builder helpers must return what the policy module returns.
    _expect(
        pb._prepend_trigger("base prompt", Trigger, True) == row_normalization.prepend_trigger("base prompt", Trigger, True),
        "Prompt builder trigger helper should delegate to row normalization policy",
    )
    _expect(
        pb._combined_negative("bad anatomy", "low quality") == row_normalization.combined_negative("bad anatomy", "low quality"),
        "Prompt builder negative helper should delegate to row normalization policy",
    )

    # 2) Single row: the input deliberately repeats the trigger and a negative item.
    row = row_normalization.normalize_prompt_row(
        {
            "prompt": f"{Trigger}, {Trigger}, base prompt.",
            "caption": f"{Trigger}, {Trigger}, base caption.",
            "negative_prompt": "bad anatomy, bad anatomy",
        },
        active_trigger=Trigger,
        prepend_trigger_to_prompt=True,
        extra_positive="extra detail",
        extra_negative="low quality, bad anatomy",
        default_negative="bad anatomy",
    )
    _expect_trigger_once("row_normalization.prompt", row.get("prompt"), Trigger)
    _expect_trigger_once("row_normalization.caption", row.get("caption"), Trigger)
    _expect("extra detail" in row.get("prompt", ""), "Row normalization lost extra positive text")
    _expect(row.get("trigger") == Trigger, "Row normalization lost active trigger")
    _expect_no_duplicate_comma_items("row_normalization.negative", row.get("negative_prompt"))

    # 3) Pair text outputs: the extra negative overlaps both negative bases.
    outputs = row_normalization.normalize_pair_text_outputs(
        active_trigger=Trigger,
        prepend_trigger_to_prompt=True,
        extra_positive="pair extra",
        extra_negative="low quality, bad anatomy",
        soft_prompt="soft prompt.",
        hard_prompt="hard prompt.",
        soft_negative_base="bad anatomy, bad anatomy",
        hard_negative_base="bad anatomy, low quality",
        soft_caption_parts=[Trigger, "soft caption"],
        hard_caption_parts=[Trigger, "hard caption"],
    )
    _expect_trigger_once("row_normalization.soft_prompt", outputs.get("soft_prompt"), Trigger)
    _expect_trigger_once("row_normalization.hard_prompt", outputs.get("hard_prompt"), Trigger)
    _expect_trigger_once("row_normalization.soft_caption", outputs.get("soft_caption"), Trigger)
    _expect_trigger_once("row_normalization.hard_caption", outputs.get("hard_caption"), Trigger)
    _expect_no_duplicate_comma_items("row_normalization.soft_negative", outputs.get("soft_negative"))
    _expect_no_duplicate_comma_items("row_normalization.hard_negative", outputs.get("hard_negative"))

    # 4) Assembled pair: top-level fields and the embedded soft/hard rows all
    #    carry doubled triggers and repeated negative items.
    pair = row_normalization.normalize_pair_metadata(
        {
            "softcore_prompt": f"{Trigger}, {Trigger}, soft pair.",
            "hardcore_prompt": f"{Trigger}, {Trigger}, hard pair.",
            "softcore_caption": f"{Trigger}, {Trigger}, soft caption.",
            "hardcore_caption": f"{Trigger}, {Trigger}, hard caption.",
            "softcore_negative_prompt": "bad anatomy, bad anatomy",
            "hardcore_negative_prompt": "bad anatomy, low quality, bad anatomy",
            "softcore_row": {
                "prompt": f"{Trigger}, {Trigger}, embedded soft.",
                "caption": f"{Trigger}, {Trigger}, embedded soft caption.",
                "negative_prompt": "bad anatomy, bad anatomy",
            },
            "hardcore_row": {
                "prompt": f"{Trigger}, {Trigger}, embedded hard.",
                "caption": f"{Trigger}, {Trigger}, embedded hard caption.",
                "negative_prompt": "low quality, bad anatomy, low quality",
            },
        },
        active_trigger=Trigger,
    )
    # Only a sample of the pair fields is asserted (e.g. the top-level captions are not).
    _expect_trigger_once("row_normalization.pair.softcore_prompt", pair.get("softcore_prompt"), Trigger)
    _expect_trigger_once("row_normalization.pair.hardcore_prompt", pair.get("hardcore_prompt"), Trigger)
    _expect_trigger_once("row_normalization.pair.softcore_row.prompt", pair["softcore_row"].get("prompt"), Trigger)
    _expect_trigger_once("row_normalization.pair.hardcore_row.caption", pair["hardcore_row"].get("caption"), Trigger)
    _expect_no_duplicate_comma_items("row_normalization.pair.soft_negative", pair.get("softcore_negative_prompt"))
    _expect_no_duplicate_comma_items("row_normalization.pair.hard_row_negative", pair["hardcore_row"].get("negative_prompt"))
def smoke_hardcore_position_config_policy() -> None: def smoke_hardcore_position_config_policy() -> None:
_expect( _expect(
pb.HARDCORE_POSITION_FAMILY_CHOICES is hardcore_position_config.HARDCORE_POSITION_FAMILY_CHOICES, pb.HARDCORE_POSITION_FAMILY_CHOICES is hardcore_position_config.HARDCORE_POSITION_FAMILY_CHOICES,
@@ -2740,6 +2817,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
("filter_config_policy", smoke_filter_config_policy), ("filter_config_policy", smoke_filter_config_policy),
("character_config_policy", smoke_character_config_policy), ("character_config_policy", smoke_character_config_policy),
("character_profile_policy", smoke_character_profile_policy), ("character_profile_policy", smoke_character_profile_policy),
("row_normalization_policy", smoke_row_normalization_policy),
("hardcore_position_config_policy", smoke_hardcore_position_config_policy), ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
("category_library_route", smoke_category_library_route), ("category_library_route", smoke_category_library_route),
("hardcore_category_routes", smoke_hardcore_category_routes), ("hardcore_category_routes", smoke_hardcore_category_routes),