Extract row normalization policy

2026-06-27 01:15:24 +02:00
parent 2165e9fc16
commit b54b8b9421
6 changed files with 237 additions and 62 deletions
@@ -165,6 +165,10 @@ Already isolated:
  rows now emit `action_family`, `position_family`, `position_key`, and
  `position_keys` so formatter routing and debugging do less keyword guessing.
  Krea, SDXL, and training-caption routes consume these fields when present.
 - final row and pair text normalization lives in `row_normalization.py`,
  covering trigger prepending, extra-positive append, negative merge/dedupe,
  caption-part joining, and embedded soft/hard row sanitization before metadata
  leaves generation.
 ### Pair / Adapter Layer
@@ -198,7 +202,7 @@ Already isolated:
  root clothing-state assembly.
 - final pair output assembly lives in `pair_output.py`, including soft/hard
  prompt strings, trigger preservation, negatives, captions, and root metadata
-  shape.
+  shape; the final cleanup step is delegated to `row_normalization.py`.
 ### Krea2 Formatter Path
@@ -93,6 +93,7 @@ Core helper ownership:
 | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
 | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
 | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
 | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitization. |
 ## Node IO Map
@@ -3,9 +3,9 @@ from __future__ import annotations
 from typing import Any, Callable
 try:
-    from .prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
+    from . import row_normalization as row_policy
 except ImportError:  # Allows local smoke tests with `python tools/prompt_smoke.py`.
-    from prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
+    import row_normalization as row_policy
 def _labeled_expression_sentence(label: str, expression: Any) -> str:
@@ -16,17 +16,11 @@ def _labeled_expression_sentence(label: str, expression: Any) -> str:
 def _prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
-    trigger = trigger.strip()
+    return row_policy.prepend_trigger(prompt, trigger, enabled)
    if not enabled or not trigger:
        return prompt
    if prompt.lower().startswith(trigger.lower()):
        return prompt
    return f"{trigger}, {prompt}"
 def _combined_negative(base: str, extra: str) -> str:
-    parts = [part.strip() for part in (base, extra) if part and part.strip()]
+    return row_policy.combined_negative(base, extra)
    return ", ".join(parts)
 def assemble_insta_pair_metadata(
@@ -109,17 +103,6 @@ def assemble_insta_pair_metadata(
        f"{hard_camera_sentence}"
        f"{hard_row['positive_suffix']}."
    )
    if extra_positive.strip():
        soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
        hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
    soft_prompt = _prepend_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
    hard_prompt = _prepend_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
    soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,))
    hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,))
    soft_negative = sanitize_negative_text(_combined_negative(soft_negative_base, extra_negative))
    hard_negative = sanitize_negative_text(_combined_negative(hard_negative_base, extra_negative))
    soft_caption_parts = [
        active_trigger,
        "Insta/OF softcore mode",
@@ -134,10 +117,6 @@ def assemble_insta_pair_metadata(
        soft_row["composition"],
        camera_caption_text(soft_camera_config) if soft_camera_directive else "",
    ]
    soft_caption = sanitize_caption_text(
        ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()),
        triggers=(active_trigger,),
    )
    hard_caption_parts = [
        active_trigger,
        "Insta/OF hardcore mode",
@@ -151,12 +130,20 @@ def assemble_insta_pair_metadata(
        hard_composition,
        camera_caption_text(hard_camera_config) if hard_camera_directive else "",
    ]
-    hard_caption = sanitize_caption_text(
+    normalized_text = row_policy.normalize_pair_text_outputs(
-        ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()),
+        active_trigger=active_trigger,
-        triggers=(active_trigger,),
+        prepend_trigger_to_prompt=bool(prepend_trigger_to_prompt),
        extra_positive=extra_positive,
        extra_negative=extra_negative,
        soft_prompt=soft_prompt,
        hard_prompt=hard_prompt,
        soft_negative_base=soft_negative_base,
        hard_negative_base=hard_negative_base,
        soft_caption_parts=soft_caption_parts,
        hard_caption_parts=hard_caption_parts,
    )
-    return {
+    pair = {
        "mode": "Insta/OF",
        "options": options,
        "shared_descriptor": descriptor,
@@ -169,12 +156,12 @@ def assemble_insta_pair_metadata(
        "hardcore_clothing_state": hard_clothing_state,
        "hardcore_detail_density": hard_detail_density,
        "hardcore_position_config": hard_row.get("hardcore_position_config", {}),
-        "softcore_prompt": soft_prompt,
+        "softcore_prompt": normalized_text["soft_prompt"],
-        "hardcore_prompt": hard_prompt,
+        "hardcore_prompt": normalized_text["hard_prompt"],
-        "softcore_negative_prompt": soft_negative,
+        "softcore_negative_prompt": normalized_text["soft_negative"],
-        "hardcore_negative_prompt": hard_negative,
+        "hardcore_negative_prompt": normalized_text["hard_negative"],
-        "softcore_caption": soft_caption,
+        "softcore_caption": normalized_text["soft_caption"],
-        "hardcore_caption": hard_caption,
+        "hardcore_caption": normalized_text["hard_caption"],
        "softcore_row": soft_row,
        "hardcore_row": hard_row,
        "hardcore_women_count": hard_women_count,
@@ -188,3 +175,4 @@ def assemble_insta_pair_metadata(
        "softcore_camera_scene_directive": soft_camera_scene_directive,
        "hardcore_camera_scene_directive": hard_camera_scene_directive,
    }
    return row_policy.normalize_pair_metadata(pair, active_trigger=active_trigger)
@@ -38,6 +38,7 @@ try:
    from . import pair_output
    from . import pair_rows
    from . import pair_options
    from . import row_normalization as row_policy
    from . import scene_camera_adapters
    from . import seed_config as seed_policy
    from .hardcore_text_cleanup import (
@@ -46,11 +47,6 @@ try:
    )
    from .hardcore_action_metadata import source_hardcore_action_family
    from .hardcore_role_graphs import build_hardcore_role_graph
    from .prompt_hygiene import (
        sanitize_caption_text,
        sanitize_negative_text,
        sanitize_prompt_text,
    )
 except ImportError:  # Allows local smoke tests with `python -c`.
    from category_library import (
        category_json_files as _json_files,
@@ -82,6 +78,7 @@ except ImportError:  # Allows local smoke tests with `python -c`.
    import pair_output
    import pair_rows
    import pair_options
    import row_normalization as row_policy
    import scene_camera_adapters
    import seed_config as seed_policy
    from hardcore_text_cleanup import (
@@ -90,11 +87,6 @@ except ImportError:  # Allows local smoke tests with `python -c`.
    )
    from hardcore_action_metadata import source_hardcore_action_family
    from hardcore_role_graphs import build_hardcore_role_graph
    from prompt_hygiene import (
        sanitize_caption_text,
        sanitize_negative_text,
        sanitize_prompt_text,
    )
 ROOT_DIR = Path(__file__).resolve().parent
@@ -1377,17 +1369,11 @@ def _disable_row_expression(row: dict[str, Any], source: str = "disabled") -> di
 def _prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
-    trigger = trigger.strip()
+    return row_policy.prepend_trigger(prompt, trigger, enabled)
    if not enabled or not trigger:
        return prompt
    if prompt.lower().startswith(trigger.lower()):
        return prompt
    return f"{trigger}, {prompt}"
 def _combined_negative(base: str, extra: str) -> str:
-    parts = [part.strip() for part in (base, extra) if part and part.strip()]
+    return row_policy.combined_negative(base, extra)
    return ", ".join(parts)
 def camera_mode_choices() -> list[str]:
@@ -4190,17 +4176,16 @@ def build_prompt(
        )
    if not expression_enabled:
        row = _disable_row_expression(row, "disabled")
    if extra_positive.strip():
        row["prompt"] = f"{row['prompt'].rstrip()} {extra_positive.strip()}"
    row = _apply_camera_config(row, camera_config)
    active_trigger = trigger.strip() or g.TRIGGER
-    row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt))
+    row = row_policy.normalize_prompt_row(
-    row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,))
+        row,
-    row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,))
+        active_trigger=active_trigger,
-    row["negative_prompt"] = sanitize_negative_text(
+        prepend_trigger_to_prompt=bool(prepend_trigger_to_prompt),
-        _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
+        extra_positive=extra_positive,
        extra_negative=extra_negative,
        default_negative=g.NEGATIVE_PROMPT,
    )
    row["trigger"] = active_trigger
    row.setdefault("expression_intensity", expression_intensity)
    row.setdefault("expression_intensity_source", expression_intensity_source)
    return row
@@ -0,0 +1,119 @@
 from __future__ import annotations
 from typing import Any
 try:
    from .prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
 except ImportError:  # Allows local smoke tests with `python tools/prompt_smoke.py`.
    from prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
 def _trigger_tuple(active_trigger: str) -> tuple[str, ...]:
    """Wrap the stripped trigger in a one-item tuple; blank or falsy input gives ``()``."""
    cleaned = str(active_trigger or "").strip()
    if not cleaned:
        return ()
    return (cleaned,)
 def prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
    """Return ``prompt`` with ``"<trigger>, "`` in front when prepending applies.

    The prompt is returned unprefixed when ``enabled`` is falsy, when the
    trigger is blank after stripping, or when the prompt already starts with
    the trigger. That last check is a plain case-insensitive prefix
    comparison. Falsy ``prompt``/``trigger`` values are treated as "".
    """
    text = str(prompt or "")
    tag = str(trigger or "").strip()
    needs_prefix = bool(enabled and tag) and not text.lower().startswith(tag.lower())
    return f"{tag}, {text}" if needs_prefix else text
 def combined_negative(base: str, extra: str) -> str:
    """Join the non-blank ``base`` and ``extra`` negative text with ``", "``.

    Each kept piece is stripped; falsy or whitespace-only pieces are dropped,
    so the result may be an empty string.
    """
    kept = []
    for piece in (base, extra):
        if not piece:
            continue
        trimmed = str(piece).strip()
        if trimmed:
            kept.append(trimmed)
    return ", ".join(kept)
 def caption_from_parts(parts: list[Any] | tuple[Any, ...], *, active_trigger: str = "") -> str:
    """Join caption parts with ``", "`` and pass the result through caption sanitization.

    Each part is converted with ``str`` and stripped; parts that are ``None``
    or blank after stripping are left out. ``active_trigger`` is forwarded to
    ``sanitize_caption_text`` as its ``triggers`` tuple (empty when the
    trigger is blank).
    """
    # Skip None explicitly: str(None) is the non-empty text "None", which would
    # otherwise end up as a literal caption item.
    stripped_parts = (str(part).strip() for part in parts if part is not None)
    text = ", ".join(piece for piece in stripped_parts if piece)
    return sanitize_caption_text(text, triggers=_trigger_tuple(active_trigger))
 def normalize_prompt_row(
    row: dict[str, Any],
    *,
    active_trigger: str,
    prepend_trigger_to_prompt: bool,
    extra_positive: str = "",
    extra_negative: str = "",
    default_negative: str = "",
 ) -> dict[str, Any]:
    """Finalize the text fields of a single prompt row, in place.

    Steps, in order: append the stripped ``extra_positive`` to the prompt,
    optionally prepend the trigger, sanitize prompt and caption, merge
    ``extra_negative`` into the negative prompt and sanitize it, then store
    the stripped trigger under ``row["trigger"]``.

    ``default_negative`` is only used when the row has no ``negative_prompt``
    key at all. The same ``row`` object is mutated and returned.
    """
    tag = str(active_trigger or "").strip()
    addition = str(extra_positive or "").strip()
    text = str(row.get("prompt", "") or "")
    if addition:
        text = (text.rstrip() + " " + addition).strip()
    text = prepend_trigger(text, tag, bool(prepend_trigger_to_prompt))
    row["prompt"] = sanitize_prompt_text(text, triggers=_trigger_tuple(tag))
    row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=_trigger_tuple(tag))
    negative_base = str(row.get("negative_prompt", default_negative) or "")
    merged_negative = combined_negative(negative_base, extra_negative)
    row["negative_prompt"] = sanitize_negative_text(merged_negative)
    row["trigger"] = tag
    return row
 def normalize_pair_text_outputs(
    *,
    active_trigger: str,
    prepend_trigger_to_prompt: bool,
    extra_positive: str = "",
    extra_negative: str = "",
    soft_prompt: str,
    hard_prompt: str,
    soft_negative_base: str,
    hard_negative_base: str,
    soft_caption_parts: list[Any] | tuple[Any, ...],
    hard_caption_parts: list[Any] | tuple[Any, ...],
 ) -> dict[str, str]:
    """Build the final soft/hard prompt, negative, and caption strings for a pair.

    Both prompts get the stripped ``extra_positive`` appended (when present)
    and the trigger optionally prepended before sanitization. Each negative
    base is merged with ``extra_negative`` and sanitized. Captions are built
    from their parts via ``caption_from_parts``.

    Returns a dict with the keys ``soft_prompt``, ``hard_prompt``,
    ``soft_negative``, ``hard_negative``, ``soft_caption``, ``hard_caption``.
    """
    tag = str(active_trigger or "").strip()
    addition = str(extra_positive or "").strip()
    use_trigger = bool(prepend_trigger_to_prompt)
    prompts = [soft_prompt, hard_prompt]
    if addition:
        prompts = [f"{str(text or '').rstrip()} {addition}" for text in prompts]
    soft_text, hard_text = [prepend_trigger(text, tag, use_trigger) for text in prompts]
    outputs: dict[str, str] = {}
    outputs["soft_prompt"] = sanitize_prompt_text(soft_text, triggers=_trigger_tuple(tag))
    outputs["hard_prompt"] = sanitize_prompt_text(hard_text, triggers=_trigger_tuple(tag))
    outputs["soft_negative"] = sanitize_negative_text(combined_negative(soft_negative_base, extra_negative))
    outputs["hard_negative"] = sanitize_negative_text(combined_negative(hard_negative_base, extra_negative))
    outputs["soft_caption"] = caption_from_parts(soft_caption_parts, active_trigger=tag)
    outputs["hard_caption"] = caption_from_parts(hard_caption_parts, active_trigger=tag)
    return outputs
 def sanitize_metadata_row_text(row: dict[str, Any], *, active_trigger: str = "") -> dict[str, Any]:
    """Sanitize whichever of ``prompt``/``caption``/``negative_prompt`` the row has, in place.

    The trigger used for sanitization is ``active_trigger`` when given,
    otherwise the row's own ``trigger`` value. Keys that are absent are not
    created. A non-blank trigger is written to ``row["trigger"]`` only when
    the row has no truthy trigger yet. The same ``row`` object is returned.
    """
    tag = str(active_trigger or row.get("trigger") or "").strip()
    tags = _trigger_tuple(tag)
    trigger_aware_fields = (
        ("prompt", sanitize_prompt_text),
        ("caption", sanitize_caption_text),
    )
    for field, clean in trigger_aware_fields:
        if field in row:
            row[field] = clean(row.get(field, ""), triggers=tags)
    if "negative_prompt" in row:
        row["negative_prompt"] = sanitize_negative_text(row.get("negative_prompt", ""))
    if tag and not row.get("trigger"):
        row["trigger"] = tag
    return row
 def normalize_pair_metadata(pair: dict[str, Any], *, active_trigger: str = "") -> dict[str, Any]:
    """Sanitize the text fields of assembled pair metadata, in place.

    Covers the top-level soft/hard prompt, caption, and negative-prompt keys
    that are present, then the embedded ``softcore_row``/``hardcore_row``
    dicts via ``sanitize_metadata_row_text``. Missing keys are not created.
    The same ``pair`` object is returned.
    """
    tag = str(active_trigger or "").strip()
    tags = _trigger_tuple(tag)

    def clean_prompt(text: Any) -> str:
        return sanitize_prompt_text(text, triggers=tags)

    def clean_caption(text: Any) -> str:
        return sanitize_caption_text(text, triggers=tags)

    # Order matters only for which field is processed first: prompts, captions, negatives.
    field_cleaners = (
        ("softcore_prompt", clean_prompt),
        ("hardcore_prompt", clean_prompt),
        ("softcore_caption", clean_caption),
        ("hardcore_caption", clean_caption),
        ("softcore_negative_prompt", sanitize_negative_text),
        ("hardcore_negative_prompt", sanitize_negative_text),
    )
    for field, clean in field_cleaners:
        if field in pair:
            pair[field] = clean(pair.get(field, ""))
    for row_key in ("softcore_row", "hardcore_row"):
        embedded = pair.get(row_key)
        if isinstance(embedded, dict):
            pair[row_key] = sanitize_metadata_row_text(embedded, active_trigger=tag)
    return pair
@@ -35,6 +35,7 @@ import generation_profile_config  # noqa: E402
 import krea_formatter  # noqa: E402
 import location_config  # noqa: E402
 import prompt_builder as pb  # noqa: E402
 import row_normalization  # noqa: E402
 import sdxl_formatter  # noqa: E402
 import seed_config  # noqa: E402
@@ -770,6 +771,82 @@ def smoke_character_profile_policy() -> None:
    _expect(applied_profile.get("profile_type") == "character", "Profile context returned wrong profile")
 def smoke_row_normalization_policy() -> None:
    """Smoke-check the row normalization policy module.

    Four stages, in order:
    1. the prompt-builder `_prepend_trigger` / `_combined_negative` helpers
       return the same values as the `row_normalization` functions;
    2. `normalize_prompt_row` on a row seeded with a doubled trigger and
       repeated negative items;
    3. `normalize_pair_text_outputs` on raw soft/hard inputs;
    4. `normalize_pair_metadata` on top-level pair fields and embedded rows.

    NOTE(review): `Trigger`, `_expect`, `_expect_trigger_once`, and
    `_expect_no_duplicate_comma_items` are defined outside this view; their
    exact failure behavior is assumed from their names — confirm.
    """
    # Stage 1: the builder helpers and the policy functions must agree on output.
    _expect(
        pb._prepend_trigger("base prompt", Trigger, True) == row_normalization.prepend_trigger("base prompt", Trigger, True),
        "Prompt builder trigger helper should delegate to row normalization policy",
    )
    _expect(
        pb._combined_negative("bad anatomy", "low quality") == row_normalization.combined_negative("bad anatomy", "low quality"),
        "Prompt builder negative helper should delegate to row normalization policy",
    )
    # Stage 2: single-row normalization. The input deliberately repeats the
    # trigger and the "bad anatomy" negative item.
    row = row_normalization.normalize_prompt_row(
        {
            "prompt": f"{Trigger}, {Trigger}, base prompt.",
            "caption": f"{Trigger}, {Trigger}, base caption.",
            "negative_prompt": "bad anatomy, bad anatomy",
        },
        active_trigger=Trigger,
        prepend_trigger_to_prompt=True,
        extra_positive="extra detail",
        extra_negative="low quality, bad anatomy",
        default_negative="bad anatomy",
    )
    _expect_trigger_once("row_normalization.prompt", row.get("prompt"), Trigger)
    _expect_trigger_once("row_normalization.caption", row.get("caption"), Trigger)
    _expect("extra detail" in row.get("prompt", ""), "Row normalization lost extra positive text")
    _expect(row.get("trigger") == Trigger, "Row normalization lost active trigger")
    _expect_no_duplicate_comma_items("row_normalization.negative", row.get("negative_prompt"))
    # Stage 3: pair text outputs built from raw prompts, negative bases, and
    # caption parts that already include the trigger.
    outputs = row_normalization.normalize_pair_text_outputs(
        active_trigger=Trigger,
        prepend_trigger_to_prompt=True,
        extra_positive="pair extra",
        extra_negative="low quality, bad anatomy",
        soft_prompt="soft prompt.",
        hard_prompt="hard prompt.",
        soft_negative_base="bad anatomy, bad anatomy",
        hard_negative_base="bad anatomy, low quality",
        soft_caption_parts=[Trigger, "soft caption"],
        hard_caption_parts=[Trigger, "hard caption"],
    )
    _expect_trigger_once("row_normalization.soft_prompt", outputs.get("soft_prompt"), Trigger)
    _expect_trigger_once("row_normalization.hard_prompt", outputs.get("hard_prompt"), Trigger)
    _expect_trigger_once("row_normalization.soft_caption", outputs.get("soft_caption"), Trigger)
    _expect_trigger_once("row_normalization.hard_caption", outputs.get("hard_caption"), Trigger)
    _expect_no_duplicate_comma_items("row_normalization.soft_negative", outputs.get("soft_negative"))
    _expect_no_duplicate_comma_items("row_normalization.hard_negative", outputs.get("hard_negative"))
    # Stage 4: final pair cleanup, including the embedded soft/hard row dicts.
    pair = row_normalization.normalize_pair_metadata(
        {
            "softcore_prompt": f"{Trigger}, {Trigger}, soft pair.",
            "hardcore_prompt": f"{Trigger}, {Trigger}, hard pair.",
            "softcore_caption": f"{Trigger}, {Trigger}, soft caption.",
            "hardcore_caption": f"{Trigger}, {Trigger}, hard caption.",
            "softcore_negative_prompt": "bad anatomy, bad anatomy",
            "hardcore_negative_prompt": "bad anatomy, low quality, bad anatomy",
            "softcore_row": {
                "prompt": f"{Trigger}, {Trigger}, embedded soft.",
                "caption": f"{Trigger}, {Trigger}, embedded soft caption.",
                "negative_prompt": "bad anatomy, bad anatomy",
            },
            "hardcore_row": {
                "prompt": f"{Trigger}, {Trigger}, embedded hard.",
                "caption": f"{Trigger}, {Trigger}, embedded hard caption.",
                "negative_prompt": "low quality, bad anatomy, low quality",
            },
        },
        active_trigger=Trigger,
    )
    # Only a sample of the pair fields is asserted here (not every prompt,
    # caption, and negative key that was seeded above).
    _expect_trigger_once("row_normalization.pair.softcore_prompt", pair.get("softcore_prompt"), Trigger)
    _expect_trigger_once("row_normalization.pair.hardcore_prompt", pair.get("hardcore_prompt"), Trigger)
    _expect_trigger_once("row_normalization.pair.softcore_row.prompt", pair["softcore_row"].get("prompt"), Trigger)
    _expect_trigger_once("row_normalization.pair.hardcore_row.caption", pair["hardcore_row"].get("caption"), Trigger)
    _expect_no_duplicate_comma_items("row_normalization.pair.soft_negative", pair.get("softcore_negative_prompt"))
    _expect_no_duplicate_comma_items("row_normalization.pair.hard_row_negative", pair["hardcore_row"].get("negative_prompt"))
 def smoke_hardcore_position_config_policy() -> None:
    _expect(
        pb.HARDCORE_POSITION_FAMILY_CHOICES is hardcore_position_config.HARDCORE_POSITION_FAMILY_CHOICES,
@@ -2740,6 +2817,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
    ("filter_config_policy", smoke_filter_config_policy),
    ("character_config_policy", smoke_character_config_policy),
    ("character_profile_policy", smoke_character_profile_policy),
    ("row_normalization_policy", smoke_row_normalization_policy),
    ("hardcore_position_config_policy", smoke_hardcore_position_config_policy),
    ("category_library_route", smoke_category_library_route),
    ("hardcore_category_routes", smoke_hardcore_category_routes),