From 4fdef3875b949545342eadd6cd826682db95772f Mon Sep 17 00:00:00 2001
From: Ethanfel <ethan.fel@ts-pc.fr>
Date: Sat, 27 Jun 2026 14:01:10 +0200
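Subject: [PATCH] Centralize negative prompt hygiene

Add prompt_hygiene.combine_negative_text() and make both
krea_formatter._combine_negative() and
row_normalization.combined_negative() delegate to it.

Behavior change: each part is now trimmed of leading/trailing spaces,
commas, periods, and semicolons before joining, and the joined result is
passed through sanitize_negative_text(), so duplicate comma-list items
are removed from merged negative prompts. A prompt_hygiene_policy smoke
case covers the shared helper and both delegating call sites.
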
---
 docs/prompt-architecture-improvement-plan.md |  1 +
 docs/prompt-pool-routing-map.md              |  2 +-
 krea_formatter.py                            |  7 +++---
 prompt_hygiene.py                            |  5 ++++
 row_normalization.py                         |  7 +++---
 tools/prompt_map_audit.py                    |  1 +
 tools/prompt_smoke.py                        | 24 ++++++++++++++++++++
 7 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md
index 9ff63e5..b9b3a60 100644
--- a/docs/prompt-architecture-improvement-plan.md
+++ b/docs/prompt-architecture-improvement-plan.md
@@ -53,6 +53,7 @@ It should only handle route-agnostic cleanup:
 - empty field-label removal;
 - repeated trigger prefix cleanup;
 - duplicate comma-list item removal;
+- route-agnostic negative-prompt merge/dedupe;
 - adjacent duplicate sentence cleanup;
 - simple dangling connector cleanup.
 
diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md
index 920dfb9..3d10124 100644
--- a/docs/prompt-pool-routing-map.md
+++ b/docs/prompt-pool-routing-map.md
@@ -124,7 +124,7 @@ Core helper ownership:
 | `row_camera.py` | Row-level camera insertion, contextual coworking composition mutation, subject-kind detection, POV label fallback, and POV suppression of normal camera directives. |
 | `krea_row_fields.py` | Shared Krea normal-row field extraction for item, scene, pose, expression, composition/source-composition, camera, and style used by normal and configured-cast routes. |
 | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
-| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
+| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup, including route-agnostic negative-prompt merge/dedupe. |
 | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. |
 | `formatter_detail.py` | Shared formatter detail-level choices, normalization, and concise/balanced/dense gates used by Krea2 and caption routes. |
 | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
diff --git a/krea_formatter.py b/krea_formatter.py
index a6dd4dc..fcd84ed 100644
--- a/krea_formatter.py
+++ b/krea_formatter.py
@@ -38,7 +38,7 @@ try:
         pov_labels_from_value as _pov_labels_from_value,
     )
     from .krea_pov_actions import pov_action_phrase as _pov_action_phrase
-    from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
+    from .prompt_hygiene import combine_negative_text, sanitize_negative_text, sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
     import formatter_input as input_policy
     import krea_format_route
@@ -74,7 +74,7 @@ except ImportError:  # Allows local smoke tests with `python -c`.
         pov_labels_from_value as _pov_labels_from_value,
     )
     from krea_pov_actions import pov_action_phrase as _pov_action_phrase
-    from prompt_hygiene import sanitize_negative_text, sanitize_prose_text
+    from prompt_hygiene import combine_negative_text, sanitize_negative_text, sanitize_prose_text
 
 
 TRIGGER_CANDIDATES = (
@@ -199,8 +199,7 @@ def _single_caption_front(row: dict[str, Any]) -> dict[str, str]:
 
 
 def _combine_negative(*parts: str) -> str:
-    cleaned = [_clean(part).strip(" ,.") for part in parts if _clean(part).strip(" ,.")]
-    return ", ".join(cleaned)
+    return combine_negative_text(*parts)
 
 
 def _sanitize_scene_text_for_cast(text: Any, labels: list[str]) -> str:
diff --git a/prompt_hygiene.py b/prompt_hygiene.py
index 40770b5..d014e72 100644
--- a/prompt_hygiene.py
+++ b/prompt_hygiene.py
@@ -167,3 +167,8 @@ def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str:
 
 def sanitize_negative_text(value: Any) -> str:
     return dedupe_comma_list(value)
+
+
+def combine_negative_text(*parts: Any) -> str:
+    cleaned = [clean_spacing(part).strip(" ,.;") for part in parts if clean_spacing(part).strip(" ,.;")]
+    return sanitize_negative_text(", ".join(cleaned))
diff --git a/row_normalization.py b/row_normalization.py
index 48caba4..4c74739 100644
--- a/row_normalization.py
+++ b/row_normalization.py
@@ -3,9 +3,9 @@ from __future__ import annotations
 from typing import Any
 
 try:
-    from .prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
+    from .prompt_hygiene import combine_negative_text, sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
 except ImportError:  # Allows local smoke tests with `python tools/prompt_smoke.py`.
-    from prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
+    from prompt_hygiene import combine_negative_text, sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
 
 
 def _trigger_tuple(active_trigger: str) -> tuple[str, ...]:
@@ -24,8 +24,7 @@ def prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str:
 
 
 def combined_negative(base: str, extra: str) -> str:
-    parts = [str(part).strip() for part in (base, extra) if part and str(part).strip()]
-    return ", ".join(parts)
+    return combine_negative_text(base, extra)
 
 
 def caption_from_parts(parts: list[Any] | tuple[Any, ...], *, active_trigger: str = "") -> str:
diff --git a/tools/prompt_map_audit.py b/tools/prompt_map_audit.py
index 5b2a066..3746356 100644
--- a/tools/prompt_map_audit.py
+++ b/tools/prompt_map_audit.py
@@ -38,6 +38,7 @@ CRITICAL_ROUTE_MODULES: tuple[tuple[str, str], ...] = (
     ("krea_format_route.py", "krea_format_route_policy"),
     ("sdxl_format_route.py", "sdxl_format_route_policy"),
     ("caption_format_route.py", "caption_format_route_policy"),
+    ("prompt_hygiene.py", "prompt_hygiene_policy"),
     ("formatter_detail.py", "formatter_detail_policy"),
     ("formatter_input.py", "formatter_input_policy"),
     ("formatter_target.py", "formatter_target_policy"),
diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py
index 2c753a1..f1af5c0 100644
--- a/tools/prompt_smoke.py
+++ b/tools/prompt_smoke.py
@@ -63,6 +63,7 @@ import pair_camera  # noqa: E402
 import pair_cast  # noqa: E402
 import pair_clothing  # noqa: E402
 import pair_rows  # noqa: E402
+import prompt_hygiene  # noqa: E402
 import prompt_builder as pb  # noqa: E402
 import pov_policy  # noqa: E402
 import row_normalization  # noqa: E402
@@ -2412,6 +2413,28 @@ def smoke_row_normalization_policy() -> None:
     _expect_no_duplicate_comma_items("row_normalization.pair.hard_row_negative", pair["hardcore_row"].get("negative_prompt"))
 
 
+def smoke_prompt_hygiene_policy() -> None:
+    merged = prompt_hygiene.combine_negative_text(
+        "bad anatomy, bad anatomy",
+        "low quality",
+        "bad anatomy",
+        "",
+    )
+    _expect(merged == "bad anatomy, low quality", "Prompt hygiene negative merge/dedupe changed")
+    _expect(
+        row_normalization.combined_negative("bad anatomy, bad anatomy", "low quality, bad anatomy") == merged,
+        "Row normalization negative merge should delegate to prompt hygiene",
+    )
+    _expect(
+        krea_formatter._combine_negative("bad anatomy, bad anatomy", "low quality", "bad anatomy") == merged,
+        "Krea negative merge should delegate to prompt hygiene",
+    )
+    _expect(
+        prompt_hygiene.sanitize_prose_text("Scene: . A sentence. A sentence.") == "A sentence.",
+        "Prompt hygiene prose cleanup changed",
+    )
+
+
 def smoke_row_rendering_policy() -> None:
     _expect(pb.SINGLE_TEMPLATE == row_rendering.SINGLE_TEMPLATE, "Prompt builder single template should delegate to row_rendering")
     _expect(
@@ -6662,6 +6685,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
     ("character_config_policy", smoke_character_config_policy),
     ("character_profile_policy", smoke_character_profile_policy),
     ("row_normalization_policy", smoke_row_normalization_policy),
+    ("prompt_hygiene_policy", smoke_prompt_hygiene_policy),
     ("row_rendering_policy", smoke_row_rendering_policy),
     ("row_role_graph_policy", smoke_row_role_graph_policy),
     ("row_assembly_policy", smoke_row_assembly_policy),