From 4fdef3875b949545342eadd6cd826682db95772f Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 27 Jun 2026 14:01:10 +0200 Subject: [PATCH] Centralize negative prompt hygiene --- docs/prompt-architecture-improvement-plan.md | 1 + docs/prompt-pool-routing-map.md | 2 +- krea_formatter.py | 7 +++--- prompt_hygiene.py | 5 ++++ row_normalization.py | 7 +++--- tools/prompt_map_audit.py | 1 + tools/prompt_smoke.py | 24 ++++++++++++++++++++ 7 files changed, 38 insertions(+), 9 deletions(-) diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 9ff63e5..b9b3a60 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -53,6 +53,7 @@ It should only handle route-agnostic cleanup: - empty field-label removal; - repeated trigger prefix cleanup; - duplicate comma-list item removal; +- route-agnostic negative-prompt merge/dedupe; - adjacent duplicate sentence cleanup; - simple dangling connector cleanup. diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 920dfb9..3d10124 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -124,7 +124,7 @@ Core helper ownership: | `row_camera.py` | Row-level camera insertion, contextual coworking composition mutation, subject-kind detection, POV label fallback, and POV suppression of normal camera directives. | | `krea_row_fields.py` | Shared Krea normal-row field extraction for item, scene, pose, expression, composition/source-composition, camera, and style used by normal and configured-cast routes. | | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. | -| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | +| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup, including route-agnostic negative-prompt merge/dedupe. | | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. | | `formatter_detail.py` | Shared formatter detail-level choices, normalization, and concise/balanced/dense gates used by Krea2 and caption routes. | | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | diff --git a/krea_formatter.py b/krea_formatter.py index a6dd4dc..fcd84ed 100644 --- a/krea_formatter.py +++ b/krea_formatter.py @@ -38,7 +38,7 @@ try: pov_labels_from_value as _pov_labels_from_value, ) from .krea_pov_actions import pov_action_phrase as _pov_action_phrase - from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text + from .prompt_hygiene import combine_negative_text, sanitize_negative_text, sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. import formatter_input as input_policy import krea_format_route @@ -74,7 +74,7 @@ except ImportError: # Allows local smoke tests with `python -c`. pov_labels_from_value as _pov_labels_from_value, ) from krea_pov_actions import pov_action_phrase as _pov_action_phrase - from prompt_hygiene import sanitize_negative_text, sanitize_prose_text + from prompt_hygiene import combine_negative_text, sanitize_negative_text, sanitize_prose_text TRIGGER_CANDIDATES = ( @@ -199,8 +199,7 @@ def _single_caption_front(row: dict[str, Any]) -> dict[str, str]: def _combine_negative(*parts: str) -> str: - cleaned = [_clean(part).strip(" ,.") for part in parts if _clean(part).strip(" ,.")] - return ", ".join(cleaned) + return combine_negative_text(*parts) def _sanitize_scene_text_for_cast(text: Any, labels: list[str]) -> str: diff --git a/prompt_hygiene.py b/prompt_hygiene.py index 40770b5..d014e72 100644 --- a/prompt_hygiene.py +++ b/prompt_hygiene.py @@ -167,3 +167,8 @@ def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str: def sanitize_negative_text(value: Any) -> str: return dedupe_comma_list(value) + + +def combine_negative_text(*parts: Any) -> str: + cleaned = [clean_spacing(part).strip(" ,.;") for part in parts if clean_spacing(part).strip(" ,.;")] + return sanitize_negative_text(", ".join(cleaned)) diff --git a/row_normalization.py b/row_normalization.py index 48caba4..4c74739 100644 --- a/row_normalization.py +++ b/row_normalization.py @@ -3,9 +3,9 @@ from __future__ import annotations from typing import Any try: - from .prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text + from .prompt_hygiene import combine_negative_text, sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`. - from prompt_hygiene import sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text + from prompt_hygiene import combine_negative_text, sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text def _trigger_tuple(active_trigger: str) -> tuple[str, ...]: @@ -24,8 +24,7 @@ def prepend_trigger(prompt: str, trigger: str, enabled: bool) -> str: def combined_negative(base: str, extra: str) -> str: - parts = [str(part).strip() for part in (base, extra) if part and str(part).strip()] - return ", ".join(parts) + return combine_negative_text(base, extra) def caption_from_parts(parts: list[Any] | tuple[Any, ...], *, active_trigger: str = "") -> str: diff --git a/tools/prompt_map_audit.py b/tools/prompt_map_audit.py index 5b2a066..3746356 100644 --- a/tools/prompt_map_audit.py +++ b/tools/prompt_map_audit.py @@ -38,6 +38,7 @@ CRITICAL_ROUTE_MODULES: tuple[tuple[str, str], ...] = ( ("krea_format_route.py", "krea_format_route_policy"), ("sdxl_format_route.py", "sdxl_format_route_policy"), ("caption_format_route.py", "caption_format_route_policy"), + ("prompt_hygiene.py", "prompt_hygiene_policy"), ("formatter_detail.py", "formatter_detail_policy"), ("formatter_input.py", "formatter_input_policy"), ("formatter_target.py", "formatter_target_policy"), diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 2c753a1..f1af5c0 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -63,6 +63,7 @@ import pair_camera # noqa: E402 import pair_cast # noqa: E402 import pair_clothing # noqa: E402 import pair_rows # noqa: E402 +import prompt_hygiene # noqa: E402 import prompt_builder as pb # noqa: E402 import pov_policy # noqa: E402 import row_normalization # noqa: E402 @@ -2412,6 +2413,28 @@ def smoke_row_normalization_policy() -> None: _expect_no_duplicate_comma_items("row_normalization.pair.hard_row_negative", pair["hardcore_row"].get("negative_prompt")) +def smoke_prompt_hygiene_policy() -> None: + merged = prompt_hygiene.combine_negative_text( + "bad anatomy, bad anatomy", + "low quality", + "bad anatomy", + "", + ) + _expect(merged == "bad anatomy, low quality", "Prompt hygiene negative merge/dedupe changed") + _expect( + row_normalization.combined_negative("bad anatomy, bad anatomy", "low quality, bad anatomy") == merged, + "Row normalization negative merge should delegate to prompt hygiene", + ) + _expect( + krea_formatter._combine_negative("bad anatomy, bad anatomy", "low quality", "bad anatomy") == merged, + "Krea negative merge should delegate to prompt hygiene", + ) + _expect( + prompt_hygiene.sanitize_prose_text("Scene: . A sentence. A sentence.") == "A sentence.", + "Prompt hygiene prose cleanup changed", + ) + + def smoke_row_rendering_policy() -> None: _expect(pb.SINGLE_TEMPLATE == row_rendering.SINGLE_TEMPLATE, "Prompt builder single template should delegate to row_rendering") _expect( @@ -6662,6 +6685,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [ ("character_config_policy", smoke_character_config_policy), ("character_profile_policy", smoke_character_profile_policy), ("row_normalization_policy", smoke_row_normalization_policy), + ("prompt_hygiene_policy", smoke_prompt_hygiene_policy), ("row_rendering_policy", smoke_row_rendering_policy), ("row_role_graph_policy", smoke_row_role_graph_policy), ("row_assembly_policy", smoke_row_assembly_policy),