Add prompt hygiene architecture pass

2026-06-26 13:26:06 +02:00
parent c768b37399
commit b3cd8d77a1
7 changed files with 569 additions and 24 deletions
@@ -4,6 +4,11 @@ import json
 import re
 from typing import Any
 try:
    from .prompt_hygiene import sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
    from prompt_hygiene import sanitize_prose_text
 OLD_TRIGGER = "sxcpinup_coloredpencil"
 DEFAULT_TRIGGER = "sxcppnl7"
@@ -724,6 +729,8 @@ def naturalize_caption(
    row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
    if row is not None:
        prose, method = _metadata_to_prose(row, detail_level, keep_style)
-        return _with_trigger(prose, trigger, include_trigger), f"{row_method}:{method}"
+        caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
        return caption, f"{row_method}:{method}"
    prose, method = _text_to_prose(source_text, detail_level, keep_style)
-    return _with_trigger(prose, trigger, include_trigger), method
+    caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
    return caption, method
@@ -0,0 +1,301 @@
 # Prompt Architecture Improvement Plan
 This is a working research note for organizing the prompt builder around the
 routing map in `docs/prompt-pool-routing-map.md`.
 ## Current Branch Additions
 The current branch adds two major surfaces:
 - `SxCP Krea2 Resolution Selector` in `__init__.py`, with README notes.
 - Expanded hardcore interaction/manual/action pools in
  `categories/sexual_poses.json`,
  `categories/expression_composition_pools.json`, `prompt_builder.py`, and
  `krea_formatter.py`.
 The map audit currently sees:
 - 15 sexual pose subcategories.
 - 94 sexual pose item templates.
 - 23 expression pools.
 - 24 composition pools.
 - A new Krea2 resolution node with width/height/API aspect outputs.
 ## Architectural Finding
 The project has a good functional map, but ownership is still mixed inside large
 files:
 - `prompt_builder.py` owns selection, character resolution, role graph logic,
  camera adaptation, pair assembly, and some final string cleanup.
 - `krea_formatter.py` owns metadata parsing, cast naturalization, sexual action
  rewriting, POV rewriting, clothing cleanup, camera preservation, fallback
  parsing, and final prose assembly.
 - `sdxl_formatter.py` owns tag assembly and style/quality presets.
 - `caption_naturalizer.py` owns training-caption prose.
 - Category JSON files own scalable pool content, but Python still owns several
  compatibility and role-graph decisions.
 The biggest maintainability risk is not the number of pools. The risk is that
 selection, semantic rewriting, and final text hygiene are too interleaved. When a
 prompt has wrong text, it is easy to patch the wrong layer.
 ## First Refactor Boundary
 Generic text hygiene now has one home:
 - `prompt_hygiene.py`
 It should only handle route-agnostic cleanup:
 - whitespace and punctuation normalization;
 - empty field-label removal;
 - repeated trigger prefix cleanup;
 - duplicate comma-list item removal;
 - adjacent duplicate sentence cleanup;
 - simple dangling connector cleanup.
 It must not make semantic decisions such as sexual action positioning, POV
 geometry, clothing state, or model-specific tag weighting. Those stay in the
 route-specific owner.
 Current integration points:
 - `prompt_builder.build_prompt`
 - `prompt_builder.build_insta_of_pair`
 - `krea_formatter.format_krea2_prompt`
 - `sdxl_formatter.format_sdxl_prompt`
 - `caption_naturalizer.naturalize_caption`
 ## Target Organization
 ### Generation Layer
 Owner: `prompt_builder.py` plus `categories/*.json`.
 Keep here:
 - category/subcategory/item selection;
 - seed axis routing;
 - character slot/profile resolution;
 - scene/expression/composition pool selection;
 - role graph creation from structured category axes;
 - metadata row construction.
 Move or isolate later:
 - role graph generation for hardcore interaction categories into a dedicated
  module, for example `hardcore_role_graphs.py`;
 - camera-scene adapters into `scene_camera_adapters.py`;
 - category-library loading and inheritance helpers into `category_library.py`.
 ### Pair / Adapter Layer
 Owner today: `build_insta_of_pair`.
 Keep here:
 - soft/hard row creation;
 - continuity policy;
 - softcore cast policy;
 - pair-level camera routing;
 - pair metadata shape.
 Improve later:
 - make a single pair metadata sanitizer that normalizes `softcore_row`,
  `hardcore_row`, pair prompts, negatives, captions, and camera fields;
 - split pair assembly into small functions by phase:
  `build_soft_row`, `build_hard_row`, `resolve_pair_camera`,
  `resolve_pair_clothing`, `assemble_pair_metadata`.
 ### Krea2 Formatter Path
 Owner: `krea_formatter.py`.
 Keep here:
 - Krea prose style;
 - cast prose;
 - hardcore action sentence rewriting;
 - POV sentence rewriting;
 - clothing naturalization;
 - camera-scene preservation;
 - fallback text parsing.
 Improve later:
 - split semantic blocks into modules:
  `krea_cast.py`, `krea_actions.py`, `krea_pov.py`, `krea_clothing.py`;
 - add route-level smoke fixtures for representative metadata rows;
 - make `_hardcore_action_sentence` dispatch by action family instead of long
  conditional chains.
 ### SDXL Formatter Path
 Owner: `sdxl_formatter.py`.
 Keep here:
 - trigger behavior;
 - style and quality presets;
 - tag ordering;
 - weighted explicit tags;
 - negative-prompt assembly.
 Improve later:
 - move presets into data dictionaries or JSON so adding styles does not require
  editing formatter logic;
 - add formatter profiles for Pony, SDXL photo, and flat vector;
 - make fallback cleanup use the shared field-label inventory.
 ### Naturalizer Path
 Owner: `caption_naturalizer.py`.
 Keep here:
 - natural sentence caption assembly;
 - training-caption trigger behavior;
 - style-tail policy.
 Improve later:
 - share more metadata readers with Krea without sharing Krea prose;
 - add a `caption_profile` option for concise/dense LoRA caption styles.
 ### Category JSON Path
 Owner: `categories/*.json`.
 Keep here:
 - scalable prompt pool content;
 - named scene/expression/composition pools;
 - item templates and axes;
 - direct category-specific wording.
 Improve later:
 - introduce optional `family` and `action_type` fields on item templates so
  Python filters do less keyword guessing;
 - add `formatter_hint` fields only where needed, not globally;
 - add a JSON audit that checks every referenced expression/composition/scene pool
  exists.
 ### Node / UI Path
 Owner: `__init__.py`, `loop_nodes.py`, `web/*.js`.
 Keep here:
 - ComfyUI node input/output declarations;
 - widget behavior;
 - button actions;
 - dynamic input slots.
 Improve later:
 - split large node classes into files by family;
 - keep node display names, return names, and docs in sync through the audit
  helper;
 - add small endpoint tests for profile/accumulator/index-switch routes.
 ## Path-Specific Improvements
 ### Prompt Builder
 Near-term:
 - Final row hygiene: already done through `prompt_hygiene.py`.
 - Add a metadata invariant checker for rows before return.
 - Normalize every row with one function before JSON serialization.
 Medium-term:
 - Extract category loading and role graph logic.
 - Convert keyword-heavy interaction filtering to template metadata.
 ### Insta/OF Pair
 Near-term:
 - Normalize pair metadata with one helper.
 - Confirm pair prompts, captions, and soft/hard rows carry the same sanitized
  scene/camera/clothing fields.
 Medium-term:
 - Make pair camera and clothing phases explicit subfunctions.
 - Add smoke fixtures for same-cast, POV man, explicit nude, and different-camera
  modes.
 ### Krea2
 Near-term:
 - Final prose hygiene: already done through `prompt_hygiene.py`.
 - Add tests for close foreplay, POV oral, POV penetration, aftercare, manual
  stimulation, and camera-scene preservation.
 Medium-term:
 - Dispatch action rewriting by action family.
 - Split Krea semantic helpers into smaller modules.
 ### SDXL
 Near-term:
 - Final tag hygiene: already done through `prompt_hygiene.py`.
 - Add smoke tests for trigger preservation and duplicate tag removal.
 Medium-term:
 - Make style/quality presets data-driven.
 ### Naturalizer
 Near-term:
 - Final prose hygiene: already done through `prompt_hygiene.py`.
 - Verify training captions keep trigger exactly once.
 Medium-term:
 - Add caption profiles for training and browsing use cases.
 ### Camera / Scene
 Near-term:
 - Keep Qwen/orbit as camera source.
 - Keep scene-camera adapters scoped by location family.
 - Use the memory note in
  `/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV.
 Medium-term:
 - Move coworking adapter into a scene-camera adapter module.
 - Build new adapters one location family at a time.
 ## Invariants To Preserve
 - Metadata is the preferred formatter input.
 - Prompt Builder should output structured rows even if raw prompt text is rough.
 - Krea should fix prose and semantic action readability, not category selection.
 - SDXL should produce tag-style output and preserve model triggers as requested.
 - Naturalizer should output training-friendly captions without changing the
  selected content.
 - Generic cleanup belongs in `prompt_hygiene.py`; semantic cleanup belongs in
  the owning route.
 ## Recommended Next Passes
 1. Add metadata invariant checks and small smoke fixtures.
 2. Split Krea action/POV/clothing helpers into separate modules.
 3. Add category JSON pool reference validation to `tools/prompt_map_audit.py`.
 4. Extract scene-camera adapters from `prompt_builder.py`.
 5. Split `__init__.py` node classes by family after behavior is covered by smoke
   checks.
@@ -605,6 +605,25 @@ Naturalizer field consumption:
 | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` |
 | Text fallback | `caption` or `prompt` text | `_text_to_prose` |
 ### Final Text Hygiene
 `prompt_hygiene.py` owns route-agnostic final cleanup. It is intentionally
 small: whitespace, punctuation, empty field labels, adjacent duplicate
 sentences, repeated trigger prefixes, duplicate comma-list items, and dangling
 connectors.
 It is called from:
 - `prompt_builder.build_prompt`
 - `prompt_builder.build_insta_of_pair`
 - `krea_formatter.format_krea2_prompt`
 - `sdxl_formatter.format_sdxl_prompt`
 - `caption_naturalizer.naturalize_caption`
 Do not put semantic fixes in `prompt_hygiene.py`. Sexual action readability,
 POV geometry, clothing state, Krea prose, SDXL weighting, and training-caption
 policy still belong to their route-specific owner.
 ## Utility / Workflow Nodes
 These do not own prompt pool wording, but they affect execution and review:
@@ -616,6 +635,7 @@ These do not own prompt pool wording, but they affect execution and review:
 | Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. |
 | Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. |
 | SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. |
 | Krea2 resolution selector | `SxCPKrea2ResolutionSelector` in `__init__.py` | Krea-compatible width/height and API aspect/resolution helper. |
 ## Drift Audit Helper
@@ -655,6 +675,7 @@ or pool appears there but not in this map, update the relevant route table.
 | Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. |
 | Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. |
 | SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. |
 | Duplicate punctuation, empty labels, repeated trigger, repeated tag item | `prompt_hygiene.py`, then the route-specific formatter if the repeated content is semantic. |
 | Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. |
 | Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. |
@@ -4,6 +4,11 @@ import json
 import re
 from typing import Any
 try:
    from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
 except ImportError:  # Allows local smoke tests with `python -c`.
    from prompt_hygiene import sanitize_negative_text, sanitize_prose_text
 TRIGGER_CANDIDATES = (
    "sxcpinup_coloredpencil",
@@ -2678,20 +2683,21 @@ def format_krea2_prompt(
    if row and row.get("mode") == "Insta/OF":
        soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
        selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
        selected_negative = hard_negative if target == "hardcore" else soft_negative
        if extra_positive.strip():
            selected = f"{selected.rstrip()} {extra_positive.strip()}"
            soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
            hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
-        negative = _combine_negative(selected_negative, negative_prompt, extra_negative)
+        soft_prompt = sanitize_prose_text(soft_prompt, triggers=TRIGGER_CANDIDATES)
        hard_prompt = sanitize_prose_text(hard_prompt, triggers=TRIGGER_CANDIDATES)
        selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
        selected_negative = hard_negative if target == "hardcore" else soft_negative
        negative = sanitize_negative_text(_combine_negative(selected_negative, negative_prompt, extra_negative))
        return {
            "krea_prompt": selected,
            "negative_prompt": negative,
            "krea_softcore_prompt": soft_prompt,
            "krea_hardcore_prompt": hard_prompt,
-            "softcore_negative_prompt": _combine_negative(soft_negative, extra_negative),
+            "softcore_negative_prompt": sanitize_negative_text(_combine_negative(soft_negative, extra_negative)),
-            "hardcore_negative_prompt": _combine_negative(hard_negative, extra_negative),
+            "hardcore_negative_prompt": sanitize_negative_text(_combine_negative(hard_negative, extra_negative)),
            "method": f"{method}:krea2(insta_of_pair)",
        }
@@ -2704,7 +2710,8 @@ def format_krea2_prompt(
    if extra_positive.strip():
        prompt = f"{prompt.rstrip()} {extra_positive.strip()}"
-    negative = _combine_negative(extracted_negative, negative_prompt, extra_negative)
+    prompt = sanitize_prose_text(prompt, triggers=TRIGGER_CANDIDATES)
    negative = sanitize_negative_text(_combine_negative(extracted_negative, negative_prompt, extra_negative))
    return {
        "krea_prompt": prompt,
        "negative_prompt": negative,
@@ -10,8 +10,18 @@ from typing import Any, Callable
 try:
    from . import generate_prompt_batches as g
    from .prompt_hygiene import (
        sanitize_caption_text,
        sanitize_negative_text,
        sanitize_prompt_text,
    )
 except ImportError:  # Allows local smoke tests with `python -c`.
    import generate_prompt_batches as g
    from prompt_hygiene import (
        sanitize_caption_text,
        sanitize_negative_text,
        sanitize_prompt_text,
    )
 ROOT_DIR = Path(__file__).resolve().parent
@@ -7609,7 +7619,11 @@ def build_prompt(
    row = _apply_camera_config(row, camera_config)
    active_trigger = trigger.strip() or g.TRIGGER
    row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt))
-    row["negative_prompt"] = _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
+    row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,))
    row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,))
    row["negative_prompt"] = sanitize_negative_text(
        _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
    )
    row["trigger"] = active_trigger
    row.setdefault("expression_intensity", expression_intensity)
    row.setdefault("expression_intensity_source", expression_intensity_source)
@@ -8794,8 +8808,10 @@ def build_insta_of_pair(
    soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
    hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
-    soft_negative = _combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative)
+    soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,))
-    hard_negative = _combined_negative(INSTA_OF_NEGATIVE, extra_negative)
+    hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,))
    soft_negative = sanitize_negative_text(_combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative))
    hard_negative = sanitize_negative_text(_combined_negative(INSTA_OF_NEGATIVE, extra_negative))
    soft_caption_parts = [
        active_trigger,
        "Insta/OF softcore mode",
@@ -8810,7 +8826,10 @@ def build_insta_of_pair(
        soft_row["composition"],
        _camera_caption_text(soft_camera_config) if soft_camera_directive else "",
    ]
-    soft_caption = ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip())
+    soft_caption = sanitize_caption_text(
        ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()),
        triggers=(active_trigger,),
    )
    hard_caption_parts = [
        active_trigger,
        "Insta/OF hardcore mode",
@@ -8824,7 +8843,10 @@ def build_insta_of_pair(
        hard_composition,
        _camera_caption_text(hard_camera_config) if hard_camera_directive else "",
    ]
-    hard_caption = ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip())
+    hard_caption = sanitize_caption_text(
        ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()),
        triggers=(active_trigger,),
    )
    metadata = {
        "mode": "Insta/OF",
        "options": options,
@@ -0,0 +1,169 @@
 from __future__ import annotations
 import re
 from typing import Any, Iterable
 # Field labels that `_strip_empty_fields` removes when they appear without a
 # value. Labels are regex-escaped and matched case-insensitively there.
 EMPTY_FIELD_LABELS = (
    "Ages",
    "Body types",
    "Cast",
    "Cast descriptors",
    "Characters",
    "Scene",
    "Setting",
    "Pose",
    "Sexual pose",
    "Sexual scene",
    "Facial expression",
    "Facial expressions",
    "Clothing",
    "Erotic outfit",
    "Prop/detail",
    "Composition",
    "Role graph",
    "Camera",
    "Camera control",
    "Camera priority",
    "Use",
    "Avoid",
 )
 def clean_spacing(value: Any) -> str:
     """Collapse whitespace and tidy punctuation in ``value``.

     ``None`` becomes an empty string; any other value is converted with
     ``str``. The result uses single spaces, has no whitespace in front of
     ``, . ; :``, no runs of repeated separators or periods, no separator
     stranded directly before a period, and no padding just inside
     parentheses.
     """
     text = "" if value is None else str(value)
     # ``\s`` already covers newlines and tabs, so one pass flattens them all.
     text = re.sub(r"\s+", " ", text).strip()
     text = re.sub(r"\s+([,.;:])", r"\1", text)
     # A run of separators keeps only its last character (",;" -> ";").
     text = re.sub(r"([,;:]){2,}", r"\1", text)
     # A separator left in front of a period is dropped ("red,." -> "red.").
     text = re.sub(r"[,;:]\s*\.", ".", text)
     # Collapse a whole run of periods in one go. Replacing periods pairwise
     # turned "..." into ".." and left ".,." as "..".
     text = re.sub(r"\.(?:\s*\.)+", ".", text)
     text = re.sub(r"\(\s+", "(", text)
     text = re.sub(r"\s+\)", ")", text)
     return text.strip()
 def _strip_empty_fields(text: str) -> str:
     """Remove ``EMPTY_FIELD_LABELS`` entries that carry no value.

     Handles ``Label: .``, a trailing ``Label:``, ``Label: none/null/n/a`` and
     a label that stands alone as ``Label.``. Matching is case-insensitive and
     the result is passed through ``clean_spacing``.
     """
     if not text:
         return ""
     labels = "|".join(re.escape(label) for label in EMPTY_FIELD_LABELS)
     # "Label: ." / "Label: ," / "Label: ;" -- label and stray mark both go.
     text = re.sub(rf"\b(?:{labels})\s*:\s*[.,;]", "", text, flags=re.IGNORECASE)
     # "Label:" right before punctuation or at the end of the text.
     text = re.sub(rf"\b(?:{labels}):\s*(?=\.|,|;|$)", "", text, flags=re.IGNORECASE)
     # "Label." is what clean_spacing leaves behind for "Label: .". Only drop
     # it when the label is a clause of its own (start of text, or right after
     # punctuation plus a space). Matching it anywhere deleted ordinary
     # sentence-final words such as "... strikes a pose." or "... the camera.".
     text = re.sub(
         rf"(?:^|(?<=[.!?,;]\s))(?:{labels})\.(?=\s|$)",
         "",
         text,
         flags=re.IGNORECASE,
     )
     text = re.sub(rf"\b(?:{labels}):\s*(?:none|null|n/a)\b[.,;]?", "", text, flags=re.IGNORECASE)
     return clean_spacing(text)
 def _drop_dangling_connectors(text: str) -> str:
     """Drop connector words (with/and/or/while/featuring) left hanging before punctuation.

     The rewrite rules run in a fixed order, case-insensitively, and the result
     is passed through ``clean_spacing``.
     """
     rewrite_rules = (
         (r"\b(?:with|and|or|while|featuring)\s*([,.;])", r"\1"),
         (r"([,.;])\s*(?:with|and|or|while|featuring)\s*([,.;])", r"\1"),
         (r"\bwith\s*,", ""),
         (r",\s*and\s*\.", "."),
     )
     cleaned = text
     for pattern, replacement in rewrite_rules:
         cleaned = re.sub(pattern, replacement, cleaned, flags=re.IGNORECASE)
     return clean_spacing(cleaned)
 def _sentence_key(text: str, triggers: Iterable[str] = ()) -> str:
    key_text = text
    for trigger in triggers:
        trigger = str(trigger or "").strip()
        if trigger:
            key_text = re.sub(rf"^{re.escape(trigger)}\s*[,.;]\s*", "", key_text, flags=re.IGNORECASE)
    return re.sub(r"\W+", " ", key_text.lower()).strip()
 def _dedupe_adjacent_sentences(text: str, triggers: Iterable[str] = ()) -> str:
     """Drop a sentence that repeats the sentence kept just before it.

     Sentences are split after ``.``, ``!`` or ``?`` and compared by
     ``_sentence_key``. Sentences whose key is empty are dropped as well. The
     kept sentences are joined with single spaces.
     """
     # ``triggers`` is read again for every sentence. Snapshot it so a one-shot
     # iterable (e.g. a generator) is not exhausted after the first sentence.
     trigger_list = tuple(triggers)
     kept: list[str] = []
     previous = ""
     for chunk in re.split(r"(?<=[.!?])\s+", text):
         sentence = chunk.strip()
         if not sentence:
             continue
         key = _sentence_key(sentence, trigger_list)
         if key and key != previous:
             kept.append(sentence)
             previous = key
     return " ".join(kept)
 def _dedupe_labeled_sentences(text: str) -> str:
    parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()]
    seen: set[tuple[str, str]] = set()
    deduped: list[str] = []
    for part in parts:
        match = re.match(r"^([A-Za-z][A-Za-z /_-]{1,40}):\s*(.+)$", part)
        if not match:
            deduped.append(part)
            continue
        key = (match.group(1).strip().lower(), re.sub(r"\W+", " ", match.group(2).lower()).strip())
        if key not in seen:
            deduped.append(part)
            seen.add(key)
    return " ".join(deduped)
 def _trigger_prefix_key(text: str, triggers: Iterable[str]) -> str:
    lowered = text.lower().strip()
    for trigger in triggers:
        trigger = str(trigger or "").strip()
        if trigger and lowered.startswith(trigger.lower()):
            return trigger
    return ""
 def _dedupe_trigger_prefix(text: str, triggers: Iterable[str]) -> str:
     """Collapse a repeated leading trigger into a single ``trigger, `` prefix.

     ``text`` is first passed through ``clean_spacing``. If no trigger is
     detected, or the detected trigger is not a delimited prefix, the cleaned
     text is returned unchanged. Otherwise the result is ``"<trigger>, <rest>"``
     with ``rest`` stripped of surrounding `` ,.;``, or just the trigger when
     nothing else remains.
     """
     text = clean_spacing(text)
     trigger = _trigger_prefix_key(text, triggers)
     if not trigger:
         return text
     # Each repetition must end in a delimiter or at the end of the text, so
     # "sxcp, sxcp" collapses fully and "sxcppnl7 ..." is not mistaken for
     # "sxcp" followed by content.
     prefix = re.match(
         rf"(?:{re.escape(trigger)}\s*(?:[,.;]\s*|$))+",
         text,
         flags=re.IGNORECASE,
     )
     if prefix is None:
         # The text only starts with the trigger's characters ("sxcp woman").
         # Prepending the trigger here would duplicate it.
         return text
     remainder = text[prefix.end():].strip(" ,.;")
     return f"{trigger}, {remainder}" if remainder else trigger
 def _split_comma_items(text: str) -> list[str]:
     """Split spacing-cleaned ``text`` on commas/semicolons into non-empty items.

     Each item has surrounding spaces, commas, periods and semicolons removed.
     """
     pieces = []
     for raw in re.split(r"\s*[,;]\s*", clean_spacing(text)):
         piece = raw.strip(" ,.;")
         if piece:
             pieces.append(piece)
     return pieces
 def dedupe_comma_list(text: Any) -> str:
     """Return the comma list in ``text`` with later duplicates removed.

     Items are compared lower-cased with non-word runs collapsed to one space.
     The first spelling of each item is kept, order is preserved, and items are
     joined with ``", "``. A falsy ``text`` gives an empty string.
     """
     first_seen: dict[str, str] = {}
     for entry in _split_comma_items(str(text or "")):
         normalized = re.sub(r"\W+", " ", entry.lower()).strip()
         if normalized:
             # setdefault keeps the earliest entry for a key; dicts preserve
             # insertion order.
             first_seen.setdefault(normalized, entry)
     return ", ".join(first_seen.values())
 def sanitize_prose_text(value: Any, triggers: Iterable[str] = ()) -> str:
     """Run the full prose clean-up pipeline on ``value``.

     Steps, in order: spacing/punctuation clean-up, empty field-label removal,
     dangling-connector removal, labelled-sentence dedupe, trigger-prefix
     dedupe, adjacent-sentence dedupe, and a final spacing pass that also
     strips leading/trailing spaces, commas and semicolons.

     ``triggers`` may be any iterable of trigger strings; a single string is
     treated as one trigger. Returns ``""`` for empty input.
     """
     text = clean_spacing(value)
     if not text:
         return ""
     # Two helpers below iterate the triggers, so take one snapshot: a one-shot
     # iterable would be empty for the second helper, and a bare string would
     # otherwise be iterated character by character.
     if isinstance(triggers, str):
         trigger_list: tuple[str, ...] = (triggers,)
     else:
         trigger_list = tuple(triggers or ())
     text = _strip_empty_fields(text)
     text = _drop_dangling_connectors(text)
     text = _dedupe_labeled_sentences(text)
     text = _dedupe_trigger_prefix(text, trigger_list)
     text = _dedupe_adjacent_sentences(text, trigger_list)
     return clean_spacing(text).strip(" ,;")
 def sanitize_prompt_text(value: Any, triggers: Iterable[str] = ()) -> str:
     """Clean a prompt string; delegates unchanged to ``sanitize_prose_text``."""
     cleaned = sanitize_prose_text(value, triggers=triggers)
     return cleaned
 def sanitize_caption_text(value: Any, triggers: Iterable[str] = ()) -> str:
     """Clean a caption string; delegates unchanged to ``sanitize_prose_text``."""
     cleaned = sanitize_prose_text(value, triggers=triggers)
     return cleaned
 def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str:
     """Clean a comma-separated tag prompt, keeping a leading trigger exactly once.

     The text is spacing-cleaned. When it opens with one of ``triggers`` as a
     comma/semicolon-delimited prefix, repeated copies of that prefix are
     removed, the remaining tags are de-duplicated, and the result is
     ``"<trigger>, <tags>"`` (or just the trigger when no tags remain).
     Otherwise the whole text is de-duplicated as a plain comma list. Returns
     ``""`` for empty input.
     """
     text = clean_spacing(value)
     if not text:
         return ""
     trigger = _trigger_prefix_key(text, triggers)
     prefix = None
     if trigger:
         # Each repetition must end in a delimiter or at the end of the text.
         # Without that check "sxcp" or "sxcp woman, tag" kept its trigger and
         # then received a second one in front.
         prefix = re.match(
             rf"(?:{re.escape(trigger)}\s*(?:[,;]\s*|$))+",
             text,
             flags=re.IGNORECASE,
         )
     if prefix is None:
         return dedupe_comma_list(text)
     body = text[prefix.end():].strip(" ,;")
     trigger_key = trigger.lower()
     # The trigger is emitted once up front, so drop any later copy of it.
     tags = [
         tag
         for tag in dedupe_comma_list(body).split(", ")
         if tag and tag.lower() != trigger_key
     ]
     return ", ".join([trigger, *tags])
 def sanitize_negative_text(value: Any) -> str:
     """Clean a negative prompt by de-duplicating it as a comma list."""
     deduped = dedupe_comma_list(value)
     return deduped
@@ -4,6 +4,11 @@ import json
 import re
 from typing import Any
 try:
    from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
 except ImportError:  # Allows local smoke tests with `python -c`.
    from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
 TRIGGER_CANDIDATES = (
    "sxcpinup_coloredpencil",
@@ -432,11 +437,14 @@ def _assemble_prompt(
    custom_quality: str,
    extra_positive: str,
 ) -> str:
-    return _combine_tags(
+    return sanitize_tag_prompt(
        _combine_tags(
            _style_prefix(style_preset, trigger, prepend_trigger, custom_style),
            body_tags,
            _quality_tail(quality_preset, custom_quality),
            extra_positive,
        ),
        triggers=(trigger,),
    )
@@ -504,14 +512,22 @@ def format_sdxl_prompt(
            extra_positive,
        )
        selected = hard_prompt if target == "hardcore" else soft_prompt
-        selected_negative = row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
+        selected_negative = (
            row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
        )
        return {
            "sdxl_prompt": selected,
-            "negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative),
+            "negative_prompt": sanitize_negative_text(
                _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative)
            ),
            "sdxl_softcore_prompt": soft_prompt,
            "sdxl_hardcore_prompt": hard_prompt,
-            "softcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative),
+            "softcore_negative_prompt": sanitize_negative_text(
-            "hardcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative),
+                _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative)
            ),
            "hardcore_negative_prompt": sanitize_negative_text(
                _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative)
            ),
            "method": f"{method}:sdxl(insta_of_pair)",
        }
@@ -534,7 +550,9 @@ def format_sdxl_prompt(
    )
    return {
        "sdxl_prompt": prompt,
-        "negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative),
+        "negative_prompt": sanitize_negative_text(
            _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative)
        ),
        "sdxl_softcore_prompt": "",
        "sdxl_hardcore_prompt": "",
        "softcore_negative_prompt": "",