From b3cd8d77a131139a888329ac6205c2529ccbdb1a Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Fri, 26 Jun 2026 13:26:06 +0200 Subject: [PATCH] Add prompt hygiene architecture pass --- caption_naturalizer.py | 11 +- docs/prompt-architecture-improvement-plan.md | 301 +++++++++++++++++++ docs/prompt-pool-routing-map.md | 21 ++ krea_formatter.py | 21 +- prompt_builder.py | 32 +- prompt_hygiene.py | 169 +++++++++++ sdxl_formatter.py | 38 ++- 7 files changed, 569 insertions(+), 24 deletions(-) create mode 100644 docs/prompt-architecture-improvement-plan.md create mode 100644 prompt_hygiene.py diff --git a/caption_naturalizer.py b/caption_naturalizer.py index 6c4b8d4..bb00021 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -4,6 +4,11 @@ import json import re from typing import Any +try: + from .prompt_hygiene import sanitize_prose_text +except ImportError: # Allows local smoke tests with `python -c`. + from prompt_hygiene import sanitize_prose_text + OLD_TRIGGER = "sxcpinup_coloredpencil" DEFAULT_TRIGGER = "sxcppnl7" @@ -724,6 +729,8 @@ def naturalize_caption( row, row_method = _row_from_inputs(source_text, metadata_json, input_hint) if row is not None: prose, method = _metadata_to_prose(row, detail_level, keep_style) - return _with_trigger(prose, trigger, include_trigger), f"{row_method}:{method}" + caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,)) + return caption, f"{row_method}:{method}" prose, method = _text_to_prose(source_text, detail_level, keep_style) - return _with_trigger(prose, trigger, include_trigger), method + caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,)) + return caption, method diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md new file mode 100644 index 0000000..cdbcc81 --- /dev/null +++ b/docs/prompt-architecture-improvement-plan.md @@ -0,0 +1,301 @@ +# Prompt Architecture 
Improvement Plan + +This is a working research note for organizing the prompt builder around the +routing map in `docs/prompt-pool-routing-map.md`. + +## Current Branch Additions + +The current branch adds two major surfaces: + +- `SxCP Krea2 Resolution Selector` in `__init__.py`, with README notes. +- Expanded hardcore interaction/manual/action pools in + `categories/sexual_poses.json`, + `categories/expression_composition_pools.json`, `prompt_builder.py`, and + `krea_formatter.py`. + +The map audit currently sees: + +- 15 sexual pose subcategories. +- 94 sexual pose item templates. +- 23 expression pools. +- 24 composition pools. +- A new Krea2 resolution node with width/height/API aspect outputs. + +## Architectural Finding + +The project has a good functional map, but ownership is still mixed inside large +files: + +- `prompt_builder.py` owns selection, character resolution, role graph logic, + camera adaptation, pair assembly, and some final string cleanup. +- `krea_formatter.py` owns metadata parsing, cast naturalization, sexual action + rewriting, POV rewriting, clothing cleanup, camera preservation, fallback + parsing, and final prose assembly. +- `sdxl_formatter.py` owns tag assembly and style/quality presets. +- `caption_naturalizer.py` owns training-caption prose. +- Category JSON files own scalable pool content, but Python still owns several + compatibility and role-graph decisions. + +The biggest maintainability risk is not the number of pools. The risk is that +selection, semantic rewriting, and final text hygiene are too interleaved. When a +prompt has wrong text, it is easy to patch the wrong layer. 
+ +## First Refactor Boundary + +Generic text hygiene now has one home: + +- `prompt_hygiene.py` + +It should only handle route-agnostic cleanup: + +- whitespace and punctuation normalization; +- empty field-label removal; +- repeated trigger prefix cleanup; +- duplicate comma-list item removal; +- adjacent duplicate sentence and repeated `Label: value` sentence cleanup; +- simple dangling connector cleanup. + +It must not make semantic decisions such as sexual action positioning, POV +geometry, clothing state, or model-specific tag weighting. Those stay in the +route-specific owner. + +Current integration points: + +- `prompt_builder.build_prompt` +- `prompt_builder.build_insta_of_pair` +- `krea_formatter.format_krea2_prompt` +- `sdxl_formatter.format_sdxl_prompt` +- `caption_naturalizer.naturalize_caption` + +## Target Organization + +### Generation Layer + +Owner: `prompt_builder.py` plus `categories/*.json`. + +Keep here: + +- category/subcategory/item selection; +- seed axis routing; +- character slot/profile resolution; +- scene/expression/composition pool selection; +- role graph creation from structured category axes; +- metadata row construction. + +Move or isolate later: + +- role graph generation for hardcore interaction categories into a dedicated + module, for example `hardcore_role_graphs.py`; +- camera-scene adapters into `scene_camera_adapters.py`; +- category-library loading and inheritance helpers into `category_library.py`. + +### Pair / Adapter Layer + +Owner today: `build_insta_of_pair`. + +Keep here: + +- soft/hard row creation; +- continuity policy; +- softcore cast policy; +- pair-level camera routing; +- pair metadata shape. + +Improve later: + +- make a single pair metadata sanitizer that normalizes `softcore_row`, + `hardcore_row`, pair prompts, negatives, captions, and camera fields; +- split pair assembly into small functions by phase: + `build_soft_row`, `build_hard_row`, `resolve_pair_camera`, + `resolve_pair_clothing`, `assemble_pair_metadata`. 
+ +### Krea2 Formatter Path + +Owner: `krea_formatter.py`. + +Keep here: + +- Krea prose style; +- cast prose; +- hardcore action sentence rewriting; +- POV sentence rewriting; +- clothing naturalization; +- camera-scene preservation; +- fallback text parsing. + +Improve later: + +- split semantic blocks into modules: + `krea_cast.py`, `krea_actions.py`, `krea_pov.py`, `krea_clothing.py`; +- add route-level smoke fixtures for representative metadata rows; +- make `_hardcore_action_sentence` dispatch by action family instead of long + conditional chains. + +### SDXL Formatter Path + +Owner: `sdxl_formatter.py`. + +Keep here: + +- trigger behavior; +- style and quality presets; +- tag ordering; +- weighted explicit tags; +- negative-prompt assembly. + +Improve later: + +- move presets into data dictionaries or JSON so adding styles does not require + editing formatter logic; +- add formatter profiles for Pony, SDXL photo, and flat vector; +- make fallback cleanup use the shared field-label inventory. + +### Naturalizer Path + +Owner: `caption_naturalizer.py`. + +Keep here: + +- natural sentence caption assembly; +- training-caption trigger behavior; +- style-tail policy. + +Improve later: + +- share more metadata readers with Krea without sharing Krea prose; +- add a `caption_profile` option for concise/dense LoRA caption styles. + +### Category JSON Path + +Owner: `categories/*.json`. + +Keep here: + +- scalable prompt pool content; +- named scene/expression/composition pools; +- item templates and axes; +- direct category-specific wording. + +Improve later: + +- introduce optional `family` and `action_type` fields on item templates so + Python filters do less keyword guessing; +- add `formatter_hint` fields only where needed, not globally; +- add a JSON audit that checks every referenced expression/composition/scene pool + exists. + +### Node / UI Path + +Owner: `__init__.py`, `loop_nodes.py`, `web/*.js`. 
+ +Keep here: + +- ComfyUI node input/output declarations; +- widget behavior; +- button actions; +- dynamic input slots. + +Improve later: + +- split large node classes into files by family; +- keep node display names, return names, and docs in sync through the audit + helper; +- add small endpoint tests for profile/accumulator/index-switch routes. + +## Path-Specific Improvements + +### Prompt Builder + +Near-term: + +- Final row hygiene is already done through `prompt_hygiene.py`. +- Add a metadata invariant checker for rows before return. +- Normalize every row with one function before JSON serialization. + +Medium-term: + +- Extract category loading and role graph logic. +- Convert keyword-heavy interaction filtering to template metadata. + +### Insta/OF Pair + +Near-term: + +- Normalize pair metadata with one helper. +- Confirm pair prompts, captions, and soft/hard rows carry the same sanitized + scene/camera/clothing fields. + +Medium-term: + +- Make pair camera and clothing phases explicit subfunctions. +- Add smoke fixtures for same-cast, POV man, explicit nude, and different-camera + modes. + +### Krea2 + +Near-term: + +- Final prose hygiene is already done through `prompt_hygiene.py`. +- Add tests for close foreplay, POV oral, POV penetration, aftercare, manual + stimulation, and camera-scene preservation. + +Medium-term: + +- Dispatch action rewriting by action family. +- Split Krea semantic helpers into smaller modules. + +### SDXL + +Near-term: + +- Final tag hygiene is already done through `prompt_hygiene.py`. +- Add smoke tests for trigger preservation and duplicate tag removal. + +Medium-term: + +- Make style/quality presets data-driven. + +### Naturalizer + +Near-term: + +- Final prose hygiene is already done through `prompt_hygiene.py`. +- Verify training captions keep the trigger exactly once. + +Medium-term: + +- Add caption profiles for training and browsing use cases. + +### Camera / Scene + +Near-term: + +- Keep Qwen/orbit as camera source. 
+- Keep scene-camera adapters scoped by location family. +- Use the memory note in + `/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV. + +Medium-term: + +- Move coworking adapter into a scene-camera adapter module. +- Build new adapters one location family at a time. + +## Invariants To Preserve + +- Metadata is the preferred formatter input. +- Prompt Builder should output structured rows even if raw prompt text is rough. +- Krea should fix prose and semantic action readability, not category selection. +- SDXL should produce tag-style output and preserve model triggers as requested. +- Naturalizer should output training-friendly captions without changing the + selected content. +- Generic cleanup belongs in `prompt_hygiene.py`; semantic cleanup belongs in + the owning route. + +## Recommended Next Passes + +1. Add metadata invariant checks and small smoke fixtures. +2. Split Krea action/POV/clothing helpers into separate modules. +3. Add category JSON pool reference validation to `tools/prompt_map_audit.py`. +4. Extract scene-camera adapters from `prompt_builder.py`. +5. Split `__init__.py` node classes by family after behavior is covered by smoke + checks. diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 61fe83a..af7dbc1 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -605,6 +605,25 @@ Naturalizer field consumption: | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` | | Text fallback | `caption` or `prompt` text | `_text_to_prose` | +### Final Text Hygiene + +`prompt_hygiene.py` owns route-agnostic final cleanup. It is intentionally +small: whitespace, punctuation, empty field labels, adjacent duplicate +sentences, repeated trigger prefixes, duplicate comma-list items, and dangling +connectors. 
+ +It is called from: + +- `prompt_builder.build_prompt` +- `prompt_builder.build_insta_of_pair` +- `krea_formatter.format_krea2_prompt` +- `sdxl_formatter.format_sdxl_prompt` +- `caption_naturalizer.naturalize_caption` + +Do not put semantic fixes in `prompt_hygiene.py`. Sexual action readability, +POV geometry, clothing state, Krea prose, SDXL weighting, and training-caption +policy still belong to their route-specific owner. + ## Utility / Workflow Nodes These do not own prompt pool wording, but they affect execution and review: @@ -616,6 +635,7 @@ These do not own prompt pool wording, but they affect execution and review: | Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. | | Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. | | SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. | +| Krea2 resolution selector | `SxCPKrea2ResolutionSelector` in `__init__.py` | Krea-compatible width/height and API aspect/resolution helper. | ## Drift Audit Helper @@ -655,6 +675,7 @@ or pool appears there but not in this map, update the relevant route table. | Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. | | Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. | | SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. | +| Duplicate punctuation, empty labels, repeated trigger, repeated tag item | `prompt_hygiene.py`, then the route-specific formatter if the repeated content is semantic. | | Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. 
| | Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. | diff --git a/krea_formatter.py b/krea_formatter.py index 16afcba..3ab707b 100644 --- a/krea_formatter.py +++ b/krea_formatter.py @@ -4,6 +4,11 @@ import json import re from typing import Any +try: + from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text +except ImportError: # Allows local smoke tests with `python -c`. + from prompt_hygiene import sanitize_negative_text, sanitize_prose_text + TRIGGER_CANDIDATES = ( "sxcpinup_coloredpencil", @@ -2678,20 +2683,21 @@ def format_krea2_prompt( if row and row.get("mode") == "Insta/OF": soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode) - selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt - selected_negative = hard_negative if target == "hardcore" else soft_negative if extra_positive.strip(): - selected = f"{selected.rstrip()} {extra_positive.strip()}" soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}" hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}" - negative = _combine_negative(selected_negative, negative_prompt, extra_negative) + soft_prompt = sanitize_prose_text(soft_prompt, triggers=TRIGGER_CANDIDATES) + hard_prompt = sanitize_prose_text(hard_prompt, triggers=TRIGGER_CANDIDATES) + selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt + selected_negative = hard_negative if target == "hardcore" else soft_negative + negative = sanitize_negative_text(_combine_negative(selected_negative, negative_prompt, extra_negative)) return { "krea_prompt": selected, "negative_prompt": negative, "krea_softcore_prompt": soft_prompt, "krea_hardcore_prompt": hard_prompt, - "softcore_negative_prompt": _combine_negative(soft_negative, extra_negative), - "hardcore_negative_prompt": _combine_negative(hard_negative, 
extra_negative), + "softcore_negative_prompt": sanitize_negative_text(_combine_negative(soft_negative, extra_negative)), + "hardcore_negative_prompt": sanitize_negative_text(_combine_negative(hard_negative, extra_negative)), "method": f"{method}:krea2(insta_of_pair)", } @@ -2704,7 +2710,8 @@ def format_krea2_prompt( if extra_positive.strip(): prompt = f"{prompt.rstrip()} {extra_positive.strip()}" - negative = _combine_negative(extracted_negative, negative_prompt, extra_negative) + prompt = sanitize_prose_text(prompt, triggers=TRIGGER_CANDIDATES) + negative = sanitize_negative_text(_combine_negative(extracted_negative, negative_prompt, extra_negative)) return { "krea_prompt": prompt, "negative_prompt": negative, diff --git a/prompt_builder.py b/prompt_builder.py index 206f76d..60a99d6 100644 --- a/prompt_builder.py +++ b/prompt_builder.py @@ -10,8 +10,18 @@ from typing import Any, Callable try: from . import generate_prompt_batches as g + from .prompt_hygiene import ( + sanitize_caption_text, + sanitize_negative_text, + sanitize_prompt_text, + ) except ImportError: # Allows local smoke tests with `python -c`. 
import generate_prompt_batches as g + from prompt_hygiene import ( + sanitize_caption_text, + sanitize_negative_text, + sanitize_prompt_text, + ) ROOT_DIR = Path(__file__).resolve().parent @@ -7609,7 +7619,11 @@ def build_prompt( row = _apply_camera_config(row, camera_config) active_trigger = trigger.strip() or g.TRIGGER row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt)) - row["negative_prompt"] = _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative) + row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,)) + row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,)) + row["negative_prompt"] = sanitize_negative_text( + _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative) + ) row["trigger"] = active_trigger row.setdefault("expression_intensity", expression_intensity) row.setdefault("expression_intensity_source", expression_intensity_source) @@ -8794,8 +8808,10 @@ def build_insta_of_pair( soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt)) hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt)) - soft_negative = _combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative) - hard_negative = _combined_negative(INSTA_OF_NEGATIVE, extra_negative) + soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,)) + hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,)) + soft_negative = sanitize_negative_text(_combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative)) + hard_negative = sanitize_negative_text(_combined_negative(INSTA_OF_NEGATIVE, extra_negative)) soft_caption_parts = [ active_trigger, "Insta/OF softcore mode", @@ -8810,7 +8826,10 @@ def build_insta_of_pair( soft_row["composition"], _camera_caption_text(soft_camera_config) if soft_camera_directive else "", ] - soft_caption = ", 
".join(str(part).strip() for part in soft_caption_parts if str(part).strip()) + soft_caption = sanitize_caption_text( + ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()), + triggers=(active_trigger,), + ) hard_caption_parts = [ active_trigger, "Insta/OF hardcore mode", @@ -8824,7 +8843,10 @@ def build_insta_of_pair( hard_composition, _camera_caption_text(hard_camera_config) if hard_camera_directive else "", ] - hard_caption = ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()) + hard_caption = sanitize_caption_text( + ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()), + triggers=(active_trigger,), + ) metadata = { "mode": "Insta/OF", "options": options, diff --git a/prompt_hygiene.py b/prompt_hygiene.py new file mode 100644 index 0000000..0f9d1e4 --- /dev/null +++ b/prompt_hygiene.py @@ -0,0 +1,169 @@ +from __future__ import annotations + +import re +from typing import Any, Iterable + + +EMPTY_FIELD_LABELS = ( + "Ages", + "Body types", + "Cast", + "Cast descriptors", + "Characters", + "Scene", + "Setting", + "Pose", + "Sexual pose", + "Sexual scene", + "Facial expression", + "Facial expressions", + "Clothing", + "Erotic outfit", + "Prop/detail", + "Composition", + "Role graph", + "Camera", + "Camera control", + "Camera priority", + "Use", + "Avoid", +) + + +def clean_spacing(value: Any) -> str: + text = "" if value is None else str(value) + text = text.replace("\n", " ") + text = re.sub(r"\s+", " ", text).strip() + text = re.sub(r"\s+([,.;:])", r"\1", text) + text = re.sub(r"([,;:]){2,}", r"\1", text) + text = re.sub(r"\.\s*\.", ".", text) + text = re.sub(r",\s*\.", ".", text) + text = re.sub(r":\s*\.", ".", text) + text = re.sub(r";\s*\.", ".", text) + text = re.sub(r"\(\s+", "(", text) + text = re.sub(r"\s+\)", ")", text) + return text.strip() + + +def _strip_empty_fields(text: str) -> str: + if not text: + return "" + labels = "|".join(re.escape(label) for label in 
EMPTY_FIELD_LABELS) + text = re.sub(rf"\b(?:{labels})\s*:\s*[.,;]", "", text, flags=re.IGNORECASE) + text = re.sub(rf"\b(?:{labels}):\s*(?=\.|,|;|$)", "", text, flags=re.IGNORECASE) + text = re.sub(rf"\b(?:{labels})\.(?=\s|$)", "", text, flags=re.IGNORECASE) + text = re.sub(rf"\b(?:{labels}):\s*(?:none|null|n/a)\b[.,;]?", "", text, flags=re.IGNORECASE) + return clean_spacing(text) + + +def _drop_dangling_connectors(text: str) -> str: + text = re.sub(r"\b(?:with|and|or|while|featuring)\s*([,.;])", r"\1", text, flags=re.IGNORECASE) + text = re.sub(r"([,.;])\s*(?:with|and|or|while|featuring)\s*([,.;])", r"\1", text, flags=re.IGNORECASE) + text = re.sub(r"\bwith\s*,", "", text, flags=re.IGNORECASE) + text = re.sub(r",\s*and\s*\.", ".", text, flags=re.IGNORECASE) + return clean_spacing(text) + + +def _sentence_key(text: str, triggers: Iterable[str] = ()) -> str: + key_text = text + for trigger in triggers: + trigger = str(trigger or "").strip() + if trigger: + key_text = re.sub(rf"^{re.escape(trigger)}\s*[,.;]\s*", "", key_text, flags=re.IGNORECASE) + return re.sub(r"\W+", " ", key_text.lower()).strip() + + +def _dedupe_adjacent_sentences(text: str, triggers: Iterable[str] = ()) -> str: + parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()] + deduped: list[str] = [] + previous = "" + for part in parts: + key = _sentence_key(part, triggers) + if key and key != previous: + deduped.append(part) + previous = key + return " ".join(deduped) + + +def _dedupe_labeled_sentences(text: str) -> str: + parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()] + seen: set[tuple[str, str]] = set() + deduped: list[str] = [] + for part in parts: + match = re.match(r"^([A-Za-z][A-Za-z /_-]{1,40}):\s*(.+)$", part) + if not match: + deduped.append(part) + continue + key = (match.group(1).strip().lower(), re.sub(r"\W+", " ", match.group(2).lower()).strip()) + if key not in seen: + deduped.append(part) + seen.add(key) + return " 
".join(deduped) + + +def _trigger_prefix_key(text: str, triggers: Iterable[str]) -> str: + lowered = text.lower().strip() + for trigger in triggers: + trigger = str(trigger or "").strip() + if trigger and lowered.startswith(trigger.lower()): + return trigger + return "" + + +def _dedupe_trigger_prefix(text: str, triggers: Iterable[str]) -> str: + text = clean_spacing(text) + trigger = _trigger_prefix_key(text, triggers) + if not trigger: + return text + pattern = rf"^(?:{re.escape(trigger)}\s*[,.;]\s*)+" + return f"{trigger}, {re.sub(pattern, '', text, flags=re.IGNORECASE).strip(' ,.;')}" + + +def _split_comma_items(text: str) -> list[str]: + return [part.strip(" ,.;") for part in re.split(r"\s*[,;]\s*", clean_spacing(text)) if part.strip(" ,.;")] + + +def dedupe_comma_list(text: Any) -> str: + items: list[str] = [] + seen: set[str] = set() + for item in _split_comma_items(str(text or "")): + key = re.sub(r"\W+", " ", item.lower()).strip() + if key and key not in seen: + items.append(item) + seen.add(key) + return ", ".join(items) + + +def sanitize_prose_text(value: Any, triggers: Iterable[str] = ()) -> str: + text = clean_spacing(value) + if not text: + return "" + text = _strip_empty_fields(text) + text = _drop_dangling_connectors(text) + text = _dedupe_labeled_sentences(text) + text = _dedupe_trigger_prefix(text, triggers) + text = _dedupe_adjacent_sentences(text, triggers) + return clean_spacing(text).strip(" ,;") + + +def sanitize_prompt_text(value: Any, triggers: Iterable[str] = ()) -> str: + return sanitize_prose_text(value, triggers=triggers) + + +def sanitize_caption_text(value: Any, triggers: Iterable[str] = ()) -> str: + return sanitize_prose_text(value, triggers=triggers) + + +def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str: + text = clean_spacing(value) + if not text: + return "" + trigger = _trigger_prefix_key(text, triggers) + if trigger: + text = re.sub(rf"^(?:{re.escape(trigger)}\s*[,;]\s*)+", "", text, 
flags=re.IGNORECASE).strip(" ,;") + return f"{trigger}, {dedupe_comma_list(text)}" if text else trigger + return dedupe_comma_list(text) + + +def sanitize_negative_text(value: Any) -> str: + return dedupe_comma_list(value) diff --git a/sdxl_formatter.py b/sdxl_formatter.py index 1193b00..598c6fb 100644 --- a/sdxl_formatter.py +++ b/sdxl_formatter.py @@ -4,6 +4,11 @@ import json import re from typing import Any +try: + from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt +except ImportError: # Allows local smoke tests with `python -c`. + from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt + TRIGGER_CANDIDATES = ( "sxcpinup_coloredpencil", @@ -432,11 +437,14 @@ def _assemble_prompt( custom_quality: str, extra_positive: str, ) -> str: - return _combine_tags( - _style_prefix(style_preset, trigger, prepend_trigger, custom_style), - body_tags, - _quality_tail(quality_preset, custom_quality), - extra_positive, + return sanitize_tag_prompt( + _combine_tags( + _style_prefix(style_preset, trigger, prepend_trigger, custom_style), + body_tags, + _quality_tail(quality_preset, custom_quality), + extra_positive, + ), + triggers=(trigger,), ) @@ -504,14 +512,22 @@ def format_sdxl_prompt( extra_positive, ) selected = hard_prompt if target == "hardcore" else soft_prompt - selected_negative = row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt") + selected_negative = ( + row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt") + ) return { "sdxl_prompt": selected, - "negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative), + "negative_prompt": sanitize_negative_text( + _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative) + ), "sdxl_softcore_prompt": soft_prompt, "sdxl_hardcore_prompt": hard_prompt, - "softcore_negative_prompt": 
_combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative), - "hardcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative), + "softcore_negative_prompt": sanitize_negative_text( + _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative) + ), + "hardcore_negative_prompt": sanitize_negative_text( + _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative) + ), "method": f"{method}:sdxl(insta_of_pair)", } @@ -534,7 +550,9 @@ def format_sdxl_prompt( ) return { "sdxl_prompt": prompt, - "negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative), + "negative_prompt": sanitize_negative_text( + _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative) + ), "sdxl_softcore_prompt": "", "sdxl_hardcore_prompt": "", "softcore_negative_prompt": "",