Add prompt hygiene architecture pass
This commit is contained in:
@@ -4,6 +4,11 @@ import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from .prompt_hygiene import sanitize_prose_text
|
||||
except ImportError: # Allows local smoke tests with `python -c`.
|
||||
from prompt_hygiene import sanitize_prose_text
|
||||
|
||||
|
||||
OLD_TRIGGER = "sxcpinup_coloredpencil"
|
||||
DEFAULT_TRIGGER = "sxcppnl7"
|
||||
@@ -724,6 +729,8 @@ def naturalize_caption(
|
||||
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
|
||||
if row is not None:
|
||||
prose, method = _metadata_to_prose(row, detail_level, keep_style)
|
||||
return _with_trigger(prose, trigger, include_trigger), f"{row_method}:{method}"
|
||||
caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
|
||||
return caption, f"{row_method}:{method}"
|
||||
prose, method = _text_to_prose(source_text, detail_level, keep_style)
|
||||
return _with_trigger(prose, trigger, include_trigger), method
|
||||
caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
|
||||
return caption, method
|
||||
|
||||
@@ -0,0 +1,301 @@
|
||||
# Prompt Architecture Improvement Plan
|
||||
|
||||
This is a working research note for organizing the prompt builder around the
|
||||
routing map in `docs/prompt-pool-routing-map.md`.
|
||||
|
||||
## Current Branch Additions
|
||||
|
||||
The current branch adds two major surfaces:
|
||||
|
||||
- `SxCP Krea2 Resolution Selector` in `__init__.py`, with README notes.
|
||||
- Expanded hardcore interaction/manual/action pools in
|
||||
`categories/sexual_poses.json`,
|
||||
`categories/expression_composition_pools.json`, `prompt_builder.py`, and
|
||||
`krea_formatter.py`.
|
||||
|
||||
The map audit currently sees:
|
||||
|
||||
- 15 sexual pose subcategories.
|
||||
- 94 sexual pose item templates.
|
||||
- 23 expression pools.
|
||||
- 24 composition pools.
|
||||
- A new Krea2 resolution node with width/height/API aspect outputs.
|
||||
|
||||
## Architectural Finding
|
||||
|
||||
The project has a good functional map, but ownership is still mixed inside large
|
||||
files:
|
||||
|
||||
- `prompt_builder.py` owns selection, character resolution, role graph logic,
|
||||
camera adaptation, pair assembly, and some final string cleanup.
|
||||
- `krea_formatter.py` owns metadata parsing, cast naturalization, sexual action
|
||||
rewriting, POV rewriting, clothing cleanup, camera preservation, fallback
|
||||
parsing, and final prose assembly.
|
||||
- `sdxl_formatter.py` owns tag assembly and style/quality presets.
|
||||
- `caption_naturalizer.py` owns training-caption prose.
|
||||
- Category JSON files own scalable pool content, but Python still owns several
|
||||
compatibility and role-graph decisions.
|
||||
|
||||
The biggest maintainability risk is not the number of pools. The risk is that
|
||||
selection, semantic rewriting, and final text hygiene are too interleaved. When a
|
||||
prompt has wrong text, it is easy to patch the wrong layer.
|
||||
|
||||
## First Refactor Boundary
|
||||
|
||||
Generic text hygiene now has one home:
|
||||
|
||||
- `prompt_hygiene.py`
|
||||
|
||||
It should only handle route-agnostic cleanup:
|
||||
|
||||
- whitespace and punctuation normalization;
|
||||
- empty field-label removal;
|
||||
- repeated trigger prefix cleanup;
|
||||
- duplicate comma-list item removal;
|
||||
- adjacent duplicate sentence cleanup;
|
||||
- simple dangling connector cleanup.
|
||||
|
||||
It must not make semantic decisions such as sexual action positioning, POV
|
||||
geometry, clothing state, or model-specific tag weighting. Those stay in the
|
||||
route-specific owner.
|
||||
|
||||
Current integration points:
|
||||
|
||||
- `prompt_builder.build_prompt`
|
||||
- `prompt_builder.build_insta_of_pair`
|
||||
- `krea_formatter.format_krea2_prompt`
|
||||
- `sdxl_formatter.format_sdxl_prompt`
|
||||
- `caption_naturalizer.naturalize_caption`
|
||||
|
||||
## Target Organization
|
||||
|
||||
### Generation Layer
|
||||
|
||||
Owner: `prompt_builder.py` plus `categories/*.json`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- category/subcategory/item selection;
|
||||
- seed axis routing;
|
||||
- character slot/profile resolution;
|
||||
- scene/expression/composition pool selection;
|
||||
- role graph creation from structured category axes;
|
||||
- metadata row construction.
|
||||
|
||||
Move or isolate later:
|
||||
|
||||
- role graph generation for hardcore interaction categories into a dedicated
|
||||
module, for example `hardcore_role_graphs.py`;
|
||||
- camera-scene adapters into `scene_camera_adapters.py`;
|
||||
- category-library loading and inheritance helpers into `category_library.py`.
|
||||
|
||||
### Pair / Adapter Layer
|
||||
|
||||
Owner today: `build_insta_of_pair`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- soft/hard row creation;
|
||||
- continuity policy;
|
||||
- softcore cast policy;
|
||||
- pair-level camera routing;
|
||||
- pair metadata shape.
|
||||
|
||||
Improve later:
|
||||
|
||||
- make a single pair metadata sanitizer that normalizes `softcore_row`,
|
||||
`hardcore_row`, pair prompts, negatives, captions, and camera fields;
|
||||
- split pair assembly into small functions by phase:
|
||||
`build_soft_row`, `build_hard_row`, `resolve_pair_camera`,
|
||||
`resolve_pair_clothing`, `assemble_pair_metadata`.
|
||||
|
||||
### Krea2 Formatter Path
|
||||
|
||||
Owner: `krea_formatter.py`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- Krea prose style;
|
||||
- cast prose;
|
||||
- hardcore action sentence rewriting;
|
||||
- POV sentence rewriting;
|
||||
- clothing naturalization;
|
||||
- camera-scene preservation;
|
||||
- fallback text parsing.
|
||||
|
||||
Improve later:
|
||||
|
||||
- split semantic blocks into modules:
|
||||
`krea_cast.py`, `krea_actions.py`, `krea_pov.py`, `krea_clothing.py`;
|
||||
- add route-level smoke fixtures for representative metadata rows;
|
||||
- make `_hardcore_action_sentence` dispatch by action family instead of long
|
||||
conditional chains.
|
||||
|
||||
### SDXL Formatter Path
|
||||
|
||||
Owner: `sdxl_formatter.py`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- trigger behavior;
|
||||
- style and quality presets;
|
||||
- tag ordering;
|
||||
- weighted explicit tags;
|
||||
- negative-prompt assembly.
|
||||
|
||||
Improve later:
|
||||
|
||||
- move presets into data dictionaries or JSON so adding styles does not require
|
||||
editing formatter logic;
|
||||
- add formatter profiles for Pony, SDXL photo, and flat vector;
|
||||
- make fallback cleanup use the shared field-label inventory.
|
||||
|
||||
### Naturalizer Path
|
||||
|
||||
Owner: `caption_naturalizer.py`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- natural sentence caption assembly;
|
||||
- training-caption trigger behavior;
|
||||
- style-tail policy.
|
||||
|
||||
Improve later:
|
||||
|
||||
- share more metadata readers with Krea without sharing Krea prose;
|
||||
- add a `caption_profile` option for concise/dense LoRA caption styles.
|
||||
|
||||
### Category JSON Path
|
||||
|
||||
Owner: `categories/*.json`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- scalable prompt pool content;
|
||||
- named scene/expression/composition pools;
|
||||
- item templates and axes;
|
||||
- direct category-specific wording.
|
||||
|
||||
Improve later:
|
||||
|
||||
- introduce optional `family` and `action_type` fields on item templates so
|
||||
Python filters do less keyword guessing;
|
||||
- add `formatter_hint` fields only where needed, not globally;
|
||||
- add a JSON audit that checks every referenced expression/composition/scene pool
|
||||
exists.
|
||||
|
||||
### Node / UI Path
|
||||
|
||||
Owner: `__init__.py`, `loop_nodes.py`, `web/*.js`.
|
||||
|
||||
Keep here:
|
||||
|
||||
- ComfyUI node input/output declarations;
|
||||
- widget behavior;
|
||||
- button actions;
|
||||
- dynamic input slots.
|
||||
|
||||
Improve later:
|
||||
|
||||
- split large node classes into files by family;
|
||||
- keep node display names, return names, and docs in sync through the audit
|
||||
helper;
|
||||
- add small endpoint tests for profile/accumulator/index-switch routes.
|
||||
|
||||
## Path-Specific Improvements
|
||||
|
||||
### Prompt Builder
|
||||
|
||||
Near-term:
|
||||
|
||||
- Final row hygiene: already done through `prompt_hygiene.py`.
|
||||
- Add a metadata invariant checker for rows before return.
|
||||
- Normalize every row with one function before JSON serialization.
|
||||
|
||||
Medium-term:
|
||||
|
||||
- Extract category loading and role graph logic.
|
||||
- Convert keyword-heavy interaction filtering to template metadata.
|
||||
|
||||
### Insta/OF Pair
|
||||
|
||||
Near-term:
|
||||
|
||||
- Normalize pair metadata with one helper.
|
||||
- Confirm pair prompts, captions, and soft/hard rows carry the same sanitized
|
||||
scene/camera/clothing fields.
|
||||
|
||||
Medium-term:
|
||||
|
||||
- Make pair camera and clothing phases explicit subfunctions.
|
||||
- Add smoke fixtures for same-cast, POV man, explicit nude, and different-camera
|
||||
modes.
|
||||
|
||||
### Krea2
|
||||
|
||||
Near-term:
|
||||
|
||||
- Final prose hygiene: already done through `prompt_hygiene.py`.
|
||||
- Add tests for close foreplay, POV oral, POV penetration, aftercare, manual
|
||||
stimulation, and camera-scene preservation.
|
||||
|
||||
Medium-term:
|
||||
|
||||
- Dispatch action rewriting by action family.
|
||||
- Split Krea semantic helpers into smaller modules.
|
||||
|
||||
### SDXL
|
||||
|
||||
Near-term:
|
||||
|
||||
- Final tag hygiene: already done through `prompt_hygiene.py`.
|
||||
- Add smoke tests for trigger preservation and duplicate tag removal.
|
||||
|
||||
Medium-term:
|
||||
|
||||
- Make style/quality presets data-driven.
|
||||
|
||||
### Naturalizer
|
||||
|
||||
Near-term:
|
||||
|
||||
- Final prose hygiene: already done through `prompt_hygiene.py`.
|
||||
- Verify training captions keep trigger exactly once.
|
||||
|
||||
Medium-term:
|
||||
|
||||
- Add caption profiles for training and browsing use cases.
|
||||
|
||||
### Camera / Scene
|
||||
|
||||
Near-term:
|
||||
|
||||
- Keep Qwen/orbit as camera source.
|
||||
- Keep scene-camera adapters scoped by location family.
|
||||
- Use the memory note in
|
||||
`/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV.
|
||||
|
||||
Medium-term:
|
||||
|
||||
- Move coworking adapter into a scene-camera adapter module.
|
||||
- Build new adapters one location family at a time.
|
||||
|
||||
## Invariants To Preserve
|
||||
|
||||
- Metadata is the preferred formatter input.
|
||||
- Prompt Builder should output structured rows even if raw prompt text is rough.
|
||||
- Krea should fix prose and semantic action readability, not category selection.
|
||||
- SDXL should produce tag-style output and preserve model triggers as requested.
|
||||
- Naturalizer should output training-friendly captions without changing the
|
||||
selected content.
|
||||
- Generic cleanup belongs in `prompt_hygiene.py`; semantic cleanup belongs in
|
||||
the owning route.
|
||||
|
||||
## Recommended Next Passes
|
||||
|
||||
1. Add metadata invariant checks and small smoke fixtures.
|
||||
2. Split Krea action/POV/clothing helpers into separate modules.
|
||||
3. Add category JSON pool reference validation to `tools/prompt_map_audit.py`.
|
||||
4. Extract scene-camera adapters from `prompt_builder.py`.
|
||||
5. Split `__init__.py` node classes by family after behavior is covered by smoke
|
||||
checks.
|
||||
@@ -605,6 +605,25 @@ Naturalizer field consumption:
|
||||
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` |
|
||||
| Text fallback | `caption` or `prompt` text | `_text_to_prose` |
|
||||
|
||||
### Final Text Hygiene
|
||||
|
||||
`prompt_hygiene.py` owns route-agnostic final cleanup. It is intentionally
|
||||
small: whitespace, punctuation, empty field labels, adjacent duplicate
|
||||
sentences, repeated trigger prefixes, duplicate comma-list items, and dangling
|
||||
connectors.
|
||||
|
||||
It is called from:
|
||||
|
||||
- `prompt_builder.build_prompt`
|
||||
- `prompt_builder.build_insta_of_pair`
|
||||
- `krea_formatter.format_krea2_prompt`
|
||||
- `sdxl_formatter.format_sdxl_prompt`
|
||||
- `caption_naturalizer.naturalize_caption`
|
||||
|
||||
Do not put semantic fixes in `prompt_hygiene.py`. Sexual action readability,
|
||||
POV geometry, clothing state, Krea prose, SDXL weighting, and training-caption
|
||||
policy still belong to their route-specific owner.
|
||||
|
||||
## Utility / Workflow Nodes
|
||||
|
||||
These do not own prompt pool wording, but they affect execution and review:
|
||||
@@ -616,6 +635,7 @@ These do not own prompt pool wording, but they affect execution and review:
|
||||
| Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. |
|
||||
| Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. |
|
||||
| SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. |
|
||||
| Krea2 resolution selector | `SxCPKrea2ResolutionSelector` in `__init__.py` | Krea-compatible width/height and API aspect/resolution helper. |
|
||||
|
||||
## Drift Audit Helper
|
||||
|
||||
@@ -655,6 +675,7 @@ or pool appears there but not in this map, update the relevant route table.
|
||||
| Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. |
|
||||
| Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. |
|
||||
| SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. |
|
||||
| Duplicate punctuation, empty labels, repeated trigger, repeated tag item | `prompt_hygiene.py`, then the route-specific formatter if the repeated content is semantic. |
|
||||
| Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. |
|
||||
| Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. |
|
||||
|
||||
|
||||
+14
-7
@@ -4,6 +4,11 @@ import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
|
||||
except ImportError: # Allows local smoke tests with `python -c`.
|
||||
from prompt_hygiene import sanitize_negative_text, sanitize_prose_text
|
||||
|
||||
|
||||
TRIGGER_CANDIDATES = (
|
||||
"sxcpinup_coloredpencil",
|
||||
@@ -2678,20 +2683,21 @@ def format_krea2_prompt(
|
||||
|
||||
if row and row.get("mode") == "Insta/OF":
|
||||
soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
|
||||
selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
|
||||
selected_negative = hard_negative if target == "hardcore" else soft_negative
|
||||
if extra_positive.strip():
|
||||
selected = f"{selected.rstrip()} {extra_positive.strip()}"
|
||||
soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
|
||||
hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
|
||||
negative = _combine_negative(selected_negative, negative_prompt, extra_negative)
|
||||
soft_prompt = sanitize_prose_text(soft_prompt, triggers=TRIGGER_CANDIDATES)
|
||||
hard_prompt = sanitize_prose_text(hard_prompt, triggers=TRIGGER_CANDIDATES)
|
||||
selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
|
||||
selected_negative = hard_negative if target == "hardcore" else soft_negative
|
||||
negative = sanitize_negative_text(_combine_negative(selected_negative, negative_prompt, extra_negative))
|
||||
return {
|
||||
"krea_prompt": selected,
|
||||
"negative_prompt": negative,
|
||||
"krea_softcore_prompt": soft_prompt,
|
||||
"krea_hardcore_prompt": hard_prompt,
|
||||
"softcore_negative_prompt": _combine_negative(soft_negative, extra_negative),
|
||||
"hardcore_negative_prompt": _combine_negative(hard_negative, extra_negative),
|
||||
"softcore_negative_prompt": sanitize_negative_text(_combine_negative(soft_negative, extra_negative)),
|
||||
"hardcore_negative_prompt": sanitize_negative_text(_combine_negative(hard_negative, extra_negative)),
|
||||
"method": f"{method}:krea2(insta_of_pair)",
|
||||
}
|
||||
|
||||
@@ -2704,7 +2710,8 @@ def format_krea2_prompt(
|
||||
|
||||
if extra_positive.strip():
|
||||
prompt = f"{prompt.rstrip()} {extra_positive.strip()}"
|
||||
negative = _combine_negative(extracted_negative, negative_prompt, extra_negative)
|
||||
prompt = sanitize_prose_text(prompt, triggers=TRIGGER_CANDIDATES)
|
||||
negative = sanitize_negative_text(_combine_negative(extracted_negative, negative_prompt, extra_negative))
|
||||
return {
|
||||
"krea_prompt": prompt,
|
||||
"negative_prompt": negative,
|
||||
|
||||
+27
-5
@@ -10,8 +10,18 @@ from typing import Any, Callable
|
||||
|
||||
try:
|
||||
from . import generate_prompt_batches as g
|
||||
from .prompt_hygiene import (
|
||||
sanitize_caption_text,
|
||||
sanitize_negative_text,
|
||||
sanitize_prompt_text,
|
||||
)
|
||||
except ImportError: # Allows local smoke tests with `python -c`.
|
||||
import generate_prompt_batches as g
|
||||
from prompt_hygiene import (
|
||||
sanitize_caption_text,
|
||||
sanitize_negative_text,
|
||||
sanitize_prompt_text,
|
||||
)
|
||||
|
||||
|
||||
ROOT_DIR = Path(__file__).resolve().parent
|
||||
@@ -7609,7 +7619,11 @@ def build_prompt(
|
||||
row = _apply_camera_config(row, camera_config)
|
||||
active_trigger = trigger.strip() or g.TRIGGER
|
||||
row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt))
|
||||
row["negative_prompt"] = _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
|
||||
row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,))
|
||||
row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,))
|
||||
row["negative_prompt"] = sanitize_negative_text(
|
||||
_combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
|
||||
)
|
||||
row["trigger"] = active_trigger
|
||||
row.setdefault("expression_intensity", expression_intensity)
|
||||
row.setdefault("expression_intensity_source", expression_intensity_source)
|
||||
@@ -8794,8 +8808,10 @@ def build_insta_of_pair(
|
||||
|
||||
soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
|
||||
hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
|
||||
soft_negative = _combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative)
|
||||
hard_negative = _combined_negative(INSTA_OF_NEGATIVE, extra_negative)
|
||||
soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,))
|
||||
hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,))
|
||||
soft_negative = sanitize_negative_text(_combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative))
|
||||
hard_negative = sanitize_negative_text(_combined_negative(INSTA_OF_NEGATIVE, extra_negative))
|
||||
soft_caption_parts = [
|
||||
active_trigger,
|
||||
"Insta/OF softcore mode",
|
||||
@@ -8810,7 +8826,10 @@ def build_insta_of_pair(
|
||||
soft_row["composition"],
|
||||
_camera_caption_text(soft_camera_config) if soft_camera_directive else "",
|
||||
]
|
||||
soft_caption = ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip())
|
||||
soft_caption = sanitize_caption_text(
|
||||
", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()),
|
||||
triggers=(active_trigger,),
|
||||
)
|
||||
hard_caption_parts = [
|
||||
active_trigger,
|
||||
"Insta/OF hardcore mode",
|
||||
@@ -8824,7 +8843,10 @@ def build_insta_of_pair(
|
||||
hard_composition,
|
||||
_camera_caption_text(hard_camera_config) if hard_camera_directive else "",
|
||||
]
|
||||
hard_caption = ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip())
|
||||
hard_caption = sanitize_caption_text(
|
||||
", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()),
|
||||
triggers=(active_trigger,),
|
||||
)
|
||||
metadata = {
|
||||
"mode": "Insta/OF",
|
||||
"options": options,
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Iterable
|
||||
|
||||
|
||||
# Field labels that `_strip_empty_fields` removes when they carry no value.
# Order is kept as-is because the labels are joined into one regex alternation.
EMPTY_FIELD_LABELS = (
    # Cast
    "Ages", "Body types", "Cast", "Cast descriptors", "Characters",
    # Location
    "Scene", "Setting",
    # Pose / action
    "Pose", "Sexual pose", "Sexual scene",
    # Expression
    "Facial expression", "Facial expressions",
    # Wardrobe and props
    "Clothing", "Erotic outfit", "Prop/detail",
    # Framing
    "Composition", "Role graph",
    # Camera
    "Camera", "Camera control", "Camera priority",
    # Guidance
    "Use", "Avoid",
)
|
||||
|
||||
|
||||
def clean_spacing(value: Any) -> str:
    """Normalize whitespace and stray punctuation in ``value``.

    ``None`` becomes ``""``; anything else is converted with ``str``.

    The cleanup steps are, in order:

    * collapse every whitespace run (including newlines) to one space;
    * remove whitespace in front of ``, . ; :``;
    * collapse runs of ``, ; :`` to their last character;
    * turn ``,.`` / ``:.`` / ``;.`` into a single period;
    * collapse any run of periods (optionally space-separated) to one;
    * remove padding just inside parentheses.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    text = "" if value is None else str(value)
    # ``\s+`` already covers newlines, so no separate newline replacement.
    text = re.sub(r"\s+", " ", text).strip()
    text = re.sub(r"\s+([,.;:])", r"\1", text)
    text = re.sub(r"([,;:]){2,}", r"\1", text)
    # Rewrite "<separator>." first so the periods it produces are visible to
    # the period collapse below.
    text = re.sub(r"[,:;]\s*\.", ".", text)
    # Collapse the whole run at once; a pairwise ".." -> "." pass would leave
    # ".." behind for "..." because regex matches do not overlap.
    text = re.sub(r"\.(?:\s*\.)+", ".", text)
    text = re.sub(r"\(\s+", "(", text)
    text = re.sub(r"\s+\)", ")", text)
    return text.strip()
|
||||
|
||||
|
||||
def _strip_empty_fields(text: str) -> str:
    """Remove field labels from ``EMPTY_FIELD_LABELS`` that carry no value.

    Matching is case-insensitive. Handled shapes:

    * ``Label:`` directly followed by ``.``, ``,`` or ``;`` (or end of text);
    * a bare ``Label.`` standing on its own at the start of the text or right
      after sentence punctuation;
    * ``Label: none`` / ``Label: null`` / ``Label: n/a``.

    Args:
        text: Text to clean; an empty value returns ``""``.

    Returns:
        The text without empty labels, passed through ``clean_spacing``.
    """
    if not text:
        return ""
    labels = "|".join(re.escape(label) for label in EMPTY_FIELD_LABELS)
    text = re.sub(rf"\b(?:{labels})\s*:\s*[.,;]", "", text, flags=re.IGNORECASE)
    text = re.sub(rf"\b(?:{labels}):\s*(?=\.|,|;|$)", "", text, flags=re.IGNORECASE)

    # A bare "Label." is only an empty field when it is a sentence of its own.
    # Without the sentence-start anchor, ordinary prose such as
    # "She looks at the camera." would lose its last word.
    bare_label = re.compile(
        rf"(^\s*|[.!?;]\s*)(?:{labels})\.(?=\s|$)",
        flags=re.IGNORECASE,
    )
    # Repeat until stable: removing one bare label can expose the next one
    # ("Scene. Pose. ..."), because the first match consumes the punctuation
    # the second would need as its anchor.
    previous = None
    while previous != text:
        previous = text
        text = bare_label.sub(r"\1", text)

    text = re.sub(rf"\b(?:{labels}):\s*(?:none|null|n/a)\b[.,;]?", "", text, flags=re.IGNORECASE)
    return clean_spacing(text)
|
||||
|
||||
|
||||
def _drop_dangling_connectors(text: str) -> str:
    """Remove connector words that are left hanging in front of punctuation.

    The connectors are ``with``, ``and``, ``or``, ``while`` and ``featuring``
    (case-insensitive). The rewrites run in a fixed order and the result is
    passed through ``clean_spacing``.
    """
    connector = r"(?:with|and|or|while|featuring)"
    rewrites = (
        # "... with." -> "..."
        (rf"\b{connector}\s*([,.;])", r"\1"),
        # ", and ." -> ","
        (rf"([,.;])\s*{connector}\s*([,.;])", r"\1"),
        # A "with ," left over after the passes above.
        (r"\bwith\s*,", ""),
        (r",\s*and\s*\.", "."),
    )
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
    return clean_spacing(text)
|
||||
|
||||
|
||||
def _sentence_key(text: str, triggers: Iterable[str] = ()) -> str:
|
||||
key_text = text
|
||||
for trigger in triggers:
|
||||
trigger = str(trigger or "").strip()
|
||||
if trigger:
|
||||
key_text = re.sub(rf"^{re.escape(trigger)}\s*[,.;]\s*", "", key_text, flags=re.IGNORECASE)
|
||||
return re.sub(r"\W+", " ", key_text.lower()).strip()
|
||||
|
||||
|
||||
def _dedupe_adjacent_sentences(text: str, triggers: Iterable[str] = ()) -> str:
    """Drop a sentence that repeats the sentence kept just before it.

    Sentences are split after ``.``, ``!`` or ``?`` followed by whitespace and
    compared through ``_sentence_key``. Sentences whose key is empty are
    dropped.

    Args:
        text: Prose to de-duplicate.
        triggers: Trigger words handed to ``_sentence_key``.

    Returns:
        The kept sentences joined with single spaces.
    """
    # ``_sentence_key`` walks the triggers once per sentence, so a one-shot
    # iterable must be materialized or only the first sentence would see them.
    trigger_list = tuple(triggers)
    parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()]
    deduped: list[str] = []
    last_kept_key = ""
    for part in parts:
        key = _sentence_key(part, trigger_list)
        if not key or key == last_kept_key:
            continue
        deduped.append(part)
        # Track the last *kept* sentence only: a dropped fragment between two
        # identical sentences must not make them look non-adjacent.
        last_kept_key = key
    return " ".join(deduped)
|
||||
|
||||
|
||||
def _dedupe_labeled_sentences(text: str) -> str:
|
||||
parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()]
|
||||
seen: set[tuple[str, str]] = set()
|
||||
deduped: list[str] = []
|
||||
for part in parts:
|
||||
match = re.match(r"^([A-Za-z][A-Za-z /_-]{1,40}):\s*(.+)$", part)
|
||||
if not match:
|
||||
deduped.append(part)
|
||||
continue
|
||||
key = (match.group(1).strip().lower(), re.sub(r"\W+", " ", match.group(2).lower()).strip())
|
||||
if key not in seen:
|
||||
deduped.append(part)
|
||||
seen.add(key)
|
||||
return " ".join(deduped)
|
||||
|
||||
|
||||
def _trigger_prefix_key(text: str, triggers: Iterable[str]) -> str:
|
||||
lowered = text.lower().strip()
|
||||
for trigger in triggers:
|
||||
trigger = str(trigger or "").strip()
|
||||
if trigger and lowered.startswith(trigger.lower()):
|
||||
return trigger
|
||||
return ""
|
||||
|
||||
|
||||
def _dedupe_trigger_prefix(text: str, triggers: Iterable[str]) -> str:
    """Collapse a repeated leading trigger into exactly one ``"<trigger>, "``.

    The text is first normalized with ``clean_spacing``. When it starts with
    one of ``triggers`` (per ``_trigger_prefix_key``), every leading
    ``<trigger><, . or ;>`` repetition is removed and the trigger is put back
    once.

    Args:
        text: Text that may start with a trigger.
        triggers: Candidate trigger words.

    Returns:
        * the cleaned text unchanged when no trigger prefix is found, or when
          the trigger is not separated from what follows by ``, . ;``;
        * the trigger alone when nothing else remains;
        * otherwise ``"<trigger>, <remainder>"`` with the remainder stripped
          of surrounding spaces, commas, periods and semicolons.
    """
    text = clean_spacing(text)
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return text
    # ``|$`` lets a trailing, separator-less trigger ("t, t" or just "t")
    # count as a repetition too.
    pattern = rf"^(?:{re.escape(trigger)}\s*(?:[,.;]\s*|$))+"
    remainder, stripped = re.subn(pattern, "", text, flags=re.IGNORECASE)
    if not stripped:
        # Nothing was removed, so prepending the trigger again would
        # duplicate it.
        return text
    remainder = remainder.strip(" ,.;")
    return f"{trigger}, {remainder}" if remainder else trigger
|
||||
|
||||
|
||||
def _split_comma_items(text: str) -> list[str]:
    """Split ``text`` on commas and semicolons into trimmed, non-empty items.

    The text is normalized with ``clean_spacing`` first; each item is then
    stripped of surrounding spaces, commas, periods and semicolons.
    """
    pieces = re.split(r"\s*[,;]\s*", clean_spacing(text))
    trimmed = (piece.strip(" ,.;") for piece in pieces)
    return [piece for piece in trimmed if piece]
|
||||
|
||||
|
||||
def dedupe_comma_list(text: Any) -> str:
    """Return ``text`` as a ``", "``-joined list without repeated items.

    Items come from ``_split_comma_items``. Two items are the same when they
    match after lower-casing and collapsing non-word runs to spaces; the
    first spelling wins and first-seen order is kept. Items with no word
    characters are dropped.
    """
    first_seen: dict[str, str] = {}
    for entry in _split_comma_items(str(text or "")):
        signature = re.sub(r"\W+", " ", entry.lower()).strip()
        if signature:
            # ``setdefault`` keeps the earliest spelling of each item.
            first_seen.setdefault(signature, entry)
    return ", ".join(first_seen.values())
|
||||
|
||||
|
||||
def sanitize_prose_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Run the full prose cleanup pipeline on ``value``.

    Steps, in order: ``clean_spacing``, ``_strip_empty_fields``,
    ``_drop_dangling_connectors``, ``_dedupe_labeled_sentences``,
    ``_dedupe_trigger_prefix``, ``_dedupe_adjacent_sentences``, and a final
    ``clean_spacing`` with leading/trailing spaces, commas and semicolons
    stripped.

    Args:
        value: Text (or any value convertible by ``clean_spacing``).
        triggers: Trigger words used by the trigger-prefix and
            adjacent-sentence steps.

    Returns:
        The cleaned text, or ``""`` when ``value`` cleans to nothing.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    # Two steps below consume the triggers; materialize once so a one-shot
    # iterable is not exhausted by the first of them.
    trigger_list = tuple(triggers)
    text = _strip_empty_fields(text)
    text = _drop_dangling_connectors(text)
    text = _dedupe_labeled_sentences(text)
    text = _dedupe_trigger_prefix(text, trigger_list)
    text = _dedupe_adjacent_sentences(text, trigger_list)
    return clean_spacing(text).strip(" ,;")
|
||||
|
||||
|
||||
def sanitize_prompt_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean a prompt string; delegates unchanged to ``sanitize_prose_text``."""
    cleaned = sanitize_prose_text(value, triggers=triggers)
    return cleaned
|
||||
|
||||
|
||||
def sanitize_caption_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean a caption string; delegates unchanged to ``sanitize_prose_text``."""
    cleaned = sanitize_prose_text(value, triggers=triggers)
    return cleaned
|
||||
|
||||
|
||||
def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean a comma-separated tag prompt, keeping one leading trigger.

    The value is normalized with ``clean_spacing``. When it starts with one of
    ``triggers`` (per ``_trigger_prefix_key``) followed by ``,`` or ``;`` (or
    nothing at all), every leading repetition of the trigger is removed, the
    remaining tags are de-duplicated with ``dedupe_comma_list``, and the
    trigger is put back once at the front.

    Args:
        value: Tag prompt text (or any value convertible by ``clean_spacing``).
        triggers: Candidate trigger words.

    Returns:
        ``""`` for empty input; the trigger alone when no tags remain;
        ``"<trigger>, <tags>"`` when a separated trigger prefix was found;
        otherwise just the de-duplicated tag list.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return dedupe_comma_list(text)
    # ``|$`` treats a bare trailing trigger ("t, t" or just "t") as a
    # repetition as well.
    pattern = rf"^(?:{re.escape(trigger)}\s*(?:[,;]\s*|$))+"
    remainder, stripped = re.subn(pattern, "", text, flags=re.IGNORECASE)
    if not stripped:
        # The trigger is part of a longer first tag ("t style, ..."); adding
        # it in front again would duplicate it.
        return dedupe_comma_list(text)
    tags = dedupe_comma_list(remainder)
    return f"{trigger}, {tags}" if tags else trigger
|
||||
|
||||
|
||||
def sanitize_negative_text(value: Any) -> str:
    """Clean a negative prompt; delegates unchanged to ``dedupe_comma_list``."""
    cleaned = dedupe_comma_list(value)
    return cleaned
|
||||
+28
-10
@@ -4,6 +4,11 @@ import json
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
|
||||
except ImportError: # Allows local smoke tests with `python -c`.
|
||||
from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
|
||||
|
||||
|
||||
TRIGGER_CANDIDATES = (
|
||||
"sxcpinup_coloredpencil",
|
||||
@@ -432,11 +437,14 @@ def _assemble_prompt(
|
||||
custom_quality: str,
|
||||
extra_positive: str,
|
||||
) -> str:
|
||||
return _combine_tags(
|
||||
_style_prefix(style_preset, trigger, prepend_trigger, custom_style),
|
||||
body_tags,
|
||||
_quality_tail(quality_preset, custom_quality),
|
||||
extra_positive,
|
||||
return sanitize_tag_prompt(
|
||||
_combine_tags(
|
||||
_style_prefix(style_preset, trigger, prepend_trigger, custom_style),
|
||||
body_tags,
|
||||
_quality_tail(quality_preset, custom_quality),
|
||||
extra_positive,
|
||||
),
|
||||
triggers=(trigger,),
|
||||
)
|
||||
|
||||
|
||||
@@ -504,14 +512,22 @@ def format_sdxl_prompt(
|
||||
extra_positive,
|
||||
)
|
||||
selected = hard_prompt if target == "hardcore" else soft_prompt
|
||||
selected_negative = row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
|
||||
selected_negative = (
|
||||
row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
|
||||
)
|
||||
return {
|
||||
"sdxl_prompt": selected,
|
||||
"negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative),
|
||||
"negative_prompt": sanitize_negative_text(
|
||||
_combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative)
|
||||
),
|
||||
"sdxl_softcore_prompt": soft_prompt,
|
||||
"sdxl_hardcore_prompt": hard_prompt,
|
||||
"softcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative),
|
||||
"hardcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative),
|
||||
"softcore_negative_prompt": sanitize_negative_text(
|
||||
_combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative)
|
||||
),
|
||||
"hardcore_negative_prompt": sanitize_negative_text(
|
||||
_combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative)
|
||||
),
|
||||
"method": f"{method}:sdxl(insta_of_pair)",
|
||||
}
|
||||
|
||||
@@ -534,7 +550,9 @@ def format_sdxl_prompt(
|
||||
)
|
||||
return {
|
||||
"sdxl_prompt": prompt,
|
||||
"negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative),
|
||||
"negative_prompt": sanitize_negative_text(
|
||||
_combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative)
|
||||
),
|
||||
"sdxl_softcore_prompt": "",
|
||||
"sdxl_hardcore_prompt": "",
|
||||
"softcore_negative_prompt": "",
|
||||
|
||||
Reference in New Issue
Block a user