Add prompt hygiene architecture pass
This commit is contained in:
@@ -4,6 +4,11 @@ import json
|
|||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
from .prompt_hygiene import sanitize_prose_text
|
||||||
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
|
from prompt_hygiene import sanitize_prose_text
|
||||||
|
|
||||||
|
|
||||||
OLD_TRIGGER = "sxcpinup_coloredpencil"
|
OLD_TRIGGER = "sxcpinup_coloredpencil"
|
||||||
DEFAULT_TRIGGER = "sxcppnl7"
|
DEFAULT_TRIGGER = "sxcppnl7"
|
||||||
@@ -724,6 +729,8 @@ def naturalize_caption(
|
|||||||
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
|
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
|
||||||
if row is not None:
|
if row is not None:
|
||||||
prose, method = _metadata_to_prose(row, detail_level, keep_style)
|
prose, method = _metadata_to_prose(row, detail_level, keep_style)
|
||||||
return _with_trigger(prose, trigger, include_trigger), f"{row_method}:{method}"
|
caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
|
||||||
|
return caption, f"{row_method}:{method}"
|
||||||
prose, method = _text_to_prose(source_text, detail_level, keep_style)
|
prose, method = _text_to_prose(source_text, detail_level, keep_style)
|
||||||
return _with_trigger(prose, trigger, include_trigger), method
|
caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
|
||||||
|
return caption, method
|
||||||
|
|||||||
@@ -0,0 +1,301 @@
|
|||||||
|
# Prompt Architecture Improvement Plan
|
||||||
|
|
||||||
|
This is a working research note for organizing the prompt builder around the
|
||||||
|
routing map in `docs/prompt-pool-routing-map.md`.
|
||||||
|
|
||||||
|
## Current Branch Additions
|
||||||
|
|
||||||
|
The current branch adds two major surfaces:
|
||||||
|
|
||||||
|
- `SxCP Krea2 Resolution Selector` in `__init__.py`, with README notes.
|
||||||
|
- Expanded hardcore interaction/manual/action pools in
|
||||||
|
`categories/sexual_poses.json`,
|
||||||
|
`categories/expression_composition_pools.json`, `prompt_builder.py`, and
|
||||||
|
`krea_formatter.py`.
|
||||||
|
|
||||||
|
The map audit currently sees:
|
||||||
|
|
||||||
|
- 15 sexual pose subcategories.
|
||||||
|
- 94 sexual pose item templates.
|
||||||
|
- 23 expression pools.
|
||||||
|
- 24 composition pools.
|
||||||
|
- A new Krea2 resolution node with width/height/API aspect outputs.
|
||||||
|
|
||||||
|
## Architectural Finding
|
||||||
|
|
||||||
|
The project has a good functional map, but ownership is still mixed inside large
|
||||||
|
files:
|
||||||
|
|
||||||
|
- `prompt_builder.py` owns selection, character resolution, role graph logic,
|
||||||
|
camera adaptation, pair assembly, and some final string cleanup.
|
||||||
|
- `krea_formatter.py` owns metadata parsing, cast naturalization, sexual action
|
||||||
|
rewriting, POV rewriting, clothing cleanup, camera preservation, fallback
|
||||||
|
parsing, and final prose assembly.
|
||||||
|
- `sdxl_formatter.py` owns tag assembly and style/quality presets.
|
||||||
|
- `caption_naturalizer.py` owns training-caption prose.
|
||||||
|
- Category JSON files own scalable pool content, but Python still owns several
|
||||||
|
compatibility and role-graph decisions.
|
||||||
|
|
||||||
|
The biggest maintainability risk is not the number of pools. The risk is that
|
||||||
|
selection, semantic rewriting, and final text hygiene are too interleaved. When a
|
||||||
|
prompt has wrong text, it is easy to patch the wrong layer.
|
||||||
|
|
||||||
|
## First Refactor Boundary
|
||||||
|
|
||||||
|
Generic text hygiene now has one home:
|
||||||
|
|
||||||
|
- `prompt_hygiene.py`
|
||||||
|
|
||||||
|
It should only handle route-agnostic cleanup:
|
||||||
|
|
||||||
|
- whitespace and punctuation normalization;
|
||||||
|
- empty field-label removal;
|
||||||
|
- repeated trigger prefix cleanup;
|
||||||
|
- duplicate comma-list item removal;
|
||||||
|
- adjacent duplicate sentence cleanup;
|
||||||
|
- simple dangling connector cleanup.
|
||||||
|
|
||||||
|
It must not make semantic decisions such as sexual action positioning, POV
|
||||||
|
geometry, clothing state, or model-specific tag weighting. Those stay in the
|
||||||
|
route-specific owner.
|
||||||
|
|
||||||
|
Current integration points:
|
||||||
|
|
||||||
|
- `prompt_builder.build_prompt`
|
||||||
|
- `prompt_builder.build_insta_of_pair`
|
||||||
|
- `krea_formatter.format_krea2_prompt`
|
||||||
|
- `sdxl_formatter.format_sdxl_prompt`
|
||||||
|
- `caption_naturalizer.naturalize_caption`
|
||||||
|
|
||||||
|
## Target Organization
|
||||||
|
|
||||||
|
### Generation Layer
|
||||||
|
|
||||||
|
Owner: `prompt_builder.py` plus `categories/*.json`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- category/subcategory/item selection;
|
||||||
|
- seed axis routing;
|
||||||
|
- character slot/profile resolution;
|
||||||
|
- scene/expression/composition pool selection;
|
||||||
|
- role graph creation from structured category axes;
|
||||||
|
- metadata row construction.
|
||||||
|
|
||||||
|
Move or isolate later:
|
||||||
|
|
||||||
|
- role graph generation for hardcore interaction categories into a dedicated
|
||||||
|
module, for example `hardcore_role_graphs.py`;
|
||||||
|
- camera-scene adapters into `scene_camera_adapters.py`;
|
||||||
|
- category-library loading and inheritance helpers into `category_library.py`.
|
||||||
|
|
||||||
|
### Pair / Adapter Layer
|
||||||
|
|
||||||
|
Owner today: `build_insta_of_pair`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- soft/hard row creation;
|
||||||
|
- continuity policy;
|
||||||
|
- softcore cast policy;
|
||||||
|
- pair-level camera routing;
|
||||||
|
- pair metadata shape.
|
||||||
|
|
||||||
|
Improve later:
|
||||||
|
|
||||||
|
- make a single pair metadata sanitizer that normalizes `softcore_row`,
|
||||||
|
`hardcore_row`, pair prompts, negatives, captions, and camera fields;
|
||||||
|
- split pair assembly into small functions by phase:
|
||||||
|
`build_soft_row`, `build_hard_row`, `resolve_pair_camera`,
|
||||||
|
`resolve_pair_clothing`, `assemble_pair_metadata`.
|
||||||
|
|
||||||
|
### Krea2 Formatter Path
|
||||||
|
|
||||||
|
Owner: `krea_formatter.py`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- Krea prose style;
|
||||||
|
- cast prose;
|
||||||
|
- hardcore action sentence rewriting;
|
||||||
|
- POV sentence rewriting;
|
||||||
|
- clothing naturalization;
|
||||||
|
- camera-scene preservation;
|
||||||
|
- fallback text parsing.
|
||||||
|
|
||||||
|
Improve later:
|
||||||
|
|
||||||
|
- split semantic blocks into modules:
|
||||||
|
`krea_cast.py`, `krea_actions.py`, `krea_pov.py`, `krea_clothing.py`;
|
||||||
|
- add route-level smoke fixtures for representative metadata rows;
|
||||||
|
- make `_hardcore_action_sentence` dispatch by action family instead of long
|
||||||
|
conditional chains.
|
||||||
|
|
||||||
|
### SDXL Formatter Path
|
||||||
|
|
||||||
|
Owner: `sdxl_formatter.py`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- trigger behavior;
|
||||||
|
- style and quality presets;
|
||||||
|
- tag ordering;
|
||||||
|
- weighted explicit tags;
|
||||||
|
- negative-prompt assembly.
|
||||||
|
|
||||||
|
Improve later:
|
||||||
|
|
||||||
|
- move presets into data dictionaries or JSON so adding styles does not require
|
||||||
|
editing formatter logic;
|
||||||
|
- add formatter profiles for Pony, SDXL photo, and flat vector;
|
||||||
|
- make fallback cleanup use the shared field-label inventory.
|
||||||
|
|
||||||
|
### Naturalizer Path
|
||||||
|
|
||||||
|
Owner: `caption_naturalizer.py`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- natural sentence caption assembly;
|
||||||
|
- training-caption trigger behavior;
|
||||||
|
- style-tail policy.
|
||||||
|
|
||||||
|
Improve later:
|
||||||
|
|
||||||
|
- share more metadata readers with Krea without sharing Krea prose;
|
||||||
|
- add a `caption_profile` option for concise/dense LoRA caption styles.
|
||||||
|
|
||||||
|
### Category JSON Path
|
||||||
|
|
||||||
|
Owner: `categories/*.json`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- scalable prompt pool content;
|
||||||
|
- named scene/expression/composition pools;
|
||||||
|
- item templates and axes;
|
||||||
|
- direct category-specific wording.
|
||||||
|
|
||||||
|
Improve later:
|
||||||
|
|
||||||
|
- introduce optional `family` and `action_type` fields on item templates so
|
||||||
|
Python filters do less keyword guessing;
|
||||||
|
- add `formatter_hint` fields only where needed, not globally;
|
||||||
|
- add a JSON audit that checks every referenced expression/composition/scene pool
|
||||||
|
exists.
|
||||||
|
|
||||||
|
### Node / UI Path
|
||||||
|
|
||||||
|
Owner: `__init__.py`, `loop_nodes.py`, `web/*.js`.
|
||||||
|
|
||||||
|
Keep here:
|
||||||
|
|
||||||
|
- ComfyUI node input/output declarations;
|
||||||
|
- widget behavior;
|
||||||
|
- button actions;
|
||||||
|
- dynamic input slots.
|
||||||
|
|
||||||
|
Improve later:
|
||||||
|
|
||||||
|
- split large node classes into files by family;
|
||||||
|
- keep node display names, return names, and docs in sync through the audit
|
||||||
|
helper;
|
||||||
|
- add small endpoint tests for profile/accumulator/index-switch routes.
|
||||||
|
|
||||||
|
## Path-Specific Improvements
|
||||||
|
|
||||||
|
### Prompt Builder
|
||||||
|
|
||||||
|
Near-term:
|
||||||
|
|
||||||
|
- Add final row hygiene (already done through `prompt_hygiene.py`).
|
||||||
|
- Add a metadata invariant checker for rows before return.
|
||||||
|
- Normalize every row with one function before JSON serialization.
|
||||||
|
|
||||||
|
Medium-term:
|
||||||
|
|
||||||
|
- Extract category loading and role graph logic.
|
||||||
|
- Convert keyword-heavy interaction filtering to template metadata.
|
||||||
|
|
||||||
|
### Insta/OF Pair
|
||||||
|
|
||||||
|
Near-term:
|
||||||
|
|
||||||
|
- Normalize pair metadata with one helper.
|
||||||
|
- Confirm pair prompts, captions, and soft/hard rows carry the same sanitized
|
||||||
|
scene/camera/clothing fields.
|
||||||
|
|
||||||
|
Medium-term:
|
||||||
|
|
||||||
|
- Make pair camera and clothing phases explicit subfunctions.
|
||||||
|
- Add smoke fixtures for same-cast, POV man, explicit nude, and different-camera
|
||||||
|
modes.
|
||||||
|
|
||||||
|
### Krea2
|
||||||
|
|
||||||
|
Near-term:
|
||||||
|
|
||||||
|
- Add final prose hygiene (already done through `prompt_hygiene.py`).
|
||||||
|
- Add tests for close foreplay, POV oral, POV penetration, aftercare, manual
|
||||||
|
stimulation, and camera-scene preservation.
|
||||||
|
|
||||||
|
Medium-term:
|
||||||
|
|
||||||
|
- Dispatch action rewriting by action family.
|
||||||
|
- Split Krea semantic helpers into smaller modules.
|
||||||
|
|
||||||
|
### SDXL
|
||||||
|
|
||||||
|
Near-term:
|
||||||
|
|
||||||
|
- Add final tag hygiene (already done through `prompt_hygiene.py`).
|
||||||
|
- Add smoke tests for trigger preservation and duplicate tag removal.
|
||||||
|
|
||||||
|
Medium-term:
|
||||||
|
|
||||||
|
- Make style/quality presets data-driven.
|
||||||
|
|
||||||
|
### Naturalizer
|
||||||
|
|
||||||
|
Near-term:
|
||||||
|
|
||||||
|
- Add final prose hygiene (already done through `prompt_hygiene.py`).
|
||||||
|
- Verify training captions keep trigger exactly once.
|
||||||
|
|
||||||
|
Medium-term:
|
||||||
|
|
||||||
|
- Add caption profiles for training and browsing use cases.
|
||||||
|
|
||||||
|
### Camera / Scene
|
||||||
|
|
||||||
|
Near-term:
|
||||||
|
|
||||||
|
- Keep Qwen/orbit as camera source.
|
||||||
|
- Keep scene-camera adapters scoped by location family.
|
||||||
|
- Use the memory note in
|
||||||
|
`/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV.
|
||||||
|
|
||||||
|
Medium-term:
|
||||||
|
|
||||||
|
- Move coworking adapter into a scene-camera adapter module.
|
||||||
|
- Build new adapters one location family at a time.
|
||||||
|
|
||||||
|
## Invariants To Preserve
|
||||||
|
|
||||||
|
- Metadata is the preferred formatter input.
|
||||||
|
- Prompt Builder should output structured rows even if raw prompt text is rough.
|
||||||
|
- Krea should fix prose and semantic action readability, not category selection.
|
||||||
|
- SDXL should produce tag-style output and preserve model triggers as requested.
|
||||||
|
- Naturalizer should output training-friendly captions without changing the
|
||||||
|
selected content.
|
||||||
|
- Generic cleanup belongs in `prompt_hygiene.py`; semantic cleanup belongs in
|
||||||
|
the owning route.
|
||||||
|
|
||||||
|
## Recommended Next Passes
|
||||||
|
|
||||||
|
1. Add metadata invariant checks and small smoke fixtures.
|
||||||
|
2. Split Krea action/POV/clothing helpers into separate modules.
|
||||||
|
3. Add category JSON pool reference validation to `tools/prompt_map_audit.py`.
|
||||||
|
4. Extract scene-camera adapters from `prompt_builder.py`.
|
||||||
|
5. Split `__init__.py` node classes by family after behavior is covered by smoke
|
||||||
|
checks.
|
||||||
@@ -605,6 +605,25 @@ Naturalizer field consumption:
|
|||||||
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` |
|
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` |
|
||||||
| Text fallback | `caption` or `prompt` text | `_text_to_prose` |
|
| Text fallback | `caption` or `prompt` text | `_text_to_prose` |
|
||||||
|
|
||||||
|
### Final Text Hygiene
|
||||||
|
|
||||||
|
`prompt_hygiene.py` owns route-agnostic final cleanup. It is intentionally
|
||||||
|
small: whitespace, punctuation, empty field labels, adjacent duplicate
|
||||||
|
sentences, repeated trigger prefixes, duplicate comma-list items, and dangling
|
||||||
|
connectors.
|
||||||
|
|
||||||
|
It is called from:
|
||||||
|
|
||||||
|
- `prompt_builder.build_prompt`
|
||||||
|
- `prompt_builder.build_insta_of_pair`
|
||||||
|
- `krea_formatter.format_krea2_prompt`
|
||||||
|
- `sdxl_formatter.format_sdxl_prompt`
|
||||||
|
- `caption_naturalizer.naturalize_caption`
|
||||||
|
|
||||||
|
Do not put semantic fixes in `prompt_hygiene.py`. Sexual action readability,
|
||||||
|
POV geometry, clothing state, Krea prose, SDXL weighting, and training-caption
|
||||||
|
policy still belong to their route-specific owner.
|
||||||
|
|
||||||
## Utility / Workflow Nodes
|
## Utility / Workflow Nodes
|
||||||
|
|
||||||
These do not own prompt pool wording, but they affect execution and review:
|
These do not own prompt pool wording, but they affect execution and review:
|
||||||
@@ -616,6 +635,7 @@ These do not own prompt pool wording, but they affect execution and review:
|
|||||||
| Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. |
|
| Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. |
|
||||||
| Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. |
|
| Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. |
|
||||||
| SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. |
|
| SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. |
|
||||||
|
| Krea2 resolution selector | `SxCPKrea2ResolutionSelector` in `__init__.py` | Krea-compatible width/height and API aspect/resolution helper. |
|
||||||
|
|
||||||
## Drift Audit Helper
|
## Drift Audit Helper
|
||||||
|
|
||||||
@@ -655,6 +675,7 @@ or pool appears there but not in this map, update the relevant route table.
|
|||||||
| Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. |
|
| Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. |
|
||||||
| Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. |
|
| Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. |
|
||||||
| SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. |
|
| SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. |
|
||||||
|
| Duplicate punctuation, empty labels, repeated trigger, repeated tag item | `prompt_hygiene.py`, then the route-specific formatter if the repeated content is semantic. |
|
||||||
| Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. |
|
| Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. |
|
||||||
| Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. |
|
| Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. |
|
||||||
|
|
||||||
|
|||||||
+14
-7
@@ -4,6 +4,11 @@ import json
|
|||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
|
||||||
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
|
from prompt_hygiene import sanitize_negative_text, sanitize_prose_text
|
||||||
|
|
||||||
|
|
||||||
TRIGGER_CANDIDATES = (
|
TRIGGER_CANDIDATES = (
|
||||||
"sxcpinup_coloredpencil",
|
"sxcpinup_coloredpencil",
|
||||||
@@ -2678,20 +2683,21 @@ def format_krea2_prompt(
|
|||||||
|
|
||||||
if row and row.get("mode") == "Insta/OF":
|
if row and row.get("mode") == "Insta/OF":
|
||||||
soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
|
soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
|
||||||
selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
|
|
||||||
selected_negative = hard_negative if target == "hardcore" else soft_negative
|
|
||||||
if extra_positive.strip():
|
if extra_positive.strip():
|
||||||
selected = f"{selected.rstrip()} {extra_positive.strip()}"
|
|
||||||
soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
|
soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
|
||||||
hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
|
hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
|
||||||
negative = _combine_negative(selected_negative, negative_prompt, extra_negative)
|
soft_prompt = sanitize_prose_text(soft_prompt, triggers=TRIGGER_CANDIDATES)
|
||||||
|
hard_prompt = sanitize_prose_text(hard_prompt, triggers=TRIGGER_CANDIDATES)
|
||||||
|
selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
|
||||||
|
selected_negative = hard_negative if target == "hardcore" else soft_negative
|
||||||
|
negative = sanitize_negative_text(_combine_negative(selected_negative, negative_prompt, extra_negative))
|
||||||
return {
|
return {
|
||||||
"krea_prompt": selected,
|
"krea_prompt": selected,
|
||||||
"negative_prompt": negative,
|
"negative_prompt": negative,
|
||||||
"krea_softcore_prompt": soft_prompt,
|
"krea_softcore_prompt": soft_prompt,
|
||||||
"krea_hardcore_prompt": hard_prompt,
|
"krea_hardcore_prompt": hard_prompt,
|
||||||
"softcore_negative_prompt": _combine_negative(soft_negative, extra_negative),
|
"softcore_negative_prompt": sanitize_negative_text(_combine_negative(soft_negative, extra_negative)),
|
||||||
"hardcore_negative_prompt": _combine_negative(hard_negative, extra_negative),
|
"hardcore_negative_prompt": sanitize_negative_text(_combine_negative(hard_negative, extra_negative)),
|
||||||
"method": f"{method}:krea2(insta_of_pair)",
|
"method": f"{method}:krea2(insta_of_pair)",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2704,7 +2710,8 @@ def format_krea2_prompt(
|
|||||||
|
|
||||||
if extra_positive.strip():
|
if extra_positive.strip():
|
||||||
prompt = f"{prompt.rstrip()} {extra_positive.strip()}"
|
prompt = f"{prompt.rstrip()} {extra_positive.strip()}"
|
||||||
negative = _combine_negative(extracted_negative, negative_prompt, extra_negative)
|
prompt = sanitize_prose_text(prompt, triggers=TRIGGER_CANDIDATES)
|
||||||
|
negative = sanitize_negative_text(_combine_negative(extracted_negative, negative_prompt, extra_negative))
|
||||||
return {
|
return {
|
||||||
"krea_prompt": prompt,
|
"krea_prompt": prompt,
|
||||||
"negative_prompt": negative,
|
"negative_prompt": negative,
|
||||||
|
|||||||
+27
-5
@@ -10,8 +10,18 @@ from typing import Any, Callable
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
from . import generate_prompt_batches as g
|
from . import generate_prompt_batches as g
|
||||||
|
from .prompt_hygiene import (
|
||||||
|
sanitize_caption_text,
|
||||||
|
sanitize_negative_text,
|
||||||
|
sanitize_prompt_text,
|
||||||
|
)
|
||||||
except ImportError: # Allows local smoke tests with `python -c`.
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
import generate_prompt_batches as g
|
import generate_prompt_batches as g
|
||||||
|
from prompt_hygiene import (
|
||||||
|
sanitize_caption_text,
|
||||||
|
sanitize_negative_text,
|
||||||
|
sanitize_prompt_text,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
ROOT_DIR = Path(__file__).resolve().parent
|
ROOT_DIR = Path(__file__).resolve().parent
|
||||||
@@ -7609,7 +7619,11 @@ def build_prompt(
|
|||||||
row = _apply_camera_config(row, camera_config)
|
row = _apply_camera_config(row, camera_config)
|
||||||
active_trigger = trigger.strip() or g.TRIGGER
|
active_trigger = trigger.strip() or g.TRIGGER
|
||||||
row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt))
|
row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt))
|
||||||
row["negative_prompt"] = _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
|
row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,))
|
||||||
|
row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,))
|
||||||
|
row["negative_prompt"] = sanitize_negative_text(
|
||||||
|
_combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
|
||||||
|
)
|
||||||
row["trigger"] = active_trigger
|
row["trigger"] = active_trigger
|
||||||
row.setdefault("expression_intensity", expression_intensity)
|
row.setdefault("expression_intensity", expression_intensity)
|
||||||
row.setdefault("expression_intensity_source", expression_intensity_source)
|
row.setdefault("expression_intensity_source", expression_intensity_source)
|
||||||
@@ -8794,8 +8808,10 @@ def build_insta_of_pair(
|
|||||||
|
|
||||||
soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
|
soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
|
||||||
hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
|
hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
|
||||||
soft_negative = _combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative)
|
soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,))
|
||||||
hard_negative = _combined_negative(INSTA_OF_NEGATIVE, extra_negative)
|
hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,))
|
||||||
|
soft_negative = sanitize_negative_text(_combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative))
|
||||||
|
hard_negative = sanitize_negative_text(_combined_negative(INSTA_OF_NEGATIVE, extra_negative))
|
||||||
soft_caption_parts = [
|
soft_caption_parts = [
|
||||||
active_trigger,
|
active_trigger,
|
||||||
"Insta/OF softcore mode",
|
"Insta/OF softcore mode",
|
||||||
@@ -8810,7 +8826,10 @@ def build_insta_of_pair(
|
|||||||
soft_row["composition"],
|
soft_row["composition"],
|
||||||
_camera_caption_text(soft_camera_config) if soft_camera_directive else "",
|
_camera_caption_text(soft_camera_config) if soft_camera_directive else "",
|
||||||
]
|
]
|
||||||
soft_caption = ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip())
|
soft_caption = sanitize_caption_text(
|
||||||
|
", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()),
|
||||||
|
triggers=(active_trigger,),
|
||||||
|
)
|
||||||
hard_caption_parts = [
|
hard_caption_parts = [
|
||||||
active_trigger,
|
active_trigger,
|
||||||
"Insta/OF hardcore mode",
|
"Insta/OF hardcore mode",
|
||||||
@@ -8824,7 +8843,10 @@ def build_insta_of_pair(
|
|||||||
hard_composition,
|
hard_composition,
|
||||||
_camera_caption_text(hard_camera_config) if hard_camera_directive else "",
|
_camera_caption_text(hard_camera_config) if hard_camera_directive else "",
|
||||||
]
|
]
|
||||||
hard_caption = ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip())
|
hard_caption = sanitize_caption_text(
|
||||||
|
", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()),
|
||||||
|
triggers=(active_trigger,),
|
||||||
|
)
|
||||||
metadata = {
|
metadata = {
|
||||||
"mode": "Insta/OF",
|
"mode": "Insta/OF",
|
||||||
"options": options,
|
"options": options,
|
||||||
|
|||||||
@@ -0,0 +1,169 @@
|
|||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import re
|
||||||
|
from typing import Any, Iterable
|
||||||
|
|
||||||
|
|
||||||
|
# Field labels whose empty occurrences ("Scene: .", "Pose: none") are removed by
# `_strip_empty_fields`. They are matched case-insensitively. The order is kept
# stable because the tuple is joined into a regex alternation.
EMPTY_FIELD_LABELS = (
    # Cast
    "Ages", "Body types", "Cast", "Cast descriptors", "Characters",
    # Scene and pose
    "Scene", "Setting", "Pose", "Sexual pose", "Sexual scene",
    # Expression
    "Facial expression", "Facial expressions",
    # Wardrobe and props
    "Clothing", "Erotic outfit", "Prop/detail",
    # Framing
    "Composition", "Role graph",
    # Camera
    "Camera", "Camera control", "Camera priority",
    # Directives
    "Use", "Avoid",
)
|
||||||
|
|
||||||
|
|
||||||
|
def clean_spacing(value: Any) -> str:
    """Collapse whitespace and repair doubled or stray punctuation.

    Args:
        value: Any object; ``None`` is treated as an empty string, everything
            else is converted with ``str()``.

    Returns:
        The text with every whitespace run reduced to one space, no space
        before ``, . ; :``, runs of ``, ; :`` reduced to the last character of
        the run, any ``,`` / ``;`` / ``:`` directly before a period dropped,
        runs of periods reduced to a single period, and no padding just
        inside parentheses. The result is stripped.
    """
    text = "" if value is None else str(value)
    # `\s` already matches newlines and tabs, so one pass normalizes them all.
    text = re.sub(r"\s+", " ", text).strip()
    text = re.sub(r"\s+([,.;:])", r"\1", text)
    text = re.sub(r"([,;:]){2,}", r"\1", text)
    # Drop separators that sit right before a period *before* collapsing
    # periods, so ".,." cannot leave a doubled period behind.
    text = re.sub(r"[,;:]\s*\.", ".", text)
    # Collapse the whole run at once; a pairwise replacement turns "..." into
    # ".." and makes a second call change the text again.
    text = re.sub(r"\.(?:\s*\.)+", ".", text)
    text = re.sub(r"\(\s+", "(", text)
    text = re.sub(r"\s+\)", ")", text)
    return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _strip_empty_fields(text: str) -> str:
    """Remove field labels from ``EMPTY_FIELD_LABELS`` that carry no value.

    Handles, case-insensitively:

    * a label followed by a colon and then punctuation (``"Scene: ."``);
    * a label followed by a colon at the end of the text or right before
      punctuation left over by the previous step;
    * a label that forms a whole sentence by itself (``"Pose."``);
    * a label whose value is ``none``, ``null`` or ``n/a``.

    Args:
        text: Text to clean; a falsy value returns ``""``.

    Returns:
        The text without the empty labels, passed through ``clean_spacing``.
    """
    if not text:
        return ""
    labels = "|".join(re.escape(label) for label in EMPTY_FIELD_LABELS)
    labelled = rf"\b(?:{labels})"
    text = re.sub(rf"{labelled}\s*:\s*[.,;]", "", text, flags=re.IGNORECASE)
    text = re.sub(rf"{labelled}:\s*(?=\.|,|;|$)", "", text, flags=re.IGNORECASE)
    # A bare label only counts as an empty field when it is the entire
    # sentence. Matching it anywhere before a period would also delete the last
    # word of ordinary prose such as "She strikes a pose." or "looks at the
    # camera.", because the labels are common English words.
    text = re.sub(
        rf"(?:^|(?<=[.!?]\s))(?:{labels})\.(?=\s|$)",
        "",
        text,
        flags=re.IGNORECASE,
    )
    text = re.sub(rf"{labelled}:\s*(?:none|null|n/a)\b[.,;]?", "", text, flags=re.IGNORECASE)
    return clean_spacing(text)
|
||||||
|
|
||||||
|
|
||||||
|
def _drop_dangling_connectors(text: str) -> str:
    """Remove connector words that are left hanging next to punctuation.

    The rewrites run in a fixed order, each over the output of the previous
    one, all case-insensitively; the result goes through ``clean_spacing``.
    """
    connector = r"(?:with|and|or|while|featuring)"
    rewrites = (
        # connector directly before punctuation -> keep the punctuation only
        (rf"\b{connector}\s*([,.;])", r"\1"),
        # connector squeezed between two punctuation marks -> keep the first
        (rf"([,.;])\s*{connector}\s*([,.;])", r"\1"),
        (r"\bwith\s*,", ""),
        (r",\s*and\s*\.", "."),
    )
    for pattern, replacement in rewrites:
        text = re.sub(pattern, replacement, text, flags=re.IGNORECASE)
    return clean_spacing(text)
|
||||||
|
|
||||||
|
|
||||||
|
def _sentence_key(text: str, triggers: Iterable[str] = ()) -> str:
|
||||||
|
key_text = text
|
||||||
|
for trigger in triggers:
|
||||||
|
trigger = str(trigger or "").strip()
|
||||||
|
if trigger:
|
||||||
|
key_text = re.sub(rf"^{re.escape(trigger)}\s*[,.;]\s*", "", key_text, flags=re.IGNORECASE)
|
||||||
|
return re.sub(r"\W+", " ", key_text.lower()).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe_adjacent_sentences(text: str, triggers: Iterable[str] = ()) -> str:
    """Drop a sentence when it repeats the sentence directly before it.

    Sentences are split on whitespace that follows ``.``, ``!`` or ``?`` and
    compared through ``_sentence_key``, so case, punctuation and a leading
    trigger do not prevent a match. Sentences whose key is empty are dropped.

    Args:
        text: Text to scan.
        triggers: Trigger words forwarded to ``_sentence_key``.

    Returns:
        The kept sentences joined with single spaces.
    """
    # `triggers` is re-read for every sentence; materialize it once so a
    # one-shot iterable (e.g. a generator) is not exhausted after the first.
    trigger_pool = tuple(triggers)
    kept: list[str] = []
    last_key = ""
    for sentence in re.split(r"(?<=[.!?])\s+", text):
        sentence = sentence.strip()
        if not sentence:
            continue
        key = _sentence_key(sentence, trigger_pool)
        if key and key != last_key:
            kept.append(sentence)
        # Tracked even for dropped sentences, so only direct neighbours compare.
        last_key = key
    return " ".join(kept)
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe_labeled_sentences(text: str) -> str:
|
||||||
|
parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()]
|
||||||
|
seen: set[tuple[str, str]] = set()
|
||||||
|
deduped: list[str] = []
|
||||||
|
for part in parts:
|
||||||
|
match = re.match(r"^([A-Za-z][A-Za-z /_-]{1,40}):\s*(.+)$", part)
|
||||||
|
if not match:
|
||||||
|
deduped.append(part)
|
||||||
|
continue
|
||||||
|
key = (match.group(1).strip().lower(), re.sub(r"\W+", " ", match.group(2).lower()).strip())
|
||||||
|
if key not in seen:
|
||||||
|
deduped.append(part)
|
||||||
|
seen.add(key)
|
||||||
|
return " ".join(deduped)
|
||||||
|
|
||||||
|
|
||||||
|
def _trigger_prefix_key(text: str, triggers: Iterable[str]) -> str:
|
||||||
|
lowered = text.lower().strip()
|
||||||
|
for trigger in triggers:
|
||||||
|
trigger = str(trigger or "").strip()
|
||||||
|
if trigger and lowered.startswith(trigger.lower()):
|
||||||
|
return trigger
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _dedupe_trigger_prefix(text: str, triggers: Iterable[str]) -> str:
    """Collapse a repeated leading trigger into a single ``"trigger, "`` prefix.

    Args:
        text: Text to clean; it is passed through ``clean_spacing`` first.
        triggers: Candidate trigger words, resolved via ``_trigger_prefix_key``.

    Returns:
        * the cleaned text unchanged when no trigger opens it, or when the
          trigger is not followed by ``,``, ``.``, ``;`` or the end of the text;
        * the trigger alone when nothing else remains;
        * otherwise ``"<trigger>, <rest>"``, where ``rest`` keeps its final
          period.
    """
    text = clean_spacing(text)
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return text
    leading = rf"^(?:{re.escape(trigger)}\s*(?:[,.;]\s*|$))+"
    rest, removed = re.subn(leading, "", text, flags=re.IGNORECASE)
    if not removed:
        # The trigger only starts a longer phrase ("trig style portrait").
        # Prepending it here would create the duplicate this function removes.
        return text
    # Trim separators on both sides but keep a closing period, so prose ends
    # the same way whether or not a trigger is present.
    rest = rest.lstrip(" ,.;").rstrip(" ,;")
    return f"{trigger}, {rest}" if rest else trigger
|
||||||
|
|
||||||
|
|
||||||
|
def _split_comma_items(text: str) -> list[str]:
    """Split ``text`` on commas and semicolons into trimmed, non-empty items.

    The text goes through ``clean_spacing`` first; each item then loses any
    surrounding spaces, commas, periods and semicolons.
    """
    pieces = re.split(r"\s*[,;]\s*", clean_spacing(text))
    trimmed = (piece.strip(" ,.;") for piece in pieces)
    return [piece for piece in trimmed if piece]
|
||||||
|
|
||||||
|
|
||||||
|
def dedupe_comma_list(text: Any) -> str:
    """Return the comma/semicolon list in ``text`` without repeated items.

    Items are compared lower-cased with every run of non-word characters
    reduced to a space; the first spelling wins and the original order is kept.
    Items whose comparison key is empty are dropped. A falsy ``text`` yields
    ``""``. The result is joined with ``", "``.
    """
    first_seen: dict[str, str] = {}
    for item in _split_comma_items(str(text or "")):
        key = re.sub(r"\W+", " ", item.lower()).strip()
        if key:
            # Dicts preserve insertion order; setdefault keeps the first spelling.
            first_seen.setdefault(key, item)
    return ", ".join(first_seen.values())
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_prose_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Run the full prose clean-up pipeline on ``value``.

    Steps, in order: spacing/punctuation normalization, empty field-label
    removal, dangling connector removal, repeated ``Label: value`` sentence
    removal, leading trigger de-duplication, and adjacent duplicate sentence
    removal.

    Args:
        value: Any object; converted to text by ``clean_spacing``.
        triggers: Trigger words that may prefix the text. A single ``str`` is
            treated as one trigger rather than as a sequence of characters.

    Returns:
        The cleaned text without leading/trailing spaces, commas or
        semicolons; ``""`` when the input is empty after spacing clean-up.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    # Two steps below read the triggers, so a one-shot iterable must be
    # materialized or the second step would see it already exhausted.
    if isinstance(triggers, str):
        trigger_pool: tuple[str, ...] = (triggers,)
    else:
        trigger_pool = tuple(triggers or ())
    text = _strip_empty_fields(text)
    text = _drop_dangling_connectors(text)
    text = _dedupe_labeled_sentences(text)
    text = _dedupe_trigger_prefix(text, trigger_pool)
    text = _dedupe_adjacent_sentences(text, trigger_pool)
    return clean_spacing(text).strip(" ,;")
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_prompt_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Sanitize prompt text; delegates unchanged to ``sanitize_prose_text``."""
    cleaned_prompt = sanitize_prose_text(value, triggers=triggers)
    return cleaned_prompt
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_caption_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Sanitize caption text; delegates unchanged to ``sanitize_prose_text``."""
    cleaned_caption = sanitize_prose_text(value, triggers=triggers)
    return cleaned_caption
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str:
    """Deduplicate a comma-separated tag prompt, keeping one leading trigger.

    The text is normalized with ``clean_spacing``. If it begins with one of
    ``triggers`` as a standalone tag (followed by ``,``, ``;`` or the end of
    the text), all leading repetitions of that tag are removed, the remaining
    tags are deduplicated with ``dedupe_comma_list``, and the trigger is put
    back once at the front. Otherwise the whole text is simply deduplicated.

    Args:
        value: Raw tag prompt; whatever ``clean_spacing`` accepts.
        triggers: Candidate trigger words.

    Returns:
        The sanitized tag list, or ``""`` when the input normalizes to nothing.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return dedupe_comma_list(text)
    # ``|$`` also consumes a lone or trailing trigger that has no separator after it.
    pattern = rf"^(?:{re.escape(trigger)}\s*(?:[,;]\s*|$))+"
    remainder, hits = re.subn(pattern, "", text, flags=re.IGNORECASE)
    if not hits:
        # The trigger is only the start of a longer first tag; prepending it
        # again would introduce a duplicate instead of removing one.
        return dedupe_comma_list(text)
    tags = dedupe_comma_list(remainder.strip(" ,;"))
    # Avoid a dangling "trigger, " when nothing but the trigger is left.
    return f"{trigger}, {tags}" if tags else trigger
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_negative_text(value: Any) -> str:
    """Sanitize a negative prompt by deduplicating it with ``dedupe_comma_list``."""
    deduplicated = dedupe_comma_list(value)
    return deduplicated
|
||||||
+24
-6
@@ -4,6 +4,11 @@ import json
|
|||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
|
try:
|
||||||
|
from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
|
||||||
|
except ImportError: # Allows local smoke tests with `python -c`.
|
||||||
|
from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
|
||||||
|
|
||||||
|
|
||||||
TRIGGER_CANDIDATES = (
|
TRIGGER_CANDIDATES = (
|
||||||
"sxcpinup_coloredpencil",
|
"sxcpinup_coloredpencil",
|
||||||
@@ -432,11 +437,14 @@ def _assemble_prompt(
|
|||||||
custom_quality: str,
|
custom_quality: str,
|
||||||
extra_positive: str,
|
extra_positive: str,
|
||||||
) -> str:
|
) -> str:
|
||||||
return _combine_tags(
|
return sanitize_tag_prompt(
|
||||||
|
_combine_tags(
|
||||||
_style_prefix(style_preset, trigger, prepend_trigger, custom_style),
|
_style_prefix(style_preset, trigger, prepend_trigger, custom_style),
|
||||||
body_tags,
|
body_tags,
|
||||||
_quality_tail(quality_preset, custom_quality),
|
_quality_tail(quality_preset, custom_quality),
|
||||||
extra_positive,
|
extra_positive,
|
||||||
|
),
|
||||||
|
triggers=(trigger,),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -504,14 +512,22 @@ def format_sdxl_prompt(
|
|||||||
extra_positive,
|
extra_positive,
|
||||||
)
|
)
|
||||||
selected = hard_prompt if target == "hardcore" else soft_prompt
|
selected = hard_prompt if target == "hardcore" else soft_prompt
|
||||||
selected_negative = row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
|
selected_negative = (
|
||||||
|
row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
|
||||||
|
)
|
||||||
return {
|
return {
|
||||||
"sdxl_prompt": selected,
|
"sdxl_prompt": selected,
|
||||||
"negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative),
|
"negative_prompt": sanitize_negative_text(
|
||||||
|
_combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative)
|
||||||
|
),
|
||||||
"sdxl_softcore_prompt": soft_prompt,
|
"sdxl_softcore_prompt": soft_prompt,
|
||||||
"sdxl_hardcore_prompt": hard_prompt,
|
"sdxl_hardcore_prompt": hard_prompt,
|
||||||
"softcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative),
|
"softcore_negative_prompt": sanitize_negative_text(
|
||||||
"hardcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative),
|
_combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative)
|
||||||
|
),
|
||||||
|
"hardcore_negative_prompt": sanitize_negative_text(
|
||||||
|
_combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative)
|
||||||
|
),
|
||||||
"method": f"{method}:sdxl(insta_of_pair)",
|
"method": f"{method}:sdxl(insta_of_pair)",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -534,7 +550,9 @@ def format_sdxl_prompt(
|
|||||||
)
|
)
|
||||||
return {
|
return {
|
||||||
"sdxl_prompt": prompt,
|
"sdxl_prompt": prompt,
|
||||||
"negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative),
|
"negative_prompt": sanitize_negative_text(
|
||||||
|
_combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative)
|
||||||
|
),
|
||||||
"sdxl_softcore_prompt": "",
|
"sdxl_softcore_prompt": "",
|
||||||
"sdxl_hardcore_prompt": "",
|
"sdxl_hardcore_prompt": "",
|
||||||
"softcore_negative_prompt": "",
|
"softcore_negative_prompt": "",
|
||||||
|
|||||||
Reference in New Issue
Block a user