2 Commits

Author SHA1 Message Date
Ethanfel cff1a248ec Add prompt route smoke checks 2026-06-26 14:50:45 +02:00
Ethanfel b3cd8d77a1 Add prompt hygiene architecture pass 2026-06-26 13:26:06 +02:00
8 changed files with 998 additions and 25 deletions
+9 -2
View File
@@ -4,6 +4,11 @@ import json
import re import re
from typing import Any from typing import Any
try:
from .prompt_hygiene import sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
from prompt_hygiene import sanitize_prose_text
OLD_TRIGGER = "sxcpinup_coloredpencil" OLD_TRIGGER = "sxcpinup_coloredpencil"
DEFAULT_TRIGGER = "sxcppnl7" DEFAULT_TRIGGER = "sxcppnl7"
@@ -724,6 +729,8 @@ def naturalize_caption(
row, row_method = _row_from_inputs(source_text, metadata_json, input_hint) row, row_method = _row_from_inputs(source_text, metadata_json, input_hint)
if row is not None: if row is not None:
prose, method = _metadata_to_prose(row, detail_level, keep_style) prose, method = _metadata_to_prose(row, detail_level, keep_style)
return _with_trigger(prose, trigger, include_trigger), f"{row_method}:{method}" caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
return caption, f"{row_method}:{method}"
prose, method = _text_to_prose(source_text, detail_level, keep_style) prose, method = _text_to_prose(source_text, detail_level, keep_style)
return _with_trigger(prose, trigger, include_trigger), method caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,))
return caption, method
@@ -0,0 +1,309 @@
# Prompt Architecture Improvement Plan
This is a working research note for organizing the prompt builder around the
routing map in `docs/prompt-pool-routing-map.md`.
## Current Branch Additions
The current branch adds two major surfaces:
- `SxCP Krea2 Resolution Selector` in `__init__.py`, with README notes.
- Expanded hardcore interaction/manual/action pools in
`categories/sexual_poses.json`,
`categories/expression_composition_pools.json`, `prompt_builder.py`, and
`krea_formatter.py`.
The map audit currently sees:
- 15 sexual pose subcategories.
- 94 sexual pose item templates.
- 23 expression pools.
- 24 composition pools.
- A new Krea2 resolution node with width/height/API aspect outputs.
## Architectural Finding
The project has a good functional map, but ownership is still mixed inside large
files:
- `prompt_builder.py` owns selection, character resolution, role graph logic,
camera adaptation, pair assembly, and some final string cleanup.
- `krea_formatter.py` owns metadata parsing, cast naturalization, sexual action
rewriting, POV rewriting, clothing cleanup, camera preservation, fallback
parsing, and final prose assembly.
- `sdxl_formatter.py` owns tag assembly and style/quality presets.
- `caption_naturalizer.py` owns training-caption prose.
- Category JSON files own scalable pool content, but Python still owns several
compatibility and role-graph decisions.
The biggest maintainability risk is not the number of pools. The risk is that
selection, semantic rewriting, and final text hygiene are too interleaved. When a
prompt has wrong text, it is easy to patch the wrong layer.
## First Refactor Boundary
Generic text hygiene now has one home:
- `prompt_hygiene.py`
It should only handle route-agnostic cleanup:
- whitespace and punctuation normalization;
- empty field-label removal;
- repeated trigger prefix cleanup;
- duplicate comma-list item removal;
- adjacent duplicate sentence and repeated labeled-sentence cleanup;
- simple dangling connector cleanup.
It must not make semantic decisions such as sexual action positioning, POV
geometry, clothing state, or model-specific tag weighting. Those stay in the
route-specific owner.
Current integration points:
- `prompt_builder.build_prompt`
- `prompt_builder.build_insta_of_pair`
- `krea_formatter.format_krea2_prompt`
- `sdxl_formatter.format_sdxl_prompt`
- `caption_naturalizer.naturalize_caption`
## Target Organization
### Generation Layer
Owner: `prompt_builder.py` plus `categories/*.json`.
Keep here:
- category/subcategory/item selection;
- seed axis routing;
- character slot/profile resolution;
- scene/expression/composition pool selection;
- role graph creation from structured category axes;
- metadata row construction.
Move or isolate later:
- role graph generation for hardcore interaction categories into a dedicated
module, for example `hardcore_role_graphs.py`;
- camera-scene adapters into `scene_camera_adapters.py`;
- category-library loading and inheritance helpers into `category_library.py`.
### Pair / Adapter Layer
Owner today: `build_insta_of_pair`.
Keep here:
- soft/hard row creation;
- continuity policy;
- softcore cast policy;
- pair-level camera routing;
- pair metadata shape.
Improve later:
- make a single pair metadata sanitizer that normalizes `softcore_row`,
`hardcore_row`, pair prompts, negatives, captions, and camera fields;
- split pair assembly into small functions by phase:
`build_soft_row`, `build_hard_row`, `resolve_pair_camera`,
`resolve_pair_clothing`, `assemble_pair_metadata`.
### Krea2 Formatter Path
Owner: `krea_formatter.py`.
Keep here:
- Krea prose style;
- cast prose;
- hardcore action sentence rewriting;
- POV sentence rewriting;
- clothing naturalization;
- camera-scene preservation;
- fallback text parsing.
Improve later:
- split semantic blocks into modules:
`krea_cast.py`, `krea_actions.py`, `krea_pov.py`, `krea_clothing.py`;
- add route-level smoke fixtures for representative metadata rows;
- make `_hardcore_action_sentence` dispatch by action family instead of long
conditional chains.
### SDXL Formatter Path
Owner: `sdxl_formatter.py`.
Keep here:
- trigger behavior;
- style and quality presets;
- tag ordering;
- weighted explicit tags;
- negative-prompt assembly.
Improve later:
- move presets into data dictionaries or JSON so adding styles does not require
editing formatter logic;
- add formatter profiles for Pony, SDXL photo, and flat vector;
- make fallback cleanup use the shared field-label inventory.
### Naturalizer Path
Owner: `caption_naturalizer.py`.
Keep here:
- natural sentence caption assembly;
- training-caption trigger behavior;
- style-tail policy.
Improve later:
- share more metadata readers with Krea without sharing Krea prose;
- add a `caption_profile` option for concise/dense LoRA caption styles.
### Category JSON Path
Owner: `categories/*.json`.
Keep here:
- scalable prompt pool content;
- named scene/expression/composition pools;
- item templates and axes;
- direct category-specific wording.
Improve later:
- introduce optional `family` and `action_type` fields on item templates so
Python filters do less keyword guessing;
- add `formatter_hint` fields only where needed, not globally;
- add a JSON audit that checks every referenced expression/composition/scene pool
exists.
### Node / UI Path
Owner: `__init__.py`, `loop_nodes.py`, `web/*.js`.
Keep here:
- ComfyUI node input/output declarations;
- widget behavior;
- button actions;
- dynamic input slots.
Improve later:
- split large node classes into files by family;
- keep node display names, return names, and docs in sync through the audit
helper;
- add small endpoint tests for profile/accumulator/index-switch routes.
## Path-Specific Improvements
### Prompt Builder
Near-term:
- Add final row hygiene (already done through `prompt_hygiene.py`).
- Add a metadata smoke checker for representative rows through
`tools/prompt_smoke.py`.
- Normalize every row with one function before JSON serialization.
Medium-term:
- Extract category loading and role graph logic.
- Convert keyword-heavy interaction filtering to template metadata.
### Insta/OF Pair
Near-term:
- Normalize pair metadata with one helper.
- Confirm pair prompts, captions, and soft/hard rows carry the same sanitized
scene/camera/clothing fields.
- Keep same-room pair continuity synchronized in both assembled prompt text and
`hardcore_row.scene_text`; `tools/prompt_smoke.py` covers this drift case.
Medium-term:
- Make pair camera and clothing phases explicit subfunctions.
- Add smoke fixtures for same-cast, POV man, explicit nude, and different-camera
modes.
### Krea2
Near-term:
- Add final prose hygiene (already done through `prompt_hygiene.py`).
- Add smoke coverage through `tools/prompt_smoke.py` for metadata-driven Krea2
formatting across built-in rows, hardcore rows, same-cast pairs, and POV
pairs. Expand it next for close foreplay, POV penetration, and camera-scene
preservation.
Medium-term:
- Dispatch action rewriting by action family.
- Split Krea semantic helpers into smaller modules.
### SDXL
Near-term:
- Add final tag hygiene (already done through `prompt_hygiene.py`).
- Add smoke tests for trigger preservation and duplicate tag removal through
`tools/prompt_smoke.py`.
Medium-term:
- Make style/quality presets data-driven.
### Naturalizer
Near-term:
- Add final prose hygiene (already done through `prompt_hygiene.py`).
- Verify training captions keep trigger exactly once through
`tools/prompt_smoke.py`.
Medium-term:
- Add caption profiles for training and browsing use cases.
### Camera / Scene
Near-term:
- Keep Qwen/orbit as camera source.
- Keep scene-camera adapters scoped by location family.
- Use the scene-camera memory note when editing POV. It currently lives at the
  maintainer-local path `/home/ethanfel/.codex/memories/scene-camera-system.md`,
  which is outside this repository.
Medium-term:
- Move coworking adapter into a scene-camera adapter module.
- Build new adapters one location family at a time.
## Invariants To Preserve
- Metadata is the preferred formatter input.
- Prompt Builder should output structured rows even if raw prompt text is rough.
- Krea should fix prose and semantic action readability, not category selection.
- SDXL should produce tag-style output and preserve model triggers as requested.
- Naturalizer should output training-friendly captions without changing the
selected content.
- Generic cleanup belongs in `prompt_hygiene.py`; semantic cleanup belongs in
the owning route.
## Recommended Next Passes
1. Expand `tools/prompt_smoke.py` with camera-scene, explicit nude, and
different-camera pair fixtures.
2. Split Krea action/POV/clothing helpers into separate modules.
3. Add category JSON pool reference validation to `tools/prompt_map_audit.py`.
4. Extract scene-camera adapters from `prompt_builder.py`.
5. Split `__init__.py` node classes by family after behavior is covered by smoke
checks.
+43
View File
@@ -605,6 +605,25 @@ Naturalizer field consumption:
| Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` | | Insta/OF pair | `softcore_row`, `hardcore_row`, pair options and continuity | `_insta_pair_from_row` |
| Text fallback | `caption` or `prompt` text | `_text_to_prose` | | Text fallback | `caption` or `prompt` text | `_text_to_prose` |
### Final Text Hygiene
`prompt_hygiene.py` owns route-agnostic final cleanup. It is intentionally
small: whitespace, punctuation, empty field labels, adjacent duplicate
sentences, repeated labeled sentences, repeated trigger prefixes, duplicate
comma-list items, and dangling connectors.
It is called from:
- `prompt_builder.build_prompt`
- `prompt_builder.build_insta_of_pair`
- `krea_formatter.format_krea2_prompt`
- `sdxl_formatter.format_sdxl_prompt`
- `caption_naturalizer.naturalize_caption`
Do not put semantic fixes in `prompt_hygiene.py`. Sexual action readability,
POV geometry, clothing state, Krea prose, SDXL weighting, and training-caption
policy still belong to their route-specific owner.
## Utility / Workflow Nodes ## Utility / Workflow Nodes
These do not own prompt pool wording, but they affect execution and review: These do not own prompt pool wording, but they affect execution and review:
@@ -616,6 +635,7 @@ These do not own prompt pool wording, but they affect execution and review:
| Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. | | Accumulator | `loop_nodes.py`, `web/accumulator_preview.js` | Stores generated values/images during workflow execution and previews/reorders/deletes them. |
| Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. | | Persistent text preview | `loop_nodes.py`, `web/preview_any_text.js` | Stores any value as text and keeps it after workflow reload. |
| SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. | | SDXL bucket size | `SxCPSDXLBucketSize` in `__init__.py` | Random/fixed SDXL bucket width and height selection. |
| Krea2 resolution selector | `SxCPKrea2ResolutionSelector` in `__init__.py` | Krea-compatible width/height and API aspect/resolution helper. |
## Drift Audit Helper ## Drift Audit Helper
@@ -635,6 +655,28 @@ The script does not import ComfyUI. It parses the repo and prints:
Use its output to spot doc drift after adding a new node or pool. If a new node Use its output to spot doc drift after adding a new node or pool. If a new node
or pool appears there but not in this map, update the relevant route table. or pool appears there but not in this map, update the relevant route table.
## Behavioral Smoke Helper
Route behavior should be checked when changing prompt generation, pair assembly,
formatter metadata parsing, trigger handling, expression disabling, or scene
continuity. Run:
```bash
python tools/prompt_smoke.py
```
The script does not import ComfyUI. It builds representative metadata rows and
pair metadata through the core Python APIs, then verifies:
- generated rows keep prompt, negative prompt, scene, composition, action item,
and role graph metadata populated;
- Krea2, SDXL, and natural caption routes use metadata instead of text fallback;
- SDXL and caption trigger handling keeps one trigger;
- negative prompts do not duplicate comma-list items;
- same-room Insta/OF continuity keeps prompt text and `hardcore_row.scene_text`
synchronized;
- expression-disabled rows do not fall back to generated expression text.
## Editing Cheatsheet ## Editing Cheatsheet
| Symptom | First file/function to inspect | | Symptom | First file/function to inspect |
@@ -655,6 +697,7 @@ or pool appears there but not in this map, update the relevant route table.
| Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. | | Camera prompt missing from Krea2 | Row `camera_directive` / `camera_scene_directive`, then Krea `_camera_phrase`. |
| Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. | | Trigger missing in Krea2 fallback | `format_krea2_prompt` preserve-trigger fallback behavior. |
| SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. | | SDXL tags too weak/wrong style | `sdxl_formatter.py` presets and `_row_core_tags` / `_soft_tags` / `_hard_tags`. |
| Duplicate punctuation, empty labels, repeated trigger, repeated tag item | `prompt_hygiene.py`, then the route-specific formatter if the repeated content is semantic. |
| Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. | | Saved profile does not match liked character | Profile save/load path and whether the saved input is row metadata or regenerated slot config. |
| Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. | | Accumulator preview behavior wrong | `loop_nodes.py` accumulator methods and `web/accumulator_preview.js`. |
+14 -7
View File
@@ -4,6 +4,11 @@ import json
import re import re
from typing import Any from typing import Any
try:
from .prompt_hygiene import sanitize_negative_text, sanitize_prose_text
except ImportError: # Allows local smoke tests with `python -c`.
from prompt_hygiene import sanitize_negative_text, sanitize_prose_text
TRIGGER_CANDIDATES = ( TRIGGER_CANDIDATES = (
"sxcpinup_coloredpencil", "sxcpinup_coloredpencil",
@@ -2678,20 +2683,21 @@ def format_krea2_prompt(
if row and row.get("mode") == "Insta/OF": if row and row.get("mode") == "Insta/OF":
soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode) soft_prompt, soft_negative, hard_prompt, hard_negative = _insta_pair_to_krea(row, detail_level, style_mode)
selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
selected_negative = hard_negative if target == "hardcore" else soft_negative
if extra_positive.strip(): if extra_positive.strip():
selected = f"{selected.rstrip()} {extra_positive.strip()}"
soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}" soft_prompt = f"{soft_prompt.rstrip()} {extra_positive.strip()}"
hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}" hard_prompt = f"{hard_prompt.rstrip()} {extra_positive.strip()}"
negative = _combine_negative(selected_negative, negative_prompt, extra_negative) soft_prompt = sanitize_prose_text(soft_prompt, triggers=TRIGGER_CANDIDATES)
hard_prompt = sanitize_prose_text(hard_prompt, triggers=TRIGGER_CANDIDATES)
selected = hard_prompt if target == "hardcore" else soft_prompt if target == "softcore" else soft_prompt
selected_negative = hard_negative if target == "hardcore" else soft_negative
negative = sanitize_negative_text(_combine_negative(selected_negative, negative_prompt, extra_negative))
return { return {
"krea_prompt": selected, "krea_prompt": selected,
"negative_prompt": negative, "negative_prompt": negative,
"krea_softcore_prompt": soft_prompt, "krea_softcore_prompt": soft_prompt,
"krea_hardcore_prompt": hard_prompt, "krea_hardcore_prompt": hard_prompt,
"softcore_negative_prompt": _combine_negative(soft_negative, extra_negative), "softcore_negative_prompt": sanitize_negative_text(_combine_negative(soft_negative, extra_negative)),
"hardcore_negative_prompt": _combine_negative(hard_negative, extra_negative), "hardcore_negative_prompt": sanitize_negative_text(_combine_negative(hard_negative, extra_negative)),
"method": f"{method}:krea2(insta_of_pair)", "method": f"{method}:krea2(insta_of_pair)",
} }
@@ -2704,7 +2710,8 @@ def format_krea2_prompt(
if extra_positive.strip(): if extra_positive.strip():
prompt = f"{prompt.rstrip()} {extra_positive.strip()}" prompt = f"{prompt.rstrip()} {extra_positive.strip()}"
negative = _combine_negative(extracted_negative, negative_prompt, extra_negative) prompt = sanitize_prose_text(prompt, triggers=TRIGGER_CANDIDATES)
negative = sanitize_negative_text(_combine_negative(extracted_negative, negative_prompt, extra_negative))
return { return {
"krea_prompt": prompt, "krea_prompt": prompt,
"negative_prompt": negative, "negative_prompt": negative,
+31 -6
View File
@@ -10,8 +10,18 @@ from typing import Any, Callable
try: try:
from . import generate_prompt_batches as g from . import generate_prompt_batches as g
from .prompt_hygiene import (
sanitize_caption_text,
sanitize_negative_text,
sanitize_prompt_text,
)
except ImportError: # Allows local smoke tests with `python -c`. except ImportError: # Allows local smoke tests with `python -c`.
import generate_prompt_batches as g import generate_prompt_batches as g
from prompt_hygiene import (
sanitize_caption_text,
sanitize_negative_text,
sanitize_prompt_text,
)
ROOT_DIR = Path(__file__).resolve().parent ROOT_DIR = Path(__file__).resolve().parent
@@ -7609,7 +7619,11 @@ def build_prompt(
row = _apply_camera_config(row, camera_config) row = _apply_camera_config(row, camera_config)
active_trigger = trigger.strip() or g.TRIGGER active_trigger = trigger.strip() or g.TRIGGER
row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt)) row["prompt"] = _prepend_trigger(row["prompt"], active_trigger, bool(prepend_trigger_to_prompt))
row["negative_prompt"] = _combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative) row["prompt"] = sanitize_prompt_text(row["prompt"], triggers=(active_trigger,))
row["caption"] = sanitize_caption_text(row.get("caption", ""), triggers=(active_trigger,))
row["negative_prompt"] = sanitize_negative_text(
_combined_negative(row.get("negative_prompt", g.NEGATIVE_PROMPT), extra_negative)
)
row["trigger"] = active_trigger row["trigger"] = active_trigger
row.setdefault("expression_intensity", expression_intensity) row.setdefault("expression_intensity", expression_intensity)
row.setdefault("expression_intensity_source", expression_intensity_source) row.setdefault("expression_intensity_source", expression_intensity_source)
@@ -8647,6 +8661,9 @@ def build_insta_of_pair(
soft_row = _apply_coworking_composition(soft_row, soft_subject_kind) soft_row = _apply_coworking_composition(soft_row, soft_subject_kind)
hard_row = _apply_coworking_composition(hard_row, hard_subject_kind) hard_row = _apply_coworking_composition(hard_row, hard_subject_kind)
hard_scene = soft_row["scene_text"] if options["continuity"] == "same_creator_same_room" else hard_row["scene_text"] hard_scene = soft_row["scene_text"] if options["continuity"] == "same_creator_same_room" else hard_row["scene_text"]
if hard_scene != hard_row.get("scene_text"):
hard_row["source_scene_text"] = hard_row.get("source_scene_text") or hard_row.get("scene_text", "")
hard_row["scene_text"] = hard_scene
hard_composition = _coworking_composition_prompt(hard_scene, hard_row["composition"], hard_subject_kind) hard_composition = _coworking_composition_prompt(hard_scene, hard_row["composition"], hard_subject_kind)
if hard_composition != hard_row["composition"]: if hard_composition != hard_row["composition"]:
hard_row["source_composition"] = hard_row.get("source_composition") or hard_row["composition"] hard_row["source_composition"] = hard_row.get("source_composition") or hard_row["composition"]
@@ -8744,7 +8761,7 @@ def build_insta_of_pair(
if "body is fully exposed" in hard_clothing_state.lower() or "bare skin unobstructed" in hard_clothing_state.lower(): if "body is fully exposed" in hard_clothing_state.lower() or "bare skin unobstructed" in hard_clothing_state.lower():
hard_scene = _body_exposure_scene_text(hard_scene) hard_scene = _body_exposure_scene_text(hard_scene)
hard_row["source_scene_text"] = hard_row.get("source_scene_text") or hard_row.get("scene_text", "") hard_row["source_scene_text"] = hard_row.get("source_scene_text") or hard_row.get("scene_text", "")
hard_row["scene_text"] = _body_exposure_scene_text(hard_row.get("scene_text", "")) hard_row["scene_text"] = hard_scene
hard_detail_density = options["hardcore_detail_density"] hard_detail_density = options["hardcore_detail_density"]
hard_detail_directive = { hard_detail_directive = {
"compact": "Use one compact position-first sexual action sentence; avoid repeated aftermath wording. ", "compact": "Use one compact position-first sexual action sentence; avoid repeated aftermath wording. ",
@@ -8794,8 +8811,10 @@ def build_insta_of_pair(
soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt)) soft_prompt = _insta_of_active_trigger(soft_prompt, active_trigger, bool(prepend_trigger_to_prompt))
hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt)) hard_prompt = _insta_of_active_trigger(hard_prompt, active_trigger, bool(prepend_trigger_to_prompt))
soft_negative = _combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative) soft_prompt = sanitize_prompt_text(soft_prompt, triggers=(active_trigger,))
hard_negative = _combined_negative(INSTA_OF_NEGATIVE, extra_negative) hard_prompt = sanitize_prompt_text(hard_prompt, triggers=(active_trigger,))
soft_negative = sanitize_negative_text(_combined_negative(INSTA_OF_SOFT_NEGATIVE, extra_negative))
hard_negative = sanitize_negative_text(_combined_negative(INSTA_OF_NEGATIVE, extra_negative))
soft_caption_parts = [ soft_caption_parts = [
active_trigger, active_trigger,
"Insta/OF softcore mode", "Insta/OF softcore mode",
@@ -8810,7 +8829,10 @@ def build_insta_of_pair(
soft_row["composition"], soft_row["composition"],
_camera_caption_text(soft_camera_config) if soft_camera_directive else "", _camera_caption_text(soft_camera_config) if soft_camera_directive else "",
] ]
soft_caption = ", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()) soft_caption = sanitize_caption_text(
", ".join(str(part).strip() for part in soft_caption_parts if str(part).strip()),
triggers=(active_trigger,),
)
hard_caption_parts = [ hard_caption_parts = [
active_trigger, active_trigger,
"Insta/OF hardcore mode", "Insta/OF hardcore mode",
@@ -8824,7 +8846,10 @@ def build_insta_of_pair(
hard_composition, hard_composition,
_camera_caption_text(hard_camera_config) if hard_camera_directive else "", _camera_caption_text(hard_camera_config) if hard_camera_directive else "",
] ]
hard_caption = ", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()) hard_caption = sanitize_caption_text(
", ".join(str(part).strip() for part in hard_caption_parts if str(part).strip()),
triggers=(active_trigger,),
)
metadata = { metadata = {
"mode": "Insta/OF", "mode": "Insta/OF",
"options": options, "options": options,
+169
View File
@@ -0,0 +1,169 @@
from __future__ import annotations
import re
from typing import Any, Iterable
# Field labels that `_strip_empty_fields` removes when they appear with no
# value (for example "Scene: ." or "Clothing: none"). The entries are joined
# into a single regex alternation and matched case-insensitively there.
EMPTY_FIELD_LABELS = (
    "Ages",
    "Body types",
    "Cast",
    "Cast descriptors",
    "Characters",
    "Scene",
    "Setting",
    "Pose",
    "Sexual pose",
    "Sexual scene",
    "Facial expression",
    "Facial expressions",
    "Clothing",
    "Erotic outfit",
    "Prop/detail",
    "Composition",
    "Role graph",
    "Camera",
    "Camera control",
    "Camera priority",
    "Use",
    "Avoid",
)
def clean_spacing(value: Any) -> str:
    """Collapse whitespace and tidy punctuation spacing in ``value``.

    ``None`` is treated as an empty string; any other value is converted with
    ``str``. Runs of whitespace become a single space, whitespace before
    ``, . ; :`` is removed, runs of ``, ; :`` collapse to the last character
    of the run, a ``, : ;`` or ``.`` directly before a period is dropped, and
    padding just inside parentheses is removed.
    """
    cleaned = str(value) if value is not None else ""
    cleaned = cleaned.replace("\n", " ")
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    # Order matters: later rules rely on the whitespace already being removed
    # in front of punctuation by the first rule.
    rules = (
        (r"\s+([,.;:])", r"\1"),
        (r"([,;:]){2,}", r"\1"),
        (r"\.\s*\.", "."),
        (r",\s*\.", "."),
        (r":\s*\.", "."),
        (r";\s*\.", "."),
        (r"\(\s+", "("),
        (r"\s+\)", ")"),
    )
    for pattern, replacement in rules:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()
def _strip_empty_fields(text: str) -> str:
    """Remove field labels from ``EMPTY_FIELD_LABELS`` that carry no value.

    Handles four shapes, all matched case-insensitively:

    * ``Label: .`` / ``Label: ,`` / ``Label: ;`` (label, colon, punctuation);
    * ``Label:`` directly before punctuation or the end of the text;
    * a bare ``Label.`` that stands alone as its own sentence or clause
      (what ``clean_spacing`` leaves behind after collapsing ``Label: .``);
    * ``Label: none`` / ``null`` / ``n/a`` used as a placeholder value.

    Returns the result passed through ``clean_spacing``; an empty input
    returns an empty string.
    """
    if not text:
        return ""
    labels = "|".join(re.escape(label) for label in EMPTY_FIELD_LABELS)
    flags = re.IGNORECASE
    text = re.sub(rf"\b(?:{labels})\s*:\s*[.,;]", "", text, flags=flags)
    text = re.sub(rf"\b(?:{labels}):\s*(?=\.|,|;|$)", "", text, flags=flags)
    # Only drop a bare "Label." when it begins the text or follows a
    # sentence/clause delimiter. Without this anchor, ordinary prose that
    # merely ends in a label word ("... looks at the camera.") lost that word.
    text = re.sub(rf"(?:^|(?<=[.!?;,]\s))(?:{labels})\.(?=\s|$)", "", text, flags=flags)
    # Treat none/null/n-a as empty only when it is the whole value; a value
    # that continues with more words ("none of ...") is kept intact.
    text = re.sub(rf"\b(?:{labels}):\s*(?:none|null|n/a)\b(?!\s+\w)[.,;]?", "", text, flags=flags)
    return clean_spacing(text)
def _drop_dangling_connectors(text: str) -> str:
    """Remove connector words left hanging directly before punctuation.

    The connectors are ``with``, ``and``, ``or``, ``while`` and ``featuring``
    (case-insensitive). The substitutions run in the listed order and the
    result is passed through ``clean_spacing``.
    """
    substitutions = (
        (r"\b(?:with|and|or|while|featuring)\s*([,.;])", r"\1"),
        (r"([,.;])\s*(?:with|and|or|while|featuring)\s*([,.;])", r"\1"),
        (r"\bwith\s*,", ""),
        (r",\s*and\s*\.", "."),
    )
    result = text
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
    return clean_spacing(result)
def _sentence_key(text: str, triggers: Iterable[str] = ()) -> str:
key_text = text
for trigger in triggers:
trigger = str(trigger or "").strip()
if trigger:
key_text = re.sub(rf"^{re.escape(trigger)}\s*[,.;]\s*", "", key_text, flags=re.IGNORECASE)
return re.sub(r"\W+", " ", key_text.lower()).strip()
def _dedupe_adjacent_sentences(text: str, triggers: Iterable[str] = ()) -> str:
    """Drop a sentence that repeats the sentence kept just before it.

    ``text`` is split after ``.``, ``!`` or ``?`` followed by whitespace.
    Sentences are compared by ``_sentence_key`` (trigger prefix, case and
    punctuation ignored). Fragments whose key is empty are discarded. The
    surviving sentences are joined with single spaces.
    """
    # ``triggers`` is re-read for every sentence, so a one-shot iterator
    # would be empty after the first sentence unless it is materialized.
    trigger_tuple = tuple(triggers)
    sentences = [chunk.strip() for chunk in re.split(r"(?<=[.!?])\s+", text) if chunk.strip()]
    kept: list[str] = []
    last_key = ""
    for sentence in sentences:
        key = _sentence_key(sentence, trigger_tuple)
        # Compare against the last sentence actually kept, so a discarded
        # punctuation-only fragment cannot hide a duplicate behind it.
        if not key or key == last_key:
            continue
        kept.append(sentence)
        last_key = key
    return " ".join(kept)
def _dedupe_labeled_sentences(text: str) -> str:
parts = [part.strip() for part in re.split(r"(?<=[.!?])\s+", text) if part.strip()]
seen: set[tuple[str, str]] = set()
deduped: list[str] = []
for part in parts:
match = re.match(r"^([A-Za-z][A-Za-z /_-]{1,40}):\s*(.+)$", part)
if not match:
deduped.append(part)
continue
key = (match.group(1).strip().lower(), re.sub(r"\W+", " ", match.group(2).lower()).strip())
if key not in seen:
deduped.append(part)
seen.add(key)
return " ".join(deduped)
def _trigger_prefix_key(text: str, triggers: Iterable[str]) -> str:
lowered = text.lower().strip()
for trigger in triggers:
trigger = str(trigger or "").strip()
if trigger and lowered.startswith(trigger.lower()):
return trigger
return ""
def _dedupe_trigger_prefix(text: str, triggers: Iterable[str]) -> str:
    """Collapse a repeated leading trigger into a single ``trigger, `` prefix.

    ``text`` is first normalized with ``clean_spacing``. If it starts with one
    of ``triggers`` (per ``_trigger_prefix_key``) and that trigger is followed
    by ``,``, ``.`` or ``;`` (or is the whole text), every leading repetition
    is removed and the text is returned as ``"<trigger>, <rest>"``, or just
    ``"<trigger>"`` when nothing else remains. Otherwise the cleaned text is
    returned unchanged.
    """
    text = clean_spacing(text)
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return text
    lead = re.match(rf"(?:{re.escape(trigger)}\s*(?:[,.;]\s*|$))+", text, flags=re.IGNORECASE)
    if lead is None:
        # The text starts with the trigger characters but not as a delimited
        # prefix (e.g. "sxcppnl7 woman"). Prepending here would duplicate the
        # trigger, so leave the text alone.
        return text
    # NOTE(review): this strip also removes a trailing period from the last
    # sentence of the remainder; kept as in the original — confirm intended.
    remainder = text[lead.end():].strip(" ,.;")
    return f"{trigger}, {remainder}" if remainder else trigger
def _split_comma_items(text: str) -> list[str]:
    """Split ``text`` on commas/semicolons into trimmed, non-empty items.

    The text is normalized with ``clean_spacing`` first; each item has
    surrounding spaces, commas, periods and semicolons stripped.
    """
    pieces = re.split(r"\s*[,;]\s*", clean_spacing(text))
    trimmed = (piece.strip(" ,.;") for piece in pieces)
    return [piece for piece in trimmed if piece]
def dedupe_comma_list(text: Any) -> str:
    """Return ``text`` as a ``", "``-joined list without repeated items.

    Falsy input is treated as an empty string. Items come from
    ``_split_comma_items`` and are compared case-insensitively with runs of
    non-word characters treated as one space; the first spelling of each item
    is kept, in original order.
    """
    first_seen: dict[str, str] = {}
    for entry in _split_comma_items(str(text or "")):
        fingerprint = re.sub(r"\W+", " ", entry.lower()).strip()
        if fingerprint:
            # setdefault keeps the earliest spelling; dicts preserve order.
            first_seen.setdefault(fingerprint, entry)
    return ", ".join(first_seen.values())
def sanitize_prose_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Apply the prose cleanup pipeline to ``value`` and return the result.

    Steps, in order: spacing cleanup, empty field-label removal, dangling
    connector removal, repeated labeled-sentence removal, leading-trigger
    dedupe, adjacent duplicate-sentence removal, then a final spacing pass
    that also strips leading/trailing spaces, commas and semicolons.

    Args:
        value: Any value; ``None`` or text that cleans to empty returns ``""``.
        triggers: Trigger tokens to recognize at the start of the text.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    # Two later passes both read ``triggers``; materialize it once so a
    # one-shot iterator is not already exhausted for the second pass.
    trigger_tuple = tuple(triggers)
    text = _strip_empty_fields(text)
    text = _drop_dangling_connectors(text)
    text = _dedupe_labeled_sentences(text)
    text = _dedupe_trigger_prefix(text, trigger_tuple)
    text = _dedupe_adjacent_sentences(text, trigger_tuple)
    return clean_spacing(text).strip(" ,;")
def sanitize_prompt_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Sanitize prompt text; currently identical to ``sanitize_prose_text``."""
    cleaned = sanitize_prose_text(value, triggers)
    return cleaned
def sanitize_caption_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Sanitize caption text; currently identical to ``sanitize_prose_text``."""
    cleaned = sanitize_prose_text(value, triggers)
    return cleaned
def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean a comma-separated tag prompt, keeping one leading trigger.

    The value is normalized with ``clean_spacing``. When it starts with one of
    ``triggers`` as its own comma/semicolon-delimited item, every leading
    repetition of that trigger is removed, the remaining tags are passed
    through ``dedupe_comma_list``, and the result is ``"<trigger>, <tags>"``
    (or just the trigger when no tags remain). Otherwise the whole text is
    passed through ``dedupe_comma_list``.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return dedupe_comma_list(text)
    lead = re.match(rf"(?:{re.escape(trigger)}\s*(?:[,;]\s*|$))+", text, flags=re.IGNORECASE)
    if lead is None:
        # The trigger characters begin a longer first tag rather than being
        # an item of their own; prepending would inject a duplicate trigger.
        return dedupe_comma_list(text)
    tags = dedupe_comma_list(text[lead.end():].strip(" ,;"))
    # A lone trigger (or one followed only by punctuation) must not come
    # back as "trigger, trigger" or "trigger, ".
    return f"{trigger}, {tags}" if tags else trigger
def sanitize_negative_text(value: Any) -> str:
    """Return the negative prompt with duplicate comma-list items removed."""
    deduped = dedupe_comma_list(value)
    return deduped
+28 -10
View File
@@ -4,6 +4,11 @@ import json
import re import re
from typing import Any from typing import Any
try:
from .prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
except ImportError: # Allows local smoke tests with `python -c`.
from prompt_hygiene import sanitize_negative_text, sanitize_tag_prompt
TRIGGER_CANDIDATES = ( TRIGGER_CANDIDATES = (
"sxcpinup_coloredpencil", "sxcpinup_coloredpencil",
@@ -432,11 +437,14 @@ def _assemble_prompt(
custom_quality: str, custom_quality: str,
extra_positive: str, extra_positive: str,
) -> str: ) -> str:
return _combine_tags( return sanitize_tag_prompt(
_style_prefix(style_preset, trigger, prepend_trigger, custom_style), _combine_tags(
body_tags, _style_prefix(style_preset, trigger, prepend_trigger, custom_style),
_quality_tail(quality_preset, custom_quality), body_tags,
extra_positive, _quality_tail(quality_preset, custom_quality),
extra_positive,
),
triggers=(trigger,),
) )
@@ -504,14 +512,22 @@ def format_sdxl_prompt(
extra_positive, extra_positive,
) )
selected = hard_prompt if target == "hardcore" else soft_prompt selected = hard_prompt if target == "hardcore" else soft_prompt
selected_negative = row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt") selected_negative = (
row.get("hardcore_negative_prompt") if target == "hardcore" else row.get("softcore_negative_prompt")
)
return { return {
"sdxl_prompt": selected, "sdxl_prompt": selected,
"negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative), "negative_prompt": sanitize_negative_text(
_combine_negative(SDXL_DEFAULT_NEGATIVE, selected_negative, negative_prompt, extra_negative)
),
"sdxl_softcore_prompt": soft_prompt, "sdxl_softcore_prompt": soft_prompt,
"sdxl_hardcore_prompt": hard_prompt, "sdxl_hardcore_prompt": hard_prompt,
"softcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative), "softcore_negative_prompt": sanitize_negative_text(
"hardcore_negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative), _combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("softcore_negative_prompt"), extra_negative)
),
"hardcore_negative_prompt": sanitize_negative_text(
_combine_negative(SDXL_DEFAULT_NEGATIVE, row.get("hardcore_negative_prompt"), extra_negative)
),
"method": f"{method}:sdxl(insta_of_pair)", "method": f"{method}:sdxl(insta_of_pair)",
} }
@@ -534,7 +550,9 @@ def format_sdxl_prompt(
) )
return { return {
"sdxl_prompt": prompt, "sdxl_prompt": prompt,
"negative_prompt": _combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative), "negative_prompt": sanitize_negative_text(
_combine_negative(SDXL_DEFAULT_NEGATIVE, extracted_negative, negative_prompt, extra_negative)
),
"sdxl_softcore_prompt": "", "sdxl_softcore_prompt": "",
"sdxl_hardcore_prompt": "", "sdxl_hardcore_prompt": "",
"softcore_negative_prompt": "", "softcore_negative_prompt": "",
+395
View File
@@ -0,0 +1,395 @@
#!/usr/bin/env python3
"""Smoke-test core prompt routes without importing ComfyUI.
The checks here are intentionally lightweight invariants, not golden prompt
snapshots. They prove that representative rows still carry structured metadata
and that the Krea2, SDXL, and caption formatter paths consume metadata instead
of silently falling back to raw text parsing.
"""
from __future__ import annotations
import argparse
import json
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable
# Repository root: the directory one level above the folder that holds this script.
ROOT = Path(__file__).resolve().parents[1]
# Put the root first on the import path (once) so the project modules imported
# below resolve without installing the package.
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
import caption_naturalizer # noqa: E402
import krea_formatter # noqa: E402
import prompt_builder as pb # noqa: E402
import sdxl_formatter # noqa: E402
# Trigger token handed to the prompt builder and the caption naturalizer in the
# smoke cases below; prompts and captions are expected to contain it exactly once.
Trigger = "sxcppnl7"
# Separate trigger token handed to the SDXL formatter, checked the same way.
SdxlTrigger = "mythp0rt"
@dataclass
class SmokeReport:
    """Accumulate smoke-case outcomes, echoing each one to stdout as it is recorded."""

    # Names of the cases that finished without raising.
    passed: list[str] = field(default_factory=list)
    # "name: message" entries for the cases that raised.
    failed: list[str] = field(default_factory=list)

    def ok(self, name: str) -> None:
        """Record ``name`` as a passing case and print a ``PASS`` line for it."""
        self.passed.append(name)
        print(f"PASS {name}")

    def fail(self, name: str, message: str) -> None:
        """Record ``name`` as failing with ``message`` and print a ``FAIL`` line for it."""
        entry = f"{name}: {message}"
        self.failed.append(entry)
        print(f"FAIL {entry}")
def _clean_key(value: str) -> str:
return re.sub(r"[^a-z0-9]+", " ", str(value or "").lower()).strip()
def _json(value: Any) -> str:
return json.dumps(value, ensure_ascii=True, sort_keys=True)
def _expect(condition: bool, message: str) -> None:
if not condition:
raise AssertionError(message)
def _expect_text(name: str, value: Any, min_len: int = 8) -> str:
    """Validate that ``value`` is usable prompt text and return it stripped.

    Args:
        name: Label used as the prefix of every failure message.
        value: Candidate text; falsy values are treated as the empty string.
        min_len: Minimum accepted length of the stripped text.

    Returns:
        The stripped text.

    Raises:
        AssertionError: If the text is shorter than ``min_len``, contains the
            literal ``None``, contains two consecutive spaces, or has a space
            directly before a comma or a period.
    """
    text = str(value or "").strip()
    _expect(len(text) >= min_len, f"{name} is empty or too short")
    _expect("None" not in text, f"{name} leaked None")
    # The needle must be TWO spaces: a single-space needle would reject every
    # multi-word prompt. Building it by repetition keeps it intact even if the
    # file's whitespace is normalized by a tool.
    repeated_space = " " * 2
    _expect(repeated_space not in text, f"{name} has repeated spaces")
    _expect(" ," not in text and " ." not in text, f"{name} has bad punctuation spacing")
    return text
def _expect_no_duplicate_comma_items(name: str, value: Any) -> None:
    """Fail when a comma-separated string repeats an item.

    Items are compared by their ``_clean_key`` form and items whose key is
    empty are ignored. The failure message lists at most five of the
    duplicated keys, in sorted order.
    """
    seen: set[str] = set()
    repeated: set[str] = set()
    for raw_part in str(value or "").split(","):
        key = _clean_key(raw_part)
        if not key:
            continue
        if key in seen:
            repeated.add(key)
        seen.add(key)
    duplicates = sorted(repeated)
    _expect(not duplicates, f"{name} has duplicate comma items: {duplicates[:5]}")
def _trigger_count(text: str, trigger: str) -> int:
return len(re.findall(rf"(?<![a-z0-9_]){re.escape(trigger)}(?![a-z0-9_])", text, flags=re.IGNORECASE))
def _expect_trigger_once(name: str, value: Any, trigger: str) -> None:
    """Fail unless ``value`` contains ``trigger`` exactly once (per ``_trigger_count``).

    Falsy ``value`` is treated as the empty string.
    """
    occurrences = _trigger_count(str(value or ""), trigger)
    _expect(occurrences == 1, f"{name} should contain trigger {trigger!r} exactly once, got {occurrences}")
def _expect_row_base(row: dict[str, Any], name: str) -> None:
    """Check the invariants shared by every metadata row.

    The row must be a dict, carry usable ``prompt`` (at least 20 characters)
    and ``negative_prompt`` (at least 8 characters) text, have no repeated
    comma items in the negative prompt, and survive a JSON round trip unchanged.
    """
    _expect(isinstance(row, dict), f"{name} did not return a metadata row")
    for key, min_len in (("prompt", 20), ("negative_prompt", 8)):
        _expect_text(f"{name}.{key}", row.get(key), min_len)
    _expect_no_duplicate_comma_items(f"{name}.negative_prompt", row.get("negative_prompt"))
    round_tripped = json.loads(_json(row))
    _expect(round_tripped == row, f"{name} is not JSON-stable")
def _expect_custom_row(row: dict[str, Any], name: str) -> None:
    """Check a row that is expected to come from a JSON category.

    On top of the base row invariants, the row must report
    ``source == "json_category"``, carry non-trivial ``item``, ``scene_text``
    and ``composition`` text, expose a role graph under ``source_role_graph``
    or ``role_graph``, and hold a dict under ``item_axis_values``.
    """
    _expect_row_base(row, name)
    _expect(row.get("source") == "json_category", f"{name}.source should be json_category")
    for key in ("item", "scene_text", "composition"):
        _expect_text(f"{name}.{key}", row.get(key), 8)
    role_graph = row.get("source_role_graph") or row.get("role_graph")
    _expect_text(f"{name}.role_graph", role_graph, 8)
    axis_values = row.get("item_axis_values")
    _expect(isinstance(axis_values, dict), f"{name}.item_axis_values missing")
def _expect_formatter_outputs(row: dict[str, Any], name: str, *, target: str = "auto") -> None:
    """Run ``row`` through the Krea2, SDXL and caption formatters and check the results.

    Each formatter receives an empty source text plus the row serialized as
    ``metadata_json``. Its reported method must mention ``metadata``, which
    shows it did not fall back to raw text parsing. The produced prompt or
    caption must be usable text. The SDXL prompt and the caption must contain
    their trigger exactly once, and the Krea and SDXL negative prompts must not
    repeat comma items.
    """
    payload = _json(row)

    # Krea2 route.
    krea_result = krea_formatter.format_krea2_prompt("", metadata_json=payload, target=target)
    krea_method = krea_result.get("method", "")
    _expect("metadata" in krea_method, f"{name}.krea did not use metadata: {krea_result.get('method')}")
    _expect_text(f"{name}.krea_prompt", krea_result.get("krea_prompt"), 20)
    _expect_no_duplicate_comma_items(f"{name}.krea_negative", krea_result.get("negative_prompt"))

    # SDXL route, with the SDXL trigger prepended.
    sdxl_result = sdxl_formatter.format_sdxl_prompt(
        "",
        metadata_json=payload,
        target=target,
        trigger=SdxlTrigger,
        prepend_trigger=True,
    )
    sdxl_method = sdxl_result.get("method", "")
    _expect("metadata" in sdxl_method, f"{name}.sdxl did not use metadata: {sdxl_result.get('method')}")
    sdxl_prompt = sdxl_result.get("sdxl_prompt")
    _expect_text(f"{name}.sdxl_prompt", sdxl_prompt, 20)
    _expect_trigger_once(f"{name}.sdxl_prompt", sdxl_prompt, SdxlTrigger)
    _expect_no_duplicate_comma_items(f"{name}.sdxl_negative", sdxl_result.get("negative_prompt"))

    # Caption route, with the caption trigger included.
    caption, caption_method = caption_naturalizer.naturalize_caption(
        "",
        metadata_json=payload,
        trigger=Trigger,
        include_trigger=True,
    )
    _expect("metadata" in caption_method, f"{name}.caption did not use metadata: {caption_method}")
    _expect_text(f"{name}.caption", caption, 20)
    _expect_trigger_once(f"{name}.caption", caption, Trigger)
def _character_cast(*, pov_man: bool = False) -> str:
    """Build a two-slot cast: a woman labelled "A", then a man labelled "A".

    The woman's slot is built first and its ``character_cast`` value is passed
    into the second call, which adds the man.

    Args:
        pov_man: When true the man's ``presence_mode`` is ``"pov"``, otherwise
            ``"visible"``.

    Returns:
        The ``character_cast`` entry of the second builder result.
    """
    woman_slot = {
        "subject_type": "woman",
        "label": "A",
        "age": "25-year-old adult",
        "ethnicity": "western_european",
        "figure": "balanced",
        "body": "slim",
        "descriptor_detail": "full",
        "expression_intensity": 0.65,
        "softcore_expression_intensity": 0.45,
        "hardcore_expression_intensity": 0.85,
    }
    cast_with_woman = pb.build_character_slot_json(**woman_slot)["character_cast"]

    man_slot = {
        "subject_type": "man",
        "label": "A",
        "age": "40-year-old adult",
        "ethnicity": "western_european",
        "figure": "balanced",
        "body": "average",
        "descriptor_detail": "compact",
        "expression_intensity": 0.55,
        "softcore_expression_intensity": 0.35,
        "hardcore_expression_intensity": 0.75,
        "presence_mode": "pov" if pov_man else "visible",
        "character_cast": cast_with_woman,
    }
    return pb.build_character_slot_json(**man_slot)["character_cast"]
def _action_filter(focus: str) -> str:
    """Build a hardcore action filter that enables only what ``focus`` asks for.

    Toys and double actions are always disabled. Every other action kind is
    enabled when ``focus`` is ``"<kind>_only"`` for that kind, or when it is
    ``"keep_pool"`` (which enables all of them).
    """
    gated_kinds = (
        "penetration",
        "foreplay",
        "interaction",
        "manual",
        "oral",
        "outercourse",
        "anal",
        "climax",
    )
    flags = {"allow_toys": False, "allow_double": False}
    for kind in gated_kinds:
        flags[f"allow_{kind}"] = focus in (f"{kind}_only", "keep_pool")
    return pb.build_hardcore_action_filter_json(focus=focus, **flags)
def _prompt_row(
    *,
    name: str,
    category: str,
    subcategory: str,
    seed: int,
    character_cast: str = "",
    women_count: int = 1,
    men_count: int = 1,
    hardcore_position_config: str = "",
) -> dict[str, Any]:
    """Build one prompt row with fixed smoke-test settings and validate its base shape.

    Only the category, subcategory, seed, cast, head counts and position
    config vary between smoke cases; every other builder argument is pinned
    here. ``name`` is used solely to label failure messages.

    Returns:
        The row returned by ``pb.build_prompt`` after it passed
        ``_expect_row_base``.
    """
    build_args = {
        "category": category,
        "subcategory": subcategory,
        "row_number": 1,
        "start_index": 1,
        "seed": seed,
        "clothing": "random",
        "ethnicity": "any",
        "poses": "random",
        "backside_bias": 0.35,
        "figure": "random",
        "no_plus_women": False,
        "no_black": False,
        "minimal_clothing_ratio": 0.5,
        "standard_pose_ratio": 0.5,
        "trigger": Trigger,
        "prepend_trigger_to_prompt": True,
        "extra_positive": "",
        "extra_negative": "",
        "character_cast": character_cast,
        "women_count": women_count,
        "men_count": men_count,
        "expression_enabled": True,
        "expression_intensity": 0.6,
        "hardcore_position_config": hardcore_position_config,
    }
    row = pb.build_prompt(**build_args)
    _expect_row_base(row, name)
    return row
def smoke_builtin_single() -> None:
    """Smoke-check a single-woman row produced by the built-in generator.

    The row must report ``built_in_generator`` as its source, contain the
    trigger exactly once in its prompt, and pass every formatter check.
    """
    case = "builtin_single_woman"
    row = _prompt_row(name=case, category="woman", subcategory="random", seed=1001, men_count=0)
    _expect(row.get("source") == "built_in_generator", "builtin row should come from built-in generator")
    _expect_trigger_once(f"{case}.prompt", row.get("prompt"), Trigger)
    _expect_formatter_outputs(row, case, target="single")
def smoke_hardcore_category_routes() -> None:
    """Smoke-check one row per hardcore subcategory with a matching action filter.

    Each route uses the shared woman-plus-man cast and consecutive seeds
    starting at 1101. Every row must be a valid custom row that uses the
    configured cast, and must pass every formatter check.
    """
    shared_cast = _character_cast()
    routes = (
        ("hardcore_penetration", "Penetrative sex", "penetration_only"),
        ("hardcore_oral", "Oral sex", "oral_only"),
        ("hardcore_manual", "Manual stimulation", "manual_only"),
        ("hardcore_outercourse", "Outercourse and genital teasing", "outercourse_only"),
        ("hardcore_foreplay", "Foreplay and teasing", "foreplay_only"),
        ("hardcore_aftercare", "Aftercare and cleanup", "interaction_only"),
    )
    first_seed = 1101
    for offset, (name, subcategory, focus) in enumerate(routes):
        row = _prompt_row(
            name=name,
            category="Hardcore sexual poses",
            subcategory=subcategory,
            seed=first_seed + offset,
            character_cast=shared_cast,
            women_count=1,
            men_count=1,
            hardcore_position_config=_action_filter(focus),
        )
        _expect_custom_row(row, name)
        _expect(row.get("subject_type") == "configured_cast", f"{name} should use configured cast")
        _expect_formatter_outputs(row, name, target="single")
def _insta_options(**overrides: Any) -> str:
    """Return Insta/OF options JSON built from fixed defaults plus ``overrides``.

    The defaults are produced by ``pb.build_insta_of_options_json``, decoded,
    updated with ``overrides`` (which win on key clashes), and re-serialized
    with ``_json``.
    """
    defaults_json = pb.build_insta_of_options_json(
        softcore_cast="same_as_hardcore",
        hardcore_cast="couple",
        hardcore_women_count=1,
        hardcore_men_count=1,
        softcore_level="lingerie_tease",
        hardcore_level="hardcore",
        platform_style="hybrid",
        continuity="same_creator_same_room",
        hardcore_clothing_continuity="explicit_nude",
        softcore_camera_mode="standard",
        hardcore_camera_mode="standard",
        camera_detail="compact",
        hardcore_detail_density="balanced",
    )
    merged = json.loads(defaults_json)
    merged.update(overrides)
    return _json(merged)
def _expect_pair(pair: dict[str, Any], name: str) -> None:
    """Check the invariants of an Insta/OF softcore/hardcore pair.

    The pair must report mode ``Insta/OF``, hold a valid base softcore row and
    a valid custom hardcore row, expose usable prompts for both sides, have
    no repeated comma items in either negative prompt, and pass the formatter
    checks for both the ``softcore`` and ``hardcore`` targets.
    """
    _expect(pair.get("mode") == "Insta/OF", f"{name}.mode should be Insta/OF")
    softcore_row = pair.get("softcore_row") or {}
    _expect_row_base(softcore_row, f"{name}.softcore_row")
    hardcore_row = pair.get("hardcore_row") or {}
    _expect_custom_row(hardcore_row, f"{name}.hardcore_row")
    sides = ("softcore", "hardcore")
    for side in sides:
        _expect_text(f"{name}.{side}_prompt", pair.get(f"{side}_prompt"), 20)
    for side in sides:
        _expect_no_duplicate_comma_items(f"{name}.{side}_negative", pair.get(f"{side}_negative_prompt"))
    _expect_formatter_outputs(pair, name, target="softcore")
    _expect_formatter_outputs(pair, f"{name}.hardcore", target="hardcore")
def smoke_insta_pair() -> None:
    """Smoke-check an Insta/OF pair built with the shared visible cast.

    Beyond the generic pair invariants, the softcore and hardcore rows must
    carry the same ``scene_text``.
    """
    case = "insta_pair_same_cast"
    build_args = {
        "row_number": 1,
        "start_index": 1,
        "seed": 2101,
        "ethnicity": "any",
        "figure": "random",
        "no_plus_women": False,
        "no_black": False,
        "trigger": Trigger,
        "prepend_trigger_to_prompt": True,
        "options_json": _insta_options(),
        "character_cast": _character_cast(),
        "hardcore_position_config": _action_filter("penetration_only"),
    }
    pair = pb.build_insta_of_pair(**build_args)
    _expect_pair(pair, case)
    soft_scene = pair["softcore_row"].get("scene_text")
    hard_scene = pair["hardcore_row"].get("scene_text")
    _expect(soft_scene == hard_scene, "pair scene continuity broke")
def smoke_insta_pair_pov() -> None:
    """Smoke-check an Insta/OF pair whose man is a point-of-view character.

    Beyond the generic pair invariants, ``Man A`` must appear in the POV
    labels of both the pair and its hardcore row, and the hardcore Krea
    prompt must mention the viewer.
    """
    case = "insta_pair_pov_man"
    build_args = {
        "row_number": 1,
        "start_index": 1,
        "seed": 2201,
        "ethnicity": "any",
        "figure": "random",
        "no_plus_women": False,
        "no_black": False,
        "trigger": Trigger,
        "prepend_trigger_to_prompt": True,
        "options_json": _insta_options(),
        "character_cast": _character_cast(pov_man=True),
        "hardcore_position_config": _action_filter("oral_only"),
    }
    pair = pb.build_insta_of_pair(**build_args)
    _expect_pair(pair, case)

    pair_pov_labels = pair.get("pov_character_labels") or []
    _expect("Man A" in pair_pov_labels, "pair POV labels should include Man A")

    hardcore_row = pair.get("hardcore_row") or {}
    row_pov_labels = hardcore_row.get("pov_character_labels") or []
    _expect("Man A" in row_pov_labels, "hard row POV labels should include Man A")

    krea_result = krea_formatter.format_krea2_prompt("", metadata_json=_json(pair), target="hardcore")
    krea_prompt = krea_result.get("krea_prompt") or ""
    _expect("viewer" in krea_prompt.lower(), "POV Krea prompt should mention viewer perspective")
def smoke_no_expression_fallback() -> None:
    """Smoke-check that a cast slot with expressions disabled stays expression-free.

    The resulting hardcore row must be a valid custom row, have a falsy
    ``expression`` value, omit the ``Facial expressions:`` section from its
    prompt, and still pass every formatter check.
    """
    case = "hardcore_expression_disabled"
    slot = pb.build_character_slot_json(
        subject_type="woman",
        label="A",
        age="25-year-old adult",
        ethnicity="western_european",
        body="slim",
        descriptor_detail="full",
        expression_enabled=False,
    )
    row = _prompt_row(
        name=case,
        category="Hardcore sexual poses",
        subcategory="Penetrative sex",
        seed=2301,
        character_cast=slot["character_cast"],
        women_count=1,
        men_count=1,
        hardcore_position_config=_action_filter("penetration_only"),
    )
    _expect_custom_row(row, case)
    _expect(not row.get("expression"), "expression should stay disabled without fallback")
    prompt_text = row.get("prompt", "")
    _expect("Facial expressions:" not in prompt_text, "disabled expression leaked into prompt")
    _expect_formatter_outputs(row, case, target="single")
# Registry of smoke cases as (case name, zero-argument callable) pairs.
# ``main`` runs them in this order and offers the names as ``--case`` choices.
SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
    ("builtin_single_woman", smoke_builtin_single),
    ("hardcore_category_routes", smoke_hardcore_category_routes),
    ("insta_pair_same_cast", smoke_insta_pair),
    ("insta_pair_pov_man", smoke_insta_pair_pov),
    ("expression_disabled", smoke_no_expression_fallback),
]
def main(argv: list[str] | None = None) -> int:
    """Run the selected smoke cases and return a process exit code.

    Args:
        argv: Command-line arguments without the program name. ``None`` lets
            ``argparse`` fall back to ``sys.argv``.

    Returns:
        ``0`` when every executed case passed, ``1`` when at least one failed.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--case",
        choices=[name for name, _func in SMOKE_CASES],
        action="append",
        help="Run only the named smoke case. Can be passed multiple times.",
    )
    args = parser.parse_args(argv)
    # No --case means "run everything"; an empty set never filters a case out.
    selected = set(args.case or [])
    report = SmokeReport()
    for name, func in SMOKE_CASES:
        if selected and name not in selected:
            continue
        try:
            func()
        except AssertionError as exc:
            # Expectation failures already carry a descriptive message.
            report.fail(name, str(exc) or "assertion failed without a message")
        except Exception as exc:  # noqa: BLE001 - report all smoke failures uniformly.
            # Unexpected errors: keep the exception class, because str() alone
            # turns e.g. KeyError('prompt') into the unhelpful "'prompt'".
            report.fail(name, f"{type(exc).__name__}: {exc}")
        else:
            report.ok(name)
    print(f"\nSummary: {len(report.passed)} passed, {len(report.failed)} failed")
    return 1 if report.failed else 0
# Script entry point: propagate main()'s return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main())