diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 9e014da..937ac2f 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -75,6 +75,8 @@ routes: - `Avoid:` positive/negative splitting for fallback text; - the shared prompt field-label inventory and extraction such as `Setting:`, `Sexual scene:`, `Camera control:`, or `Composition:`; +- fallback field-label stripping for tag/text routes that need label-free body + text; - row-value fallback from metadata fields to labeled prompt text. It must not make formatter-style decisions. Krea prose, SDXL tags, and training @@ -291,10 +293,11 @@ Keep here: - formatter profiles for manual controls, Pony flat-vector, SDXL photo, and plain flat-vector styles live in `sdxl_presets.py` and are exposed by `SxCP SDXL Formatter`. +- fallback field-label cleanup delegates to `formatter_input.py`. Improve later: - -- make fallback cleanup use the shared field-label inventory. +- add route-level fixtures for any new SDXL model profile that needs different + tag ordering. ### Naturalizer Path diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 692ed4e..84a3517 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -95,7 +95,7 @@ Core helper ownership: | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. | -| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | +| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | | `sdxl_presets.py` | SDXL formatter profiles, style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. | | `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. | @@ -661,7 +661,7 @@ SDXL field consumption: | Normal metadata | cast descriptors, age/body/skin/hair/eyes, `action_family`, `position_family`, `position_keys`, item, role graph, scene, camera config/directive | `_row_core_tags`, `_metadata_family_tags`, `_camera_tags` | | Pair softcore | `softcore_row`, pair partner styling, root soft camera config | `_soft_tags` | | Pair hardcore | `hardcore_row`, `action_family`, `position_family`, `position_keys`, `hardcore_clothing_state`, hard camera fields, hard prompt text | `_hard_tags`, `_metadata_family_tags` | -| Text fallback | `source_text`, preserve-trigger setting | `_fallback_text_to_sdxl` | +| Text fallback | `source_text`, preserve-trigger setting, shared field-label stripping | `_fallback_text_to_sdxl` | SDXL is the right place for model trigger handling, tag ordering, weight syntax, quality/style preset changes, and nude-weight defaults. Do not solve those in diff --git a/formatter_input.py b/formatter_input.py index 133c6d5..009e6fb 100644 --- a/formatter_input.py +++ b/formatter_input.py @@ -101,6 +101,18 @@ def split_avoid(text: Any) -> tuple[str, str]: return text[: match.start()].strip(" ."), match.group(1).strip(" .") +def strip_prompt_field_labels( + text: Any, + *, + field_labels: tuple[str, ...] | list[str] = DEFAULT_PROMPT_FIELD_LABELS, +) -> str: + text = clean_text(text) + if not text: + return "" + labels = "|".join(re.escape(name) for name in sorted(field_labels, key=len, reverse=True)) + return clean_text(re.sub(rf"\b(?:{labels}):\s*", "", text)) + + def prompt_field( text: Any, label: str, diff --git a/sdxl_formatter.py b/sdxl_formatter.py index bac0de6..6a4a79b 100644 --- a/sdxl_formatter.py +++ b/sdxl_formatter.py @@ -63,6 +63,10 @@ def _split_avoid(text: str) -> tuple[str, str]: return input_policy.split_avoid(text) +def _strip_prompt_field_labels(text: str) -> str: + return input_policy.strip_prompt_field_labels(text, field_labels=PROMPT_FIELD_LABELS) + + def _prompt_field(text: str, label: str) -> str: return input_policy.prompt_field(text, label, field_labels=PROMPT_FIELD_LABELS) @@ -416,11 +420,7 @@ def _fallback_text_to_sdxl( nude_weight: float, ) -> tuple[str, str, str]: positive, negative = _split_avoid(_strip_trigger(source_text, preserve_trigger)) - positive = re.sub( - r"\b(?:Scene|Setting|Pose|Sexual pose|Sexual scene|Facial expressions?|Composition|Role graph|Camera control):\s*", - "", - positive, - ) + positive = _strip_prompt_field_labels(positive) tags = _combine_tags(positive, ", ".join(_explicit_tags(positive, nude_weight))) return tags, negative, "text(fallback)" diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index c99cbe9..cb4bcc2 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -904,9 +904,19 @@ def smoke_formatter_input_policy() -> None: formatter_input.prompt_field(labeled_prompt, "Camera control") == "side view", "Shared formatter field-label inventory lost Camera control parsing", ) + stripped_labels = formatter_input.strip_prompt_field_labels( + "Characters: woman. Erotic outfit: sheer dress. Camera: side view." + ) + _expect("Characters:" not in stripped_labels, "Shared label stripper did not remove Characters label") + _expect("Erotic outfit:" not in stripped_labels, "Shared label stripper did not remove Erotic outfit label") + _expect("Camera:" not in stripped_labels, "Shared label stripper did not remove Camera label") _expect(krea_formatter._clean("a b , c") == formatter_input.clean_text("a b , c"), "Krea clean helper is not delegated") _expect(sdxl_formatter._clean("a b , c") == formatter_input.clean_text("a b , c"), "SDXL clean helper is not delegated") + _expect( + sdxl_formatter._strip_prompt_field_labels("Characters: woman. Camera: side view.") == "woman. side view.", + "SDXL label stripper should delegate to formatter_input", + ) _expect(caption_naturalizer._clean_text("a b , c") == formatter_input.clean_text("a b , c"), "Caption clean helper is not delegated") _expect(krea_formatter._strip_trigger(f"{Trigger}, prompt text", False) == "prompt text", "Krea trigger stripping changed") _expect(sdxl_formatter._strip_trigger(f"{SdxlTrigger}, prompt text", False) == "prompt text", "SDXL trigger stripping changed") @@ -921,6 +931,19 @@ def smoke_formatter_input_policy() -> None: _expect_text("formatter_input.krea_prompt", krea.get("krea_prompt"), 20) _expect_text("formatter_input.sdxl_prompt", sdxl.get("sdxl_prompt"), 20) _expect_text("formatter_input.caption", caption, 20) + fallback_sdxl = sdxl_formatter.format_sdxl_prompt( + "Characters: woman. Erotic outfit: sheer dress. Camera: side view. Avoid: blur", + input_hint="prompt", + style_preset="none", + quality_preset="none", + trigger=SdxlTrigger, + prepend_trigger=False, + ) + fallback_prompt = fallback_sdxl.get("sdxl_prompt", "") + _expect("Characters:" not in fallback_prompt, "SDXL fallback leaked Characters label") + _expect("Erotic outfit:" not in fallback_prompt, "SDXL fallback leaked Erotic outfit label") + _expect("Camera:" not in fallback_prompt, "SDXL fallback leaked Camera label") + _expect("blur" in fallback_sdxl.get("negative_prompt", ""), "SDXL fallback lost Avoid negative text") def smoke_formatter_cast_policy() -> None: