Share fallback field-label cleanup

This commit is contained in:
2026-06-27 01:53:06 +02:00
parent 5ab2433ca7
commit 2d3d668359
5 changed files with 47 additions and 9 deletions
+5 -2
View File
@@ -75,6 +75,8 @@ routes:
- `Avoid:` positive/negative splitting for fallback text;
- the shared prompt field-label inventory and extraction such as `Setting:`,
`Sexual scene:`, `Camera control:`, or `Composition:`;
- fallback field-label stripping for tag/text routes that need label-free body
text;
- row-value fallback from metadata fields to labeled prompt text.
It must not make formatter-style decisions. Krea prose, SDXL tags, and training
@@ -291,10 +293,11 @@ Keep here:
- formatter profiles for manual controls, Pony flat-vector, SDXL photo, and
plain flat-vector styles live in `sdxl_presets.py` and are exposed by
`SxCP SDXL Formatter`.
- fallback field-label cleanup delegates to `formatter_input.py`.
Improve later:
- make fallback cleanup use the shared field-label inventory.
- add route-level fixtures for any new SDXL model profile that needs different
tag ordering.
### Naturalizer Path
+2 -2
View File
@@ -95,7 +95,7 @@ Core helper ownership:
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. |
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
| `sdxl_presets.py` | SDXL formatter profiles, style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. |
| `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. |
@@ -661,7 +661,7 @@ SDXL field consumption:
| Normal metadata | cast descriptors, age/body/skin/hair/eyes, `action_family`, `position_family`, `position_keys`, item, role graph, scene, camera config/directive | `_row_core_tags`, `_metadata_family_tags`, `_camera_tags` |
| Pair softcore | `softcore_row`, pair partner styling, root soft camera config | `_soft_tags` |
| Pair hardcore | `hardcore_row`, `action_family`, `position_family`, `position_keys`, `hardcore_clothing_state`, hard camera fields, hard prompt text | `_hard_tags`, `_metadata_family_tags` |
| Text fallback | `source_text`, preserve-trigger setting | `_fallback_text_to_sdxl` |
| Text fallback | `source_text`, preserve-trigger setting, shared field-label stripping | `_fallback_text_to_sdxl` |
SDXL is the right place for model trigger handling, tag ordering, weight syntax,
quality/style preset changes, and nude-weight defaults. Do not solve those in
+12
View File
@@ -101,6 +101,18 @@ def split_avoid(text: Any) -> tuple[str, str]:
return text[: match.start()].strip(" ."), match.group(1).strip(" .")
def strip_prompt_field_labels(
    text: Any,
    *,
    field_labels: tuple[str, ...] | list[str] = DEFAULT_PROMPT_FIELD_LABELS,
) -> str:
    """Remove ``Label:`` prefixes from *text* and return the cleaned body.

    The input is first normalised with ``clean_text``.  Every occurrence of
    one of ``field_labels`` that starts at a word boundary and is followed by
    a colon (plus any trailing whitespace) is deleted, and the result is
    passed through ``clean_text`` once more.  Matching is case-sensitive.

    Args:
        text: Raw prompt text; whatever ``clean_text`` accepts.
        field_labels: Label names (without the trailing colon) to strip.
            Blank names are ignored.

    Returns:
        The label-free text, or ``""`` when the cleaned input is empty.  When
        no usable labels are supplied the cleaned text is returned unchanged.
    """
    text = clean_text(text)
    if not text:
        return ""
    # Longest labels first: regex alternation is ordered, so a longer label
    # must be tried before any shorter label that is a prefix of it.
    names = sorted((name for name in field_labels if name), key=len, reverse=True)
    if not names:
        # An empty alternation would degrade the pattern to r"\b(?:):\s*",
        # which strips every colon that follows a word character.
        return text
    labels = "|".join(re.escape(name) for name in names)
    return clean_text(re.sub(rf"\b(?:{labels}):\s*", "", text))
def prompt_field(
text: Any,
label: str,
+5 -5
View File
@@ -63,6 +63,10 @@ def _split_avoid(text: str) -> tuple[str, str]:
return input_policy.split_avoid(text)
def _strip_prompt_field_labels(text: str) -> str:
    """Strip field labels from *text* via the shared ``input_policy`` helper.

    Delegates to ``input_policy.strip_prompt_field_labels`` with this
    module's ``PROMPT_FIELD_LABELS`` as the label inventory.
    """
    shared_stripper = input_policy.strip_prompt_field_labels
    return shared_stripper(text, field_labels=PROMPT_FIELD_LABELS)
def _prompt_field(text: str, label: str) -> str:
    """Look up the *label* field in *text* via the shared ``input_policy`` helper.

    Delegates to ``input_policy.prompt_field`` with this module's
    ``PROMPT_FIELD_LABELS`` as the label inventory.
    """
    shared_lookup = input_policy.prompt_field
    return shared_lookup(text, label, field_labels=PROMPT_FIELD_LABELS)
@@ -416,11 +420,7 @@ def _fallback_text_to_sdxl(
nude_weight: float,
) -> tuple[str, str, str]:
positive, negative = _split_avoid(_strip_trigger(source_text, preserve_trigger))
positive = re.sub(
r"\b(?:Scene|Setting|Pose|Sexual pose|Sexual scene|Facial expressions?|Composition|Role graph|Camera control):\s*",
"",
positive,
)
positive = _strip_prompt_field_labels(positive)
tags = _combine_tags(positive, ", ".join(_explicit_tags(positive, nude_weight)))
return tags, negative, "text(fallback)"
+23
View File
@@ -904,9 +904,19 @@ def smoke_formatter_input_policy() -> None:
formatter_input.prompt_field(labeled_prompt, "Camera control") == "side view",
"Shared formatter field-label inventory lost Camera control parsing",
)
stripped_labels = formatter_input.strip_prompt_field_labels(
"Characters: woman. Erotic outfit: sheer dress. Camera: side view."
)
_expect("Characters:" not in stripped_labels, "Shared label stripper did not remove Characters label")
_expect("Erotic outfit:" not in stripped_labels, "Shared label stripper did not remove Erotic outfit label")
_expect("Camera:" not in stripped_labels, "Shared label stripper did not remove Camera label")
_expect(krea_formatter._clean("a b , c") == formatter_input.clean_text("a b , c"), "Krea clean helper is not delegated")
_expect(sdxl_formatter._clean("a b , c") == formatter_input.clean_text("a b , c"), "SDXL clean helper is not delegated")
_expect(
sdxl_formatter._strip_prompt_field_labels("Characters: woman. Camera: side view.") == "woman. side view.",
"SDXL label stripper should delegate to formatter_input",
)
_expect(caption_naturalizer._clean_text("a b , c") == formatter_input.clean_text("a b , c"), "Caption clean helper is not delegated")
_expect(krea_formatter._strip_trigger(f"{Trigger}, prompt text", False) == "prompt text", "Krea trigger stripping changed")
_expect(sdxl_formatter._strip_trigger(f"{SdxlTrigger}, prompt text", False) == "prompt text", "SDXL trigger stripping changed")
@@ -921,6 +931,19 @@ def smoke_formatter_input_policy() -> None:
_expect_text("formatter_input.krea_prompt", krea.get("krea_prompt"), 20)
_expect_text("formatter_input.sdxl_prompt", sdxl.get("sdxl_prompt"), 20)
_expect_text("formatter_input.caption", caption, 20)
fallback_sdxl = sdxl_formatter.format_sdxl_prompt(
"Characters: woman. Erotic outfit: sheer dress. Camera: side view. Avoid: blur",
input_hint="prompt",
style_preset="none",
quality_preset="none",
trigger=SdxlTrigger,
prepend_trigger=False,
)
fallback_prompt = fallback_sdxl.get("sdxl_prompt", "")
_expect("Characters:" not in fallback_prompt, "SDXL fallback leaked Characters label")
_expect("Erotic outfit:" not in fallback_prompt, "SDXL fallback leaked Erotic outfit label")
_expect("Camera:" not in fallback_prompt, "SDXL fallback leaked Camera label")
_expect("blur" in fallback_sdxl.get("negative_prompt", ""), "SDXL fallback lost Avoid negative text")
def smoke_formatter_cast_policy() -> None: