Harden formatter prompt hygiene simulation

2026-06-27 19:02:04 +02:00
parent 80e7e6e156
commit c6f0fc34af
2 changed files with 71 additions and 3 deletions
@@ -16,6 +16,10 @@ DEFAULT_PROMPT_FIELD_LABELS = (
    "Cast",
    "Cast descriptors",
    "Characters",
    "Softcore setup",
    "Hardcore setup",
    "POV participant",
    "Body exposure",
    "Scene",
    "Setting",
    "Pose",
@@ -24,7 +28,13 @@ DEFAULT_PROMPT_FIELD_LABELS = (
    "Facial expression",
    "Facial expressions",
    "Clothing",
    "Clothing state",
    "Visual clothing state",
    "Outfit",
    "Erotic outfit",
    "Teaser outfit detail",
    "Softcore visual reference",
    "Visible remaining styling",
    "Prop/detail",
    "Composition",
    "Role graph",
@@ -29,6 +29,7 @@ import sdxl_tag_policy  # noqa: E402
 # Trigger tokens checked by _formatter_trigger_issues:
 #   TRIGGER      - must occur exactly once in the caption text and is
 #                  reported as unexpected in the krea and sdxl prompts.
 #   SDXL_TRIGGER - must occur exactly once in the sdxl prompt and is
 #                  reported as unexpected in the krea prompt and the caption.
 #   OLD_TRIGGER  - reported as unexpected in all three outputs.
 TRIGGER = "sxcppnl7"
 SDXL_TRIGGER = "mythp0rt"
 OLD_TRIGGER = "sxcpinup_coloredpencil"
 SOFTCORE_NOISE_TERMS = (
    "the image focuses",
@@ -41,10 +42,29 @@ SOFTCORE_NOISE_TERMS = (
 )
 # "label:" fragments that must not appear in formatter output.
 # _formatter_issues lowercases each prompt and reports a `leaked_label`
 # issue for every entry that occurs as a plain substring, so entries must be
 # written in lowercase or they can never match.
 # NOTE: because matching is by substring, a longer entry also triggers any
 # shorter entry it contains (e.g. "shared cast descriptors:" also matches
 # "cast descriptors:"), producing one issue per matching entry.
 FORMATTER_LABEL_LEAKS = (
    "body exposure:",
    "camera control:",
    "characters:",
    "clothing:",
    "clothing state:",
    "composition:",
    "facial expression:",
    "facial expressions:",
    "hardcore setup:",
    "outfit:",
    "pose:",
    "pov participant:",
    "role graph:",
    "setting:",
    "sexual pose:",
    "sexual scene:",
    "softcore setup:",
    "softcore visual reference:",
    "cast descriptors:",
    "shared cast descriptors:",
    "teaser outfit detail:",
    "visual clothing state:",
    "visible remaining styling:",
 )
 HARDCORE_NOISE_TERMS = (
@@ -367,6 +387,36 @@ def _contains_all(text: str, required: tuple[str, ...]) -> bool:
    return all(term.lower() in lower for term in required)
 def _trigger_count(text: str, trigger: str) -> int:
    return len(re.findall(rf"(?<![a-z0-9_]){re.escape(trigger)}(?![a-z0-9_])", text, flags=re.IGNORECASE))
 def _formatter_trigger_issues(name: str, prompts: dict[str, str]) -> list[str]:
    """Report trigger-token violations across the three formatter outputs.

    Args:
        name: Case identifier used as the prefix of every issue string.
        prompts: Mapping with the keys ``"krea"``, ``"sdxl"`` and
            ``"caption"`` holding the respective output text.

    Returns:
        Issue strings in a fixed order: krea prompt first, then sdxl prompt,
        then caption. The krea prompt must contain none of the triggers; the
        sdxl prompt must contain ``SDXL_TRIGGER`` exactly once and no other
        trigger; the caption must contain ``TRIGGER`` exactly once and no
        other trigger.

    Raises:
        KeyError: If ``prompts`` lacks one of the three expected keys.
    """
    # Read all three texts up front so a missing key fails before any check runs.
    krea_text, sdxl_text, caption = prompts["krea"], prompts["sdxl"], prompts["caption"]
    # (issue label, text, trigger required exactly once or None, triggers that must be absent)
    rules = (
        (f"{name}.krea_prompt", krea_text, None, (TRIGGER, SDXL_TRIGGER, OLD_TRIGGER)),
        (f"{name}.sdxl_prompt", sdxl_text, SDXL_TRIGGER, (TRIGGER, OLD_TRIGGER)),
        (f"{name}.caption", caption, TRIGGER, (SDXL_TRIGGER, OLD_TRIGGER)),
    )
    found: list[str] = []
    for label, text, required, forbidden in rules:
        if required is not None:
            hits = _trigger_count(text, required)
            if hits != 1:
                found.append(f"{label}: trigger_count:{required}:{hits}")
        found.extend(
            f"{label}: unexpected_trigger:{banned}"
            for banned in forbidden
            if _trigger_count(text, banned)
        )
    return found
 def _formatter_expectation_issues(
    name: str,
    formats: dict[str, Any],
@@ -463,12 +513,18 @@ def _formatter_issues(
    krea_prompt = str(krea.get("krea_prompt") or "")
    sdxl_prompt = str(sdxl.get("sdxl_prompt") or "")
    caption_text = str(caption.get("natural_caption") or "")
    prompts = {
        "krea": krea_prompt,
        "sdxl": sdxl_prompt,
        "caption": caption_text,
    }
    for label, value in (
        (f"{name}.krea_prompt", krea_prompt),
        (f"{name}.sdxl_prompt", sdxl_prompt),
        (f"{name}.caption", caption_text),
    ):
        issues.extend(_text_issues(label, value, min_len=20))
    issues.extend(_formatter_trigger_issues(name, prompts))
    for formatter_name, method in (
        ("krea", krea.get("method")),
@@ -487,10 +543,12 @@ def _formatter_issues(
        if duplicates:
            issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}")
-    lower_krea = krea_prompt.lower()
+    for formatter_name, prompt in prompts.items():
        lower_prompt = prompt.lower()
        for leak in FORMATTER_LABEL_LEAKS:
-        if leak in lower_krea:
+            if leak in lower_prompt:
-            issues.append(f"{name}.krea_prompt: leaked_label:{leak}")
+                issues.append(f"{name}.{formatter_name}: leaked_label:{leak}")
    lower_krea = krea_prompt.lower()
    for noise in HARDCORE_NOISE_TERMS:
        if noise in lower_krea:
            issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")