Harden formatter prompt hygiene simulation

This commit is contained in:
2026-06-27 19:02:04 +02:00
parent 80e7e6e156
commit c6f0fc34af
2 changed files with 71 additions and 3 deletions
+61 -3
View File
@@ -29,6 +29,7 @@ import sdxl_tag_policy # noqa: E402
# Trigger token that _formatter_trigger_issues requires exactly once in the
# natural caption and rejects in the Krea and SDXL prompts.
TRIGGER = "sxcppnl7"
# Trigger token that _formatter_trigger_issues requires exactly once in the
# SDXL prompt and rejects in the Krea prompt and the caption.
SDXL_TRIGGER = "mythp0rt"
# Token rejected in every formatter output (Krea, SDXL, caption).
# NOTE(review): presumably a retired trigger, judging by the name — confirm.
OLD_TRIGGER = "sxcpinup_coloredpencil"
SOFTCORE_NOISE_TERMS = (
"the image focuses",
@@ -41,10 +42,29 @@ SOFTCORE_NOISE_TERMS = (
)
# Section labels that must not show up in formatter output; _formatter_issues
# reports a "leaked_label" issue for each entry found in a prompt.
# Matching is a plain substring test against the lower-cased prompt text, so
# entries must stay lower-case. Because it is a substring test, an entry that
# is the tail of another one (e.g. "pose:" inside "sexual pose:") is reported
# in addition to the longer entry.
# Tuple order determines the order in which leaks are reported.
FORMATTER_LABEL_LEAKS = (
"body exposure:",
"camera control:",
"characters:",
"clothing:",
"clothing state:",
"composition:",
"facial expression:",
"facial expressions:",
"hardcore setup:",
"outfit:",
"pose:",
"pov participant:",
"role graph:",
"setting:",
"sexual pose:",
"sexual scene:",
"softcore setup:",
"softcore visual reference:",
"cast descriptors:",
"shared cast descriptors:",
"teaser outfit detail:",
"visual clothing state:",
"visible remaining styling:",
)
HARDCORE_NOISE_TERMS = (
@@ -367,6 +387,36 @@ def _contains_all(text: str, required: tuple[str, ...]) -> bool:
return all(term.lower() in lower for term in required)
def _trigger_count(text: str, trigger: str) -> int:
return len(re.findall(rf"(?<![a-z0-9_]){re.escape(trigger)}(?![a-z0-9_])", text, flags=re.IGNORECASE))
def _formatter_trigger_issues(name: str, prompts: dict[str, str]) -> list[str]:
    """Report trigger-token violations across the three formatter outputs.

    Rules applied, in reporting order:
      * Krea prompt: none of TRIGGER, SDXL_TRIGGER, OLD_TRIGGER may appear.
      * SDXL prompt: SDXL_TRIGGER exactly once; TRIGGER and OLD_TRIGGER never.
      * Caption: TRIGGER exactly once; SDXL_TRIGGER and OLD_TRIGGER never.

    Args:
        name: Prefix used to build the issue labels.
        prompts: Mapping with the keys "krea", "sdxl" and "caption".

    Returns:
        Issue strings of the form ``<label>: trigger_count:<trigger>:<n>`` or
        ``<label>: unexpected_trigger:<trigger>``; empty when all rules hold.

    Raises:
        KeyError: If one of the three expected keys is missing from ``prompts``.
    """
    krea_text, sdxl_text, caption_text = (
        prompts["krea"],
        prompts["sdxl"],
        prompts["caption"],
    )
    # Each rule: (issue label, text to scan, trigger required exactly once or
    # None when no trigger is required, triggers that must be absent).
    rules = (
        (f"{name}.krea_prompt", krea_text, None, (TRIGGER, SDXL_TRIGGER, OLD_TRIGGER)),
        (f"{name}.sdxl_prompt", sdxl_text, SDXL_TRIGGER, (TRIGGER, OLD_TRIGGER)),
        (f"{name}.caption", caption_text, TRIGGER, (SDXL_TRIGGER, OLD_TRIGGER)),
    )
    found: list[str] = []
    for label, text, required, forbidden in rules:
        if required is not None:
            occurrences = _trigger_count(text, required)
            if occurrences != 1:
                found.append(f"{label}: trigger_count:{required}:{occurrences}")
        found.extend(
            f"{label}: unexpected_trigger:{banned}"
            for banned in forbidden
            if _trigger_count(text, banned)
        )
    return found
def _formatter_expectation_issues(
name: str,
formats: dict[str, Any],
@@ -463,12 +513,18 @@ def _formatter_issues(
krea_prompt = str(krea.get("krea_prompt") or "")
sdxl_prompt = str(sdxl.get("sdxl_prompt") or "")
caption_text = str(caption.get("natural_caption") or "")
prompts = {
"krea": krea_prompt,
"sdxl": sdxl_prompt,
"caption": caption_text,
}
for label, value in (
(f"{name}.krea_prompt", krea_prompt),
(f"{name}.sdxl_prompt", sdxl_prompt),
(f"{name}.caption", caption_text),
):
issues.extend(_text_issues(label, value, min_len=20))
issues.extend(_formatter_trigger_issues(name, prompts))
for formatter_name, method in (
("krea", krea.get("method")),
@@ -487,10 +543,12 @@ def _formatter_issues(
if duplicates:
issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}")
for formatter_name, prompt in prompts.items():
lower_prompt = prompt.lower()
for leak in FORMATTER_LABEL_LEAKS:
if leak in lower_prompt:
issues.append(f"{name}.{formatter_name}: leaked_label:{leak}")
lower_krea = krea_prompt.lower()
for leak in FORMATTER_LABEL_LEAKS:
if leak in lower_krea:
issues.append(f"{name}.krea_prompt: leaked_label:{leak}")
for noise in HARDCORE_NOISE_TERMS:
if noise in lower_krea:
issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")