Harden formatter prompt hygiene simulation
This commit is contained in:
@@ -29,6 +29,7 @@ import sdxl_tag_policy # noqa: E402
|
||||
|
||||
# Trigger tokens checked by the formatter trigger-hygiene pass.
# TRIGGER must occur exactly once in the natural caption and is flagged when
# it shows up in the krea or sdxl prompt.
TRIGGER: str = "sxcppnl7"
# SDXL_TRIGGER must occur exactly once in the sdxl prompt and is flagged when
# it shows up in the krea prompt or the caption.
SDXL_TRIGGER: str = "mythp0rt"
# OLD_TRIGGER is flagged wherever it appears in any of the three outputs.
OLD_TRIGGER: str = "sxcpinup_coloredpencil"
|
||||
|
||||
SOFTCORE_NOISE_TERMS = (
|
||||
"the image focuses",
|
||||
@@ -41,10 +42,29 @@ SOFTCORE_NOISE_TERMS = (
|
||||
)
|
||||
|
||||
# Lowercase field labels that must not survive into formatter output.
# They are matched as plain substrings of the lowercased prompt, so entries
# that contain one another (e.g. "pose:" inside "sexual pose:") can each
# produce their own issue for a single leaked label.
# Order is significant: issues are reported in tuple order.
FORMATTER_LABEL_LEAKS: tuple[str, ...] = (
    "body exposure:",
    "camera control:",
    "characters:",
    "clothing:",
    "clothing state:",
    "composition:",
    "facial expression:",
    "facial expressions:",
    "hardcore setup:",
    "outfit:",
    "pose:",
    "pov participant:",
    "role graph:",
    "setting:",
    "sexual pose:",
    "sexual scene:",
    "softcore setup:",
    "softcore visual reference:",
    "cast descriptors:",
    "shared cast descriptors:",
    "teaser outfit detail:",
    "visual clothing state:",
    "visible remaining styling:",
)
|
||||
|
||||
HARDCORE_NOISE_TERMS = (
|
||||
@@ -367,6 +387,36 @@ def _contains_all(text: str, required: tuple[str, ...]) -> bool:
|
||||
return all(term.lower() in lower for term in required)
|
||||
|
||||
|
||||
def _trigger_count(text: str, trigger: str) -> int:
|
||||
return len(re.findall(rf"(?<![a-z0-9_]){re.escape(trigger)}(?![a-z0-9_])", text, flags=re.IGNORECASE))
|
||||
|
||||
|
||||
def _formatter_trigger_issues(name: str, prompts: dict[str, str]) -> list[str]:
    """Report trigger-token hygiene problems across the three formatter outputs.

    Args:
        name: Prefix used when labelling each issue string.
        prompts: Mapping with the keys ``"krea"``, ``"sdxl"`` and
            ``"caption"``; a missing key raises ``KeyError``.

    Returns:
        Issue strings in a fixed order: krea prompt first, then sdxl prompt,
        then caption. The krea prompt must contain none of the triggers; the
        sdxl prompt must contain ``SDXL_TRIGGER`` exactly once and neither of
        the others; the caption must contain ``TRIGGER`` exactly once and
        neither of the others.
    """
    # Each row: prompts key, label suffix, trigger required exactly once
    # (None when no trigger is allowed), triggers that must be absent.
    rules = (
        ("krea", "krea_prompt", None, (TRIGGER, SDXL_TRIGGER, OLD_TRIGGER)),
        ("sdxl", "sdxl_prompt", SDXL_TRIGGER, (TRIGGER, OLD_TRIGGER)),
        ("caption", "caption", TRIGGER, (SDXL_TRIGGER, OLD_TRIGGER)),
    )
    # Resolve every prompt up front so a missing key fails before any check.
    texts = {key: prompts[key] for key, _, _, _ in rules}

    found: list[str] = []
    for key, suffix, required, forbidden in rules:
        body = texts[key]
        label = f"{name}.{suffix}"
        if required is not None:
            seen = _trigger_count(body, required)
            if seen != 1:
                found.append(f"{label}: trigger_count:{required}:{seen}")
        found.extend(
            f"{label}: unexpected_trigger:{banned}"
            for banned in forbidden
            if _trigger_count(body, banned)
        )
    return found
|
||||
|
||||
|
||||
def _formatter_expectation_issues(
|
||||
name: str,
|
||||
formats: dict[str, Any],
|
||||
@@ -463,12 +513,18 @@ def _formatter_issues(
|
||||
krea_prompt = str(krea.get("krea_prompt") or "")
|
||||
sdxl_prompt = str(sdxl.get("sdxl_prompt") or "")
|
||||
caption_text = str(caption.get("natural_caption") or "")
|
||||
prompts = {
|
||||
"krea": krea_prompt,
|
||||
"sdxl": sdxl_prompt,
|
||||
"caption": caption_text,
|
||||
}
|
||||
for label, value in (
|
||||
(f"{name}.krea_prompt", krea_prompt),
|
||||
(f"{name}.sdxl_prompt", sdxl_prompt),
|
||||
(f"{name}.caption", caption_text),
|
||||
):
|
||||
issues.extend(_text_issues(label, value, min_len=20))
|
||||
issues.extend(_formatter_trigger_issues(name, prompts))
|
||||
|
||||
for formatter_name, method in (
|
||||
("krea", krea.get("method")),
|
||||
@@ -487,10 +543,12 @@ def _formatter_issues(
|
||||
if duplicates:
|
||||
issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}")
|
||||
|
||||
for formatter_name, prompt in prompts.items():
|
||||
lower_prompt = prompt.lower()
|
||||
for leak in FORMATTER_LABEL_LEAKS:
|
||||
if leak in lower_prompt:
|
||||
issues.append(f"{name}.{formatter_name}: leaked_label:{leak}")
|
||||
lower_krea = krea_prompt.lower()
|
||||
for leak in FORMATTER_LABEL_LEAKS:
|
||||
if leak in lower_krea:
|
||||
issues.append(f"{name}.krea_prompt: leaked_label:{leak}")
|
||||
for noise in HARDCORE_NOISE_TERMS:
|
||||
if noise in lower_krea:
|
||||
issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")
|
||||
|
||||
Reference in New Issue
Block a user