Harden formatter prompt hygiene simulation
This commit is contained in:
@@ -16,6 +16,10 @@ DEFAULT_PROMPT_FIELD_LABELS = (
|
|||||||
"Cast",
|
"Cast",
|
||||||
"Cast descriptors",
|
"Cast descriptors",
|
||||||
"Characters",
|
"Characters",
|
||||||
|
"Softcore setup",
|
||||||
|
"Hardcore setup",
|
||||||
|
"POV participant",
|
||||||
|
"Body exposure",
|
||||||
"Scene",
|
"Scene",
|
||||||
"Setting",
|
"Setting",
|
||||||
"Pose",
|
"Pose",
|
||||||
@@ -24,7 +28,13 @@ DEFAULT_PROMPT_FIELD_LABELS = (
|
|||||||
"Facial expression",
|
"Facial expression",
|
||||||
"Facial expressions",
|
"Facial expressions",
|
||||||
"Clothing",
|
"Clothing",
|
||||||
|
"Clothing state",
|
||||||
|
"Visual clothing state",
|
||||||
|
"Outfit",
|
||||||
"Erotic outfit",
|
"Erotic outfit",
|
||||||
|
"Teaser outfit detail",
|
||||||
|
"Softcore visual reference",
|
||||||
|
"Visible remaining styling",
|
||||||
"Prop/detail",
|
"Prop/detail",
|
||||||
"Composition",
|
"Composition",
|
||||||
"Role graph",
|
"Role graph",
|
||||||
|
|||||||
@@ -29,6 +29,7 @@ import sdxl_tag_policy # noqa: E402
|
|||||||
|
|
||||||
TRIGGER = "sxcppnl7"
|
TRIGGER = "sxcppnl7"
|
||||||
SDXL_TRIGGER = "mythp0rt"
|
SDXL_TRIGGER = "mythp0rt"
|
||||||
|
OLD_TRIGGER = "sxcpinup_coloredpencil"
|
||||||
|
|
||||||
SOFTCORE_NOISE_TERMS = (
|
SOFTCORE_NOISE_TERMS = (
|
||||||
"the image focuses",
|
"the image focuses",
|
||||||
@@ -41,10 +42,29 @@ SOFTCORE_NOISE_TERMS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
FORMATTER_LABEL_LEAKS = (
|
FORMATTER_LABEL_LEAKS = (
|
||||||
|
"body exposure:",
|
||||||
|
"camera control:",
|
||||||
|
"characters:",
|
||||||
|
"clothing:",
|
||||||
|
"clothing state:",
|
||||||
|
"composition:",
|
||||||
|
"facial expression:",
|
||||||
|
"facial expressions:",
|
||||||
|
"hardcore setup:",
|
||||||
|
"outfit:",
|
||||||
|
"pose:",
|
||||||
|
"pov participant:",
|
||||||
"role graph:",
|
"role graph:",
|
||||||
|
"setting:",
|
||||||
|
"sexual pose:",
|
||||||
"sexual scene:",
|
"sexual scene:",
|
||||||
|
"softcore setup:",
|
||||||
|
"softcore visual reference:",
|
||||||
"cast descriptors:",
|
"cast descriptors:",
|
||||||
"shared cast descriptors:",
|
"shared cast descriptors:",
|
||||||
|
"teaser outfit detail:",
|
||||||
|
"visual clothing state:",
|
||||||
|
"visible remaining styling:",
|
||||||
)
|
)
|
||||||
|
|
||||||
HARDCORE_NOISE_TERMS = (
|
HARDCORE_NOISE_TERMS = (
|
||||||
@@ -367,6 +387,36 @@ def _contains_all(text: str, required: tuple[str, ...]) -> bool:
|
|||||||
return all(term.lower() in lower for term in required)
|
return all(term.lower() in lower for term in required)
|
||||||
|
|
||||||
|
|
||||||
|
def _trigger_count(text: str, trigger: str) -> int:
|
||||||
|
return len(re.findall(rf"(?<![a-z0-9_]){re.escape(trigger)}(?![a-z0-9_])", text, flags=re.IGNORECASE))
|
||||||
|
|
||||||
|
|
||||||
|
def _formatter_trigger_issues(name: str, prompts: dict[str, str]) -> list[str]:
    """Report trigger-token problems in the three formatter outputs.

    Args:
        name: Prefix used in every issue label.
        prompts: Mapping that must contain the keys ``"krea"``, ``"sdxl"``
            and ``"caption"``; a missing key raises ``KeyError``.

    Returns:
        Issue strings, in order: krea prompt, SDXL prompt, caption. The krea
        prompt must contain none of the triggers. The SDXL prompt must
        contain ``SDXL_TRIGGER`` exactly once and neither other trigger. The
        caption must contain ``TRIGGER`` exactly once and neither other
        trigger.
    """
    # Each row: (label suffix, text, trigger required exactly once or None,
    # triggers that must not appear). All three keys are read up front.
    checks = (
        ("krea_prompt", prompts["krea"], None, (TRIGGER, SDXL_TRIGGER, OLD_TRIGGER)),
        ("sdxl_prompt", prompts["sdxl"], SDXL_TRIGGER, (TRIGGER, OLD_TRIGGER)),
        ("caption", prompts["caption"], TRIGGER, (SDXL_TRIGGER, OLD_TRIGGER)),
    )

    found: list[str] = []
    for suffix, text, required, forbidden in checks:
        if required is not None:
            seen = _trigger_count(text, required)
            if seen != 1:
                found.append(f"{name}.{suffix}: trigger_count:{required}:{seen}")
        found.extend(
            f"{name}.{suffix}: unexpected_trigger:{banned}"
            for banned in forbidden
            if _trigger_count(text, banned) > 0
        )
    return found
|
||||||
|
|
||||||
|
|
||||||
def _formatter_expectation_issues(
|
def _formatter_expectation_issues(
|
||||||
name: str,
|
name: str,
|
||||||
formats: dict[str, Any],
|
formats: dict[str, Any],
|
||||||
@@ -463,12 +513,18 @@ def _formatter_issues(
|
|||||||
krea_prompt = str(krea.get("krea_prompt") or "")
|
krea_prompt = str(krea.get("krea_prompt") or "")
|
||||||
sdxl_prompt = str(sdxl.get("sdxl_prompt") or "")
|
sdxl_prompt = str(sdxl.get("sdxl_prompt") or "")
|
||||||
caption_text = str(caption.get("natural_caption") or "")
|
caption_text = str(caption.get("natural_caption") or "")
|
||||||
|
prompts = {
|
||||||
|
"krea": krea_prompt,
|
||||||
|
"sdxl": sdxl_prompt,
|
||||||
|
"caption": caption_text,
|
||||||
|
}
|
||||||
for label, value in (
|
for label, value in (
|
||||||
(f"{name}.krea_prompt", krea_prompt),
|
(f"{name}.krea_prompt", krea_prompt),
|
||||||
(f"{name}.sdxl_prompt", sdxl_prompt),
|
(f"{name}.sdxl_prompt", sdxl_prompt),
|
||||||
(f"{name}.caption", caption_text),
|
(f"{name}.caption", caption_text),
|
||||||
):
|
):
|
||||||
issues.extend(_text_issues(label, value, min_len=20))
|
issues.extend(_text_issues(label, value, min_len=20))
|
||||||
|
issues.extend(_formatter_trigger_issues(name, prompts))
|
||||||
|
|
||||||
for formatter_name, method in (
|
for formatter_name, method in (
|
||||||
("krea", krea.get("method")),
|
("krea", krea.get("method")),
|
||||||
@@ -487,10 +543,12 @@ def _formatter_issues(
|
|||||||
if duplicates:
|
if duplicates:
|
||||||
issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}")
|
issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}")
|
||||||
|
|
||||||
lower_krea = krea_prompt.lower()
|
for formatter_name, prompt in prompts.items():
|
||||||
|
lower_prompt = prompt.lower()
|
||||||
for leak in FORMATTER_LABEL_LEAKS:
|
for leak in FORMATTER_LABEL_LEAKS:
|
||||||
if leak in lower_krea:
|
if leak in lower_prompt:
|
||||||
issues.append(f"{name}.krea_prompt: leaked_label:{leak}")
|
issues.append(f"{name}.{formatter_name}: leaked_label:{leak}")
|
||||||
|
lower_krea = krea_prompt.lower()
|
||||||
for noise in HARDCORE_NOISE_TERMS:
|
for noise in HARDCORE_NOISE_TERMS:
|
||||||
if noise in lower_krea:
|
if noise in lower_krea:
|
||||||
issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")
|
issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")
|
||||||
|
|||||||
Reference in New Issue
Block a user