From c6f0fc34af8872aec505af71f111e4e7099f42d8 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 27 Jun 2026 19:02:04 +0200 Subject: [PATCH] Harden formatter prompt hygiene simulation --- formatter_input.py | 10 +++++ tools/prompt_route_simulation.py | 64 ++++++++++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/formatter_input.py b/formatter_input.py index 4f3426d..082527b 100644 --- a/formatter_input.py +++ b/formatter_input.py @@ -16,6 +16,10 @@ DEFAULT_PROMPT_FIELD_LABELS = ( "Cast", "Cast descriptors", "Characters", + "Softcore setup", + "Hardcore setup", + "POV participant", + "Body exposure", "Scene", "Setting", "Pose", @@ -24,7 +28,13 @@ DEFAULT_PROMPT_FIELD_LABELS = ( "Facial expression", "Facial expressions", "Clothing", + "Clothing state", + "Visual clothing state", + "Outfit", "Erotic outfit", + "Teaser outfit detail", + "Softcore visual reference", + "Visible remaining styling", "Prop/detail", "Composition", "Role graph", diff --git a/tools/prompt_route_simulation.py b/tools/prompt_route_simulation.py index 1f53185..4146cc1 100644 --- a/tools/prompt_route_simulation.py +++ b/tools/prompt_route_simulation.py @@ -29,6 +29,7 @@ import sdxl_tag_policy # noqa: E402 TRIGGER = "sxcppnl7" SDXL_TRIGGER = "mythp0rt" +OLD_TRIGGER = "sxcpinup_coloredpencil" SOFTCORE_NOISE_TERMS = ( "the image focuses", @@ -41,10 +42,29 @@ SOFTCORE_NOISE_TERMS = ( ) FORMATTER_LABEL_LEAKS = ( + "body exposure:", + "camera control:", + "characters:", + "clothing:", + "clothing state:", + "composition:", + "facial expression:", + "facial expressions:", + "hardcore setup:", + "outfit:", + "pose:", + "pov participant:", "role graph:", + "setting:", + "sexual pose:", "sexual scene:", + "softcore setup:", + "softcore visual reference:", "cast descriptors:", "shared cast descriptors:", + "teaser outfit detail:", + "visual clothing state:", + "visible remaining styling:", ) HARDCORE_NOISE_TERMS = ( @@ -367,6 +387,36 @@ def _contains_all(text: str, 
required: tuple[str, ...]) -> bool: return all(term.lower() in lower for term in required) +def _trigger_count(text: str, trigger: str) -> int: + return len(re.findall(rf"(?<![A-Za-z0-9_]){re.escape(trigger)}(?![A-Za-z0-9_])", text)) + + +def _formatter_trigger_issues(name: str, prompts: dict[str, str]) -> list[str]: + issues: list[str] = [] + krea_prompt = prompts["krea"] + sdxl_prompt = prompts["sdxl"] + caption_text = prompts["caption"] + + for trigger in (TRIGGER, SDXL_TRIGGER, OLD_TRIGGER): + if _trigger_count(krea_prompt, trigger): + issues.append(f"{name}.krea_prompt: unexpected_trigger:{trigger}") + + sdxl_count = _trigger_count(sdxl_prompt, SDXL_TRIGGER) + if sdxl_count != 1: + issues.append(f"{name}.sdxl_prompt: trigger_count:{SDXL_TRIGGER}:{sdxl_count}") + for trigger in (TRIGGER, OLD_TRIGGER): + if _trigger_count(sdxl_prompt, trigger): + issues.append(f"{name}.sdxl_prompt: unexpected_trigger:{trigger}") + + caption_count = _trigger_count(caption_text, TRIGGER) + if caption_count != 1: + issues.append(f"{name}.caption: trigger_count:{TRIGGER}:{caption_count}") + for trigger in (SDXL_TRIGGER, OLD_TRIGGER): + if _trigger_count(caption_text, trigger): + issues.append(f"{name}.caption: unexpected_trigger:{trigger}") + return issues + + def _formatter_expectation_issues( name: str, formats: dict[str, Any], @@ -463,12 +513,18 @@ def _formatter_issues( krea_prompt = str(krea.get("krea_prompt") or "") sdxl_prompt = str(sdxl.get("sdxl_prompt") or "") caption_text = str(caption.get("natural_caption") or "") + prompts = { + "krea": krea_prompt, + "sdxl": sdxl_prompt, + "caption": caption_text, + } for label, value in ( (f"{name}.krea_prompt", krea_prompt), (f"{name}.sdxl_prompt", sdxl_prompt), (f"{name}.caption", caption_text), ): issues.extend(_text_issues(label, value, min_len=20)) + issues.extend(_formatter_trigger_issues(name, prompts)) for formatter_name, method in ( ("krea", krea.get("method")), @@ -487,10 +543,12 @@ def _formatter_issues( if duplicates: issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}") + for formatter_name, prompt in prompts.items(): + lower_prompt =
prompt.lower() + for leak in FORMATTER_LABEL_LEAKS: + if leak in lower_prompt: + issues.append(f"{name}.{formatter_name}: leaked_label:{leak}") lower_krea = krea_prompt.lower() - for leak in FORMATTER_LABEL_LEAKS: - if leak in lower_krea: - issues.append(f"{name}.krea_prompt: leaked_label:{leak}") for noise in HARDCORE_NOISE_TERMS: if noise in lower_krea: issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")