From 030a1255e1286fcfb6f86057fc322c6f1b7f44f0 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 27 Jun 2026 22:23:49 +0200 Subject: [PATCH] Split SDXL sentence-boundary tags --- sdxl_tag_policy.py | 1 + tools/prompt_route_simulation.py | 7 +++++++ tools/prompt_smoke.py | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/sdxl_tag_policy.py b/sdxl_tag_policy.py index 636810e..1db7041 100644 --- a/sdxl_tag_policy.py +++ b/sdxl_tag_policy.py @@ -63,6 +63,7 @@ def split_tag_text(text: Any) -> list[str]: text, flags=re.IGNORECASE, ) + text = re.sub(r"(?<=[A-Za-z0-9)])\.\s+(?=[A-Za-z])", ", ", text) text = re.sub(r"(? list[st return [] +def _sdxl_sentence_boundary_tag_issues(name: str, sdxl_prompt: str) -> list[str]: + if re.search(r"\b[a-z][^,.]{0,80}\.\s+(?:woman|man|the|keep|use)\b", sdxl_prompt, flags=re.IGNORECASE): + return [f"{name}.sdxl_prompt: sentence_boundary_tag"] + return [] + + def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str, Any], str]: trace_text = str(payload.get("route_trace_json") or "") if not trace_text: @@ -734,6 +740,7 @@ def _formatter_issues( issues.extend(_sdxl_expression_label_issues(name, sdxl_prompt)) issues.extend(_sdxl_hyphen_fragment_issues(name, sdxl_prompt)) issues.extend(_sdxl_character_pair_fragment_issues(name, sdxl_prompt)) + issues.extend(_sdxl_sentence_boundary_tag_issues(name, sdxl_prompt)) for label, value in ( (f"{name}.krea_negative", krea.get("negative_prompt")), diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index f0e5ad3..3eb8a64 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -4683,6 +4683,14 @@ def smoke_sdxl_tag_policy() -> None: subject_pair_tags = sdxl_tag_policy.split_tag_text("Woman A, Man A are mid-transition with hands on hips") _expect("woman and man are mid-transition" in subject_pair_tags, "SDXL tag splitter broke paired character clause") _expect("woman" not in subject_pair_tags and "man are mid-transition" not in subject_pair_tags, "SDXL tag splitter emitted broken paired character fragments") + sentence_boundary_tags = sdxl_tag_policy.split_tag_text("keep hands on hips, breasts, thighs. Man watches close") + sentence_boundary_tags_lower = [tag.lower() for tag in sentence_boundary_tags] + _expect("thighs" in sentence_boundary_tags, "SDXL tag splitter lost pre-period tag") + _expect("man watches close" in sentence_boundary_tags_lower, "SDXL tag splitter did not split sentence-boundary tag") + _expect( + "thighs. man watches close" not in sentence_boundary_tags_lower, + "SDXL tag splitter kept sentence-boundary tag fragment", + ) _expect( sdxl_formatter._camera_tags(row) == sdxl_tag_policy.camera_tags(row), "SDXL formatter camera helper should delegate to sdxl_tag_policy",