ComfyUI-Ethanfel-Prompt-Bui…/prompt_hygiene.py

from __future__ import annotations

import re
from typing import Any, Iterable


# Field labels that _strip_empty_fields removes from a prompt when they carry
# no value (e.g. "Pose:" or "Camera: none"). Matching there is
# case-insensitive. The order below is preserved because the labels are joined
# into a single regex alternation.
EMPTY_FIELD_LABELS = (
    # Cast description
    "Ages", "Body types", "Cast", "Cast descriptors", "Characters",
    # Scene, pose and expression
    "Scene", "Setting", "Pose", "Sexual pose", "Sexual scene",
    "Facial expression", "Facial expressions",
    # Wardrobe and props
    "Clothing", "Erotic outfit", "Prop/detail",
    # Framing
    "Composition", "Role graph",
    "Camera", "Camera control", "Camera priority",
    # Guidance lists
    "Use", "Avoid",
)


def clean_spacing(value: Any) -> str:
    """Normalize whitespace and stray punctuation in *value*.

    ``None`` becomes an empty string; anything else is converted with
    ``str()``. The result has single spaces, no whitespace before
    ``, . ; :``, no repeated ``, ; :`` characters, no padding just inside
    parentheses, and every run of punctuation that ends in a period
    collapsed to one period.

    The function is idempotent: cleaning an already-cleaned string returns
    it unchanged.
    """
    text = "" if value is None else str(value)
    # \s+ also covers newlines and tabs, so no separate newline pass is needed.
    text = re.sub(r"\s+", " ", text).strip()
    # Pull punctuation back onto the preceding word: "a , b" -> "a, b".
    text = re.sub(r"\s+([,.;:])", r"\1", text)
    # Repeated separators keep only the last one of the run: ",;" -> ";".
    text = re.sub(r"([,;:]){2,}", r"\1", text)
    # Collapse a whole punctuation run that ends in a period ("...", ",.",
    # ":.", ".:.") in one pass. Replacing pairs one at a time left "a..."
    # as "a.." and made the result depend on how often it was re-cleaned.
    text = re.sub(r"(?:[.,:;]\s*)+\.", ".", text)
    # Remove padding just inside parentheses: "( a )" -> "(a)".
    text = re.sub(r"\(\s+", "(", text)
    text = re.sub(r"\s+\)", ")", text)
    return text.strip()


def _strip_empty_fields(text: str) -> str:
    """Remove ``Label:`` fields from *text* that carry no value.

    Labels come from ``EMPTY_FIELD_LABELS`` and are matched
    case-insensitively. A field counts as empty when the label is followed
    only by punctuation or the end of the text, when it has degraded to a
    bare ``Label.`` clause, or when its whole value is ``none``, ``null``
    or ``n/a``.

    Returns the remaining text passed through ``clean_spacing``; an empty
    or falsy *text* returns ``""``.
    """
    if not text:
        return ""
    labels = "|".join(re.escape(label) for label in EMPTY_FIELD_LABELS)
    flags = re.IGNORECASE
    # "Label: ." / "Label :," -- label, colon and the punctuation that follows.
    text = re.sub(rf"\b(?:{labels})\s*:\s*[.,;]", "", text, flags=flags)
    # "Label:" directly before punctuation or the end of the text.
    text = re.sub(rf"\b(?:{labels}):\s*(?=\.|,|;|$)", "", text, flags=flags)
    # Bare "Label." (what clean_spacing leaves of "Label: ."). Only strip it
    # when the label is the entire clause, i.e. it sits at the start of the
    # text or right after clause punctuation. Without this guard ordinary
    # sentence endings such as "She strikes a pose." lost their last word.
    text = re.sub(
        rf"(?:^\s*|(?<=[.!?;,])\s+)(?:{labels})\.(?=\s|$)",
        "",
        text,
        flags=flags,
    )
    # "Label: none" and friends. The placeholder must be the whole value;
    # otherwise "Avoid: none of these" would be cut down to "of these".
    text = re.sub(
        rf"\b(?:{labels}):\s*(?:none|null|n/a)\s*(?:[.,;]|$)",
        "",
        text,
        flags=flags,
    )
    return clean_spacing(text)


def _drop_dangling_connectors(text: str) -> str:
    """Remove connector words left hanging in front of punctuation.

    Applies a fixed sequence of case-insensitive rewrites for ``with``,
    ``and``, ``or``, ``while`` and ``featuring`` followed directly by
    ``,``, ``.`` or ``;``, then returns the result passed through
    ``clean_spacing``.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    rewrites = (
        (r"\b(?:with|and|or|while|featuring)\s*([,.;])", r"\1"),
        (r"([,.;])\s*(?:with|and|or|while|featuring)\s*([,.;])", r"\1"),
        (r"\bwith\s*,", ""),
        (r",\s*and\s*\.", "."),
    )
    result = text
    for pattern, replacement in rewrites:
        result = re.sub(pattern, replacement, result, flags=re.IGNORECASE)
    return clean_spacing(result)


def _sentence_key(text: str, triggers: Iterable[str] = ()) -> str:
    """Build a comparison key for a sentence.

    Each non-empty trigger is stripped (case-insensitively) from the start
    of *text* when it is followed by ``,``, ``.`` or ``;``. The remainder
    is lower-cased and every run of non-word characters becomes a single
    space.
    """
    remainder = text
    names = (str(candidate or "").strip() for candidate in triggers)
    for name in filter(None, names):
        lead = rf"^{re.escape(name)}\s*[,.;]\s*"
        remainder = re.sub(lead, "", remainder, flags=re.IGNORECASE)
    normalized = re.sub(r"\W+", " ", remainder.lower())
    return normalized.strip()


def _dedupe_adjacent_sentences(text: str, triggers: Iterable[str] = ()) -> str:
    """Drop sentences that repeat the sentence kept immediately before them.

    *text* is split after ``.``, ``!`` or ``?`` followed by whitespace.
    Sentences are compared through ``_sentence_key``; a sentence whose key
    is empty is dropped as well. Kept sentences are joined with single
    spaces.
    """
    kept: list[str] = []
    last_key = ""
    for chunk in re.split(r"(?<=[.!?])\s+", text):
        sentence = chunk.strip()
        if not sentence:
            continue
        current = _sentence_key(sentence, triggers)
        # Skip sentences with an empty key and repeats of the previous one.
        if not current or current == last_key:
            continue
        kept.append(sentence)
        last_key = current
    return " ".join(kept)


def _dedupe_labeled_sentences(text: str) -> str:
    """Remove repeated ``Label: value`` sentences anywhere in *text*.

    *text* is split after ``.``, ``!`` or ``?`` followed by whitespace.
    A sentence that looks like ``Label: value`` is kept only the first time
    its (lower-cased label, normalized value) pair appears. Sentences
    without a label are always kept. Kept sentences are joined with single
    spaces.
    """
    labeled = re.compile(r"^([A-Za-z][A-Za-z /_-]{1,40}):\s*(.+)$")
    already_seen: set[tuple[str, str]] = set()
    kept: list[str] = []
    for chunk in re.split(r"(?<=[.!?])\s+", text):
        sentence = chunk.strip()
        if not sentence:
            continue
        found = labeled.match(sentence)
        if found is not None:
            label, body = found.groups()
            signature = (
                label.strip().lower(),
                re.sub(r"\W+", " ", body.lower()).strip(),
            )
            if signature in already_seen:
                continue
            already_seen.add(signature)
        kept.append(sentence)
    return " ".join(kept)


def _trigger_prefix_key(text: str, triggers: Iterable[str]) -> str:
    """Return the first trigger that *text* starts with, or ``""``.

    Matching is case-insensitive and ignores surrounding whitespace. A
    trigger only counts as a prefix when it is the whole text or is
    followed (after optional whitespace) by ``,``, ``.`` or ``;``. A bare
    ``startswith`` would report trigger ``"cat"`` for ``"category, ..."``
    and would pick ``"foo"`` over ``"foo bar"`` for ``"foo bar, ..."``.

    The trigger is returned as supplied by the caller (whitespace-stripped),
    not as it is cased in *text*. Empty or ``None`` triggers are skipped.
    """
    lowered = text.lower().strip()
    for trigger in triggers:
        trigger = str(trigger or "").strip()
        if not trigger:
            continue
        needle = trigger.lower()
        if not lowered.startswith(needle):
            continue
        # Slice the lower-cased text so the offset matches the lower-cased needle.
        tail = lowered[len(needle):].lstrip()
        if not tail or tail.startswith((",", ".", ";")):
            return trigger
    return ""


def _dedupe_trigger_prefix(text: str, triggers: Iterable[str]) -> str:
    """Collapse a repeated leading trigger into a single ``"trigger, "`` prefix.

    *text* is first passed through ``clean_spacing``. If
    ``_trigger_prefix_key`` finds no trigger, the cleaned text is returned
    as is. Otherwise every leading ``trigger`` followed by ``,``, ``.`` or
    ``;`` is removed, the remainder is stripped of surrounding
    `` ,.;`` characters, and the result is ``"<trigger>, <remainder>"``
    using the trigger as the caller spelled it. If nothing remains, the
    trigger alone is returned.
    """
    text = clean_spacing(text)
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return text
    leading = re.match(
        rf"(?:{re.escape(trigger)}\s*[,.;]\s*)+", text, flags=re.IGNORECASE
    )
    if leading is None:
        # No separator follows the trigger (the text is just the trigger, or
        # only shares its first characters). Prepending the trigger here
        # would duplicate it, e.g. "trigger, trigger".
        return text
    remainder = text[leading.end():].strip(" ,.;")
    # Avoid a dangling "trigger, " when nothing follows the prefix.
    return f"{trigger}, {remainder}" if remainder else trigger


def _split_comma_items(text: str) -> list[str]:
    """Split *text* on commas and semicolons into trimmed, non-empty items.

    The text is passed through ``clean_spacing`` first. Each item is
    stripped of surrounding `` ,.;`` characters, and items that end up
    empty are dropped.
    """
    pieces = re.split(r"\s*[,;]\s*", clean_spacing(text))
    trimmed = (piece.strip(" ,.;") for piece in pieces)
    return [piece for piece in trimmed if piece]


def dedupe_comma_list(text: Any) -> str:
    """Return *text* as a ``", "``-joined list without repeated items.

    Items come from ``_split_comma_items``. Two items count as the same
    when they match after lower-casing and replacing every run of non-word
    characters with a space. The first spelling wins and the original order
    is kept. A falsy *text* yields ``""``.
    """
    # Dicts keep insertion order, so the first occurrence of each key stays put.
    first_by_key: dict[str, str] = {}
    for entry in _split_comma_items(str(text or "")):
        normalized = re.sub(r"\W+", " ", entry.lower()).strip()
        if normalized:
            first_by_key.setdefault(normalized, entry)
    return ", ".join(first_by_key.values())


def sanitize_prose_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean free-form prompt or caption prose.

    Steps, in order: normalize spacing, strip empty ``Label:`` fields, drop
    dangling connectors, remove repeated labeled sentences, collapse a
    repeated leading trigger, and drop adjacent duplicate sentences.

    Args:
        value: Text to clean; ``None`` or blank input yields ``""``.
        triggers: Trigger words that may prefix the text. Any iterable is
            accepted, including one-shot iterators; ``None`` is treated as
            no triggers.

    Returns:
        The cleaned text with surrounding spaces, commas and semicolons
        removed.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    # Two helpers below iterate the triggers, one after the other. Freeze
    # them once so a generator is not exhausted before the second helper.
    known_triggers = tuple(triggers or ())
    text = _strip_empty_fields(text)
    text = _drop_dangling_connectors(text)
    text = _dedupe_labeled_sentences(text)
    text = _dedupe_trigger_prefix(text, known_triggers)
    text = _dedupe_adjacent_sentences(text, known_triggers)
    return clean_spacing(text).strip(" ,;")


def sanitize_prompt_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean prompt text; delegates unchanged to ``sanitize_prose_text``."""
    cleaned = sanitize_prose_text(value, triggers)
    return cleaned


def sanitize_caption_text(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean caption text; delegates unchanged to ``sanitize_prose_text``."""
    cleaned = sanitize_prose_text(value, triggers)
    return cleaned


def sanitize_tag_prompt(value: Any, triggers: Iterable[str] = ()) -> str:
    """Clean a comma-separated tag prompt, keeping one leading trigger.

    The text is normalized with ``clean_spacing`` and de-duplicated with
    ``dedupe_comma_list``. When the text starts with a trigger followed by
    ``,`` or ``;``, every leading repetition of it is removed, later tags
    equal to the trigger are dropped, and the trigger (as spelled by the
    caller) is placed first exactly once.

    Args:
        value: Tag text; ``None`` or blank input yields ``""``.
        triggers: Candidate trigger words.

    Returns:
        The cleaned tags joined with ``", "``.
    """
    text = clean_spacing(value)
    if not text:
        return ""
    trigger = _trigger_prefix_key(text, triggers)
    if not trigger:
        return dedupe_comma_list(text)
    leading = re.match(
        rf"(?:{re.escape(trigger)}\s*[,;]\s*)+", text, flags=re.IGNORECASE
    )
    if leading is None:
        # No tag separator follows the trigger. Either the text is just the
        # trigger, or it only shares the trigger's first characters. In both
        # cases prepending the trigger would duplicate or invent a tag.
        if text.strip(" ,.;").lower() == trigger.lower():
            return trigger
        return dedupe_comma_list(text)
    trigger_key = re.sub(r"\W+", " ", trigger.lower()).strip()
    # dedupe_comma_list joins with ", " and items never contain a comma, so
    # splitting on ", " recovers the individual tags.
    remaining = dedupe_comma_list(text[leading.end():]).split(", ")
    tags = [
        tag
        for tag in remaining
        if tag and re.sub(r"\W+", " ", tag.lower()).strip() != trigger_key
    ]
    return ", ".join([trigger, *tags])


def sanitize_negative_text(value: Any) -> str:
    """Clean a negative prompt; delegates unchanged to ``dedupe_comma_list``."""
    cleaned = dedupe_comma_list(value)
    return cleaned