ComfyUI-Ethanfel-Prompt-Bui…/tools/prompt_route_simulation.py

#!/usr/bin/env python3
"""Run representative prompt-route simulations and report quality issues.

This is a diagnostic tool, not a golden snapshot test. It builds a small set of
metadata rows/pairs, sends them through the Krea2, SDXL, and caption routes, and
reports route/noise/seed-control problems in a JSON-friendly structure.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any


# ROOT is the parent of the directory containing this script. It is pushed to
# the front of sys.path (once) so the project modules imported just below can
# be resolved when the script is launched directly.
# NOTE(review): presumably ROOT is the repository root that holds
# prompt_builder and the formatter modules - confirm against the repo layout.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

import caption_naturalizer  # noqa: E402
import krea_formatter  # noqa: E402
import prompt_builder as pb  # noqa: E402
import sdxl_formatter  # noqa: E402


# Trigger token passed as `trigger=` to the prompt builders and to the caption
# naturalizer in this script.
TRIGGER = "sxcppnl7"
# Trigger token passed to the SDXL formatter (which is asked to prepend it).
SDXL_TRIGGER = "mythp0rt"

# Lowercase substrings that _softcore_issues() reports as `softcore_noise`
# when they appear in the lowercased text it is given.
SOFTCORE_NOISE_TERMS = (
    "the image focuses",
    "softcore version",
    "non-explicit teaser setup",
    "no sex act",
    "genital contact",
    "keep the softcore version",
    "focused on woman a alone",
)

# Lowercase label prefixes that _formatter_issues() reports as `leaked_label`
# when found in the lowercased Krea prompt.
FORMATTER_LABEL_LEAKS = (
    "role graph:",
    "sexual scene:",
    "cast descriptors:",
    "shared cast descriptors:",
)

# Lowercase phrases that _formatter_issues() reports as `hardcore_noise` when
# found in the lowercased Krea prompt; that check runs for every target.
HARDCORE_NOISE_TERMS = (
    "softcore visual reference",
    "the same visibly adult",
    "the scene contains",
)


def _json(value: Any) -> str:
    """Serialize *value* to a JSON string with sorted keys and ASCII-only output."""
    encoded = json.dumps(value, sort_keys=True, ensure_ascii=True)
    return encoded


def _clean_key(value: Any) -> str:
    """Normalize *value* into a lowercase comparison key.

    Falsy values become the empty string. Every run of characters other than
    ``a-z`` and ``0-9`` collapses to a single space, and surrounding whitespace
    is stripped.
    """
    lowered = str(value or "").lower()
    collapsed = re.sub(r"[^a-z0-9]+", " ", lowered)
    return collapsed.strip()


def _character_cast(*, pov_man: bool = False) -> str:
    """Build the two-slot character cast used by the simulations.

    A woman slot (label "A") is built first; its ``character_cast`` value is
    then passed into the call that adds a man slot (label "A"). The man's
    ``presence_mode`` is "pov" when *pov_man* is true and "visible" otherwise.
    Returns the ``character_cast`` entry of the second builder result.
    """
    woman_slot = pb.build_character_slot_json(
        subject_type="woman",
        label="A",
        age="25-year-old adult",
        ethnicity="western_european",
        figure="balanced",
        body="slim busty",
        hair_color="blonde",
        hair_length="long",
        hair_style="loose_waves",
        descriptor_detail="full",
        expression_intensity=0.55,
        softcore_expression_intensity=0.35,
        hardcore_expression_intensity=0.75,
    )
    cast_with_woman = woman_slot["character_cast"]

    if pov_man:
        man_presence = "pov"
    else:
        man_presence = "visible"

    # The man slot is appended to the cast that already contains the woman.
    full_slot = pb.build_character_slot_json(
        subject_type="man",
        label="A",
        age="40-year-old adult",
        ethnicity="western_european",
        body="average",
        descriptor_detail="compact",
        expression_intensity=0.45,
        softcore_expression_intensity=0.25,
        hardcore_expression_intensity=0.65,
        presence_mode=man_presence,
        character_cast=cast_with_woman,
    )
    return full_slot["character_cast"]


def _coworking_location_config() -> str:
    """Build a location-pool config holding a single custom coworking location.

    The pool is enabled in "replace" mode with the "custom_only" preset, and
    the only entry is the ``coworking_sim`` description below.
    """
    coworking_entry = (
        "coworking_sim: coworking lounge with tall windows, warm desks, "
        "laptop tables, glass partition seams, repeated desk rows, plants, "
        "and soft shared-office depth"
    )
    pool_options = {
        "enabled": True,
        "combine_mode": "replace",
        "preset": "custom_only",
        "custom_locations": coworking_entry,
    }
    return pb.build_location_pool_json(**pool_options)


def _orbit_camera(horizontal_angle: int = 45, vertical_angle: int = 0, zoom: float = 6.0) -> str:
    """Build an enabled orbit-camera config for the given angles and zoom.

    Only the two angles and the zoom vary between callers; every other camera
    option is fixed to the values listed below.
    """
    orbit_options = {
        "enabled": True,
        "camera_mode": "standard",
        "horizontal_angle": horizontal_angle,
        "vertical_angle": vertical_angle,
        "zoom": zoom,
        "framing": "from_zoom",
        "subject_focus": "action",
        "lens": "auto",
        "orientation": "auto",
        "phone_visibility": "auto",
        "priority": "soft_hint",
        "camera_detail": "compact",
        "include_degrees": True,
    }
    return pb.build_camera_orbit_config_json(**orbit_options)


def _position_filter(focus: str, family: str, positions: list[str] | tuple[str, ...] | str) -> str:
    """Build an action-filter config restricted to *positions* within *family*.

    A position pool is created first (in "replace" mode) and handed to the
    action-filter builder together with *focus*. Toys and double actions are
    always disallowed. Each remaining ``allow_<kind>`` flag is true only when
    *focus* is ``"<kind>_only"`` or ``"keep_pool"``.
    """
    pool_config = pb.build_hardcore_position_pool_json(
        combine_mode="replace",
        family=family,
        selected_positions=positions,
    )

    toggles: dict[str, bool] = {"allow_toys": False, "allow_double": False}
    action_kinds = (
        "penetration",
        "foreplay",
        "interaction",
        "manual",
        "oral",
        "outercourse",
        "anal",
        "climax",
    )
    for kind in action_kinds:
        toggles[f"allow_{kind}"] = focus in (f"{kind}_only", "keep_pool")

    return pb.build_hardcore_action_filter_json(
        hardcore_position_config=pool_config,
        focus=focus,
        **toggles,
    )


def _insta_options() -> str:
    """Build the fixed options config shared by the softcore/hardcore pair cases."""
    pair_options = {
        "softcore_cast": "same_as_hardcore",
        "hardcore_cast": "couple",
        "hardcore_women_count": 1,
        "hardcore_men_count": 1,
        "softcore_level": "lingerie_tease",
        "hardcore_level": "hardcore",
        "softcore_expression_enabled": True,
        "hardcore_expression_enabled": True,
        "softcore_expression_intensity": 0.35,
        "hardcore_expression_intensity": 0.75,
        "platform_style": "hybrid",
        "continuity": "same_creator_same_room",
        "hardcore_clothing_continuity": "explicit_nude",
        "softcore_camera_mode": "from_camera_config",
        "hardcore_camera_mode": "from_camera_config",
        "camera_detail": "compact",
        "hardcore_detail_density": "balanced",
    }
    return pb.build_insta_of_options_json(**pair_options)


def _format_metadata(metadata: dict[str, Any], target: str) -> dict[str, Any]:
    """Send *metadata* through the Krea2, SDXL and caption routes for *target*.

    The metadata is serialized once and passed to each formatter as
    ``metadata_json`` with an empty prompt string, so all three routes receive
    the same input.

    Returns a dict with the raw ``"krea"`` and ``"sdxl"`` formatter results and
    a ``"caption"`` dict holding the caption text, its method and route trace.
    """
    # Arguments common to all three formatter calls.
    route_input = {
        "metadata_json": _json(metadata),
        "input_hint": "metadata_json",
        "target": target,
    }

    krea_result = krea_formatter.format_krea2_prompt(
        "",
        **route_input,
        detail_level="balanced",
        style_mode="preserve",
    )
    sdxl_result = sdxl_formatter.format_sdxl_prompt(
        "",
        **route_input,
        formatter_profile="manual_controls",
        style_preset="flat_vector_pony",
        quality_preset="pony_high",
        trigger=SDXL_TRIGGER,
        prepend_trigger=True,
        preserve_trigger=False,
        nude_weight=1.29,
    )
    # The caption route returns a 3-tuple rather than a dict.
    caption_text, caption_method, caption_trace = caption_naturalizer.naturalize_caption_with_trace(
        "",
        **route_input,
        trigger=TRIGGER,
        include_trigger=True,
        detail_level="balanced",
        style_policy="drop_style_tail",
        caption_profile="training_dense",
    )

    caption_result = {
        "natural_caption": caption_text,
        "method": caption_method,
        "route_trace_json": caption_trace,
    }
    return {"krea": krea_result, "sdxl": sdxl_result, "caption": caption_result}


def _duplicate_comma_items(value: Any) -> list[str]:
    """Return the normalized comma-separated items that occur more than once.

    *value* is stringified (falsy values become ""), split on commas, and each
    part is normalized with ``_clean_key``; parts that normalize to the empty
    string are ignored. The result is the sorted list of distinct keys seen at
    least twice.

    A single pass with two sets replaces calling ``list.count`` for every
    item, which rescanned the whole list each time.
    """
    seen: set[str] = set()
    repeated: set[str] = set()
    for part in str(value or "").split(","):
        key = _clean_key(part)
        if not key:
            continue
        if key in seen:
            repeated.add(key)
        else:
            seen.add(key)
    return sorted(repeated)


def _text_issues(label: str, value: Any, *, min_len: int = 8) -> list[str]:
    """Run generic text-quality checks on *value* and return issue strings.

    Each issue is formatted as ``"<label>: <code>"``. The checks, in reporting
    order, are:

    - ``empty_or_short``: stripped text is shorter than *min_len*
    - ``leaked_None``: the literal substring ``None`` appears
    - ``repeated_spaces``: two consecutive spaces appear
    - ``bad_punctuation_spacing``: a space directly precedes a comma or period
    """
    text = str(value or "")
    checks = (
        ("empty_or_short", len(text.strip()) < min_len),
        ("leaked_None", "None" in text),
        ("repeated_spaces", "  " in text),
        ("bad_punctuation_spacing", " ," in text or " ." in text),
    )
    return [f"{label}: {code}" for code, triggered in checks if triggered]


def _formatter_issues(name: str, formats: dict[str, Any], *, is_pov: bool = False) -> list[str]:
    """Collect quality issues from the three formatter outputs in *formats*.

    *formats* must contain ``"krea"``, ``"sdxl"`` and ``"caption"`` dicts (the
    shape produced by ``_format_metadata``). Issues are reported in this order:

    1. generic text issues for each prompt/caption (minimum length 20),
    2. any route whose ``method`` does not contain "metadata",
    3. duplicated comma-separated items in the Krea/SDXL negative prompts
       (at most five shown per prompt),
    4. formatter labels and hardcore noise phrases found in the Krea prompt,
    5. when *is_pov* is true, missing first-person wording or a ``camera:``
       label in the Krea prompt.
    """
    krea_out = formats["krea"]
    sdxl_out = formats["sdxl"]
    caption_out = formats["caption"]

    krea_prompt = str(krea_out.get("krea_prompt") or "")
    sdxl_prompt = str(sdxl_out.get("sdxl_prompt") or "")
    caption_text = str(caption_out.get("natural_caption") or "")

    found: list[str] = []

    labelled_texts = (
        (f"{name}.krea_prompt", krea_prompt),
        (f"{name}.sdxl_prompt", sdxl_prompt),
        (f"{name}.caption", caption_text),
    )
    for text_label, text in labelled_texts:
        found.extend(_text_issues(text_label, text, min_len=20))

    # Every route is expected to report a metadata-based method.
    for route, output in (("krea", krea_out), ("sdxl", sdxl_out), ("caption", caption_out)):
        method = output.get("method")
        if "metadata" in str(method or ""):
            continue
        found.append(f"{name}.{route}: not_metadata_route:{method}")

    for suffix, output in (("krea_negative", krea_out), ("sdxl_negative", sdxl_out)):
        repeated = _duplicate_comma_items(output.get("negative_prompt"))
        if repeated:
            found.append(f"{name}.{suffix}: duplicate_comma_items:{repeated[:5]}")

    lowered_krea = krea_prompt.lower()
    found.extend(
        f"{name}.krea_prompt: leaked_label:{leak}"
        for leak in FORMATTER_LABEL_LEAKS
        if leak in lowered_krea
    )
    found.extend(
        f"{name}.krea_prompt: hardcore_noise:{noise}"
        for noise in HARDCORE_NOISE_TERMS
        if noise in lowered_krea
    )

    if not is_pov:
        return found

    has_pov_wording = "viewer" in lowered_krea and "first-person" in lowered_krea
    if not has_pov_wording:
        found.append(f"{name}.krea_prompt: pov_wording_missing")
    # This check is case-sensitive: it inspects the prompt as emitted.
    if "camera:" in krea_prompt:
        found.append(f"{name}.krea_prompt: pov_emitted_third_person_camera")
    return found


def _softcore_issues(name: str, text: Any) -> list[str]:
    """Report each SOFTCORE_NOISE_TERMS entry found in the lowercased *text*.

    Issues are formatted as ``"<name>: softcore_noise:<term>"`` and follow the
    order of the terms in the constant.
    """
    lowered = str(text or "").lower()
    hits: list[str] = []
    for term in SOFTCORE_NOISE_TERMS:
        if term in lowered:
            hits.append(f"{name}: softcore_noise:{term}")
    return hits


def _row_summary(row: dict[str, Any]) -> dict[str, Any]:
    """Extract the routing fields of a metadata *row* into a compact summary.

    Scalar fields are copied as-is (``None`` when absent). ``position_keys``
    and ``pov_labels`` fall back to an empty list when missing or falsy.
    """
    # (summary key, source key in the row)
    scalar_fields = (
        ("category", "main_category"),
        ("subcategory", "subcategory"),
        ("scene", "scene"),
        ("scene_profile", "scene_camera_profile_key"),
        ("action_family", "action_family"),
        ("position_family", "position_family"),
        ("position_key", "position_key"),
    )
    summary: dict[str, Any] = {out_key: row.get(src_key) for out_key, src_key in scalar_fields}
    summary["position_keys"] = row.get("position_keys") or []
    summary["pov_labels"] = row.get("pov_character_labels") or []
    return summary


def _route_metadata_issues(name: str, row: dict[str, Any]) -> list[str]:
    """Check that the row's primary position is one of the configured positions.

    Reads ``hardcore_position_config["positions"]`` from *row* (ignored unless
    the config is a dict). Among the configured positions that also appear in
    the row's ``position_keys``, the row's ``position_key`` must be present;
    otherwise a single ``selected_position_not_primary`` issue is returned.
    Returns an empty list when nothing is configured or none of the configured
    positions is available.
    """
    raw_config = row.get("hardcore_position_config")
    config = raw_config if isinstance(raw_config, dict) else {}

    configured = [str(entry) for entry in (config.get("positions") or [])]
    if not configured:
        return []

    available = {str(entry) for entry in (row.get("position_keys") or [])}
    selected_available = [entry for entry in configured if entry in available]
    primary = row.get("position_key")

    if not selected_available or primary in selected_available:
        return []
    return [f"{name}: selected_position_not_primary:{primary} not in {selected_available}"]


def _case_report(
    name: str,
    metadata: dict[str, Any],
    *,
    target: str,
    include_prompts: bool,
    is_pov: bool = False,
) -> dict[str, Any]:
    """Format one metadata row for *target* and build its report entry.

    The entry carries the case name, target, a row summary, the method each
    route reported, and the collected issues. Softcore noise checks on the
    Krea prompt run only when *target* is "softcore". When *include_prompts*
    is true, the raw prompt and the three formatted texts are added under
    ``"prompts"``.
    """
    formats = _format_metadata(metadata, target)
    krea_out = formats["krea"]
    sdxl_out = formats["sdxl"]
    caption_out = formats["caption"]

    found = _formatter_issues(name, formats, is_pov=is_pov)
    found.extend(_route_metadata_issues(name, metadata))
    if target == "softcore":
        found.extend(_softcore_issues(f"{name}.krea_prompt", krea_out.get("krea_prompt")))

    entry: dict[str, Any] = {
        "name": name,
        "target": target,
        "summary": _row_summary(metadata),
        "methods": {
            "krea": krea_out.get("method"),
            "sdxl": sdxl_out.get("method"),
            "caption": caption_out.get("method"),
        },
        "issues": found,
    }
    if not include_prompts:
        return entry

    entry["prompts"] = {
        "raw": metadata.get("prompt", ""),
        "krea": krea_out.get("krea_prompt", ""),
        "sdxl": sdxl_out.get("sdxl_prompt", ""),
        "caption": caption_out.get("natural_caption", ""),
    }
    return entry


def _pair_reports(name: str, pair: dict[str, Any], *, include_prompts: bool) -> list[dict[str, Any]]:
    """Format a softcore/hardcore *pair* and return one report entry per target.

    The whole pair dict is sent to the formatters twice (once per target).
    Row-level checks and summaries use ``softcore_row`` / ``hardcore_row``
    from the pair. The hardcore entry is treated as POV when its row has a
    non-empty ``pov_character_labels``. Softcore noise checks apply to the
    softcore entry only. Returns ``[softcore_entry, hardcore_entry]``.
    """
    soft_row = dict(pair.get("softcore_row") or {})
    hard_row = dict(pair.get("hardcore_row") or {})
    soft_formats = _format_metadata(pair, "softcore")
    hard_formats = _format_metadata(pair, "hardcore")

    soft_name = f"{name}.softcore"
    hard_name = f"{name}.hardcore"

    soft_found = _formatter_issues(soft_name, soft_formats)
    soft_found.extend(_route_metadata_issues(soft_name, soft_row))
    soft_found.extend(
        _softcore_issues(f"{soft_name}.krea_prompt", soft_formats["krea"].get("krea_prompt"))
    )

    hard_found = _formatter_issues(
        hard_name,
        hard_formats,
        is_pov=bool(hard_row.get("pov_character_labels")),
    )
    hard_found.extend(_route_metadata_issues(hard_name, hard_row))

    def _entry(target: str, row: dict[str, Any], formats: dict[str, Any], found: list[str]) -> dict[str, Any]:
        # Report entry with the same layout for both targets.
        return {
            "name": f"{name}.{target}",
            "target": target,
            "summary": _row_summary(row),
            "methods": {
                "krea": formats["krea"].get("method"),
                "sdxl": formats["sdxl"].get("method"),
                "caption": formats["caption"].get("method"),
            },
            "issues": found,
        }

    def _prompts(target: str, formats: dict[str, Any]) -> dict[str, Any]:
        # Prefer the target-specific prompt key, falling back to the generic one.
        krea_out = formats["krea"]
        sdxl_out = formats["sdxl"]
        return {
            "raw": pair.get(f"{target}_prompt", ""),
            "krea": krea_out.get(f"krea_{target}_prompt", "") or krea_out.get("krea_prompt", ""),
            "sdxl": sdxl_out.get(f"sdxl_{target}_prompt", "") or sdxl_out.get("sdxl_prompt", ""),
            "caption": formats["caption"].get("natural_caption", ""),
        }

    soft_entry = _entry("softcore", soft_row, soft_formats, soft_found)
    hard_entry = _entry("hardcore", hard_row, hard_formats, hard_found)
    if include_prompts:
        soft_entry["prompts"] = _prompts("softcore", soft_formats)
        hard_entry["prompts"] = _prompts("hardcore", hard_formats)
    return [soft_entry, hard_entry]


def _regular_single_case(seed: int) -> dict[str, Any]:
    """Build the metadata row for the regular single-image casual-clothes case.

    Uses a solo-woman cast config, the shared character cast, the coworking
    location pool, and an orbit camera at 45 degrees horizontal with zoom 5.5.
    *seed* is used both as the prompt seed and as the seed-lock base seed.
    """
    category_config = pb.build_category_config_json("Casual clothes", "Casual clothes / Streetwear")
    cast_config = pb.build_cast_config_json("solo_woman", 1, 0)
    seed_config = pb.build_seed_lock_config_json(base_seed=seed)
    camera_config = _orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=5.5)
    character_cast = _character_cast()
    location_config = _coworking_location_config()

    return pb.build_prompt_from_configs(
        row_number=1,
        start_index=1,
        seed=seed,
        category_config=category_config,
        cast_config=cast_config,
        seed_config=seed_config,
        camera_config=camera_config,
        character_cast=character_cast,
        location_config=location_config,
        extra_positive="simulation marker",
    )


def _insta_pair_case(seed: int, *, pov: bool, position: str, focus: str, family: str) -> dict[str, Any]:
    """Build a softcore/hardcore pair restricted to one hardcore *position*.

    *pov* selects whether the man in the cast is a POV character and also
    picks the hardcore camera's horizontal angle (68 for POV, 135 otherwise).
    *focus* and *family* are forwarded to the position filter.
    """
    seed_config = pb.build_seed_lock_config_json(base_seed=seed)
    options_json = _insta_options()
    character_cast = _character_cast(pov_man=pov)
    position_config = _position_filter(focus, family, [position])
    location_config = _coworking_location_config()

    # Three cameras: a shared default plus one override per target.
    shared_camera = _orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=6.0)
    softcore_camera = _orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=5.5)
    hardcore_horizontal = 68 if pov else 135
    hardcore_camera = _orbit_camera(horizontal_angle=hardcore_horizontal, vertical_angle=20, zoom=7.5)

    return pb.build_insta_of_pair(
        row_number=1,
        start_index=1,
        seed=seed,
        ethnicity="any",
        figure="random",
        no_plus_women=False,
        no_black=False,
        trigger=TRIGGER,
        prepend_trigger_to_prompt=True,
        seed_config=seed_config,
        options_json=options_json,
        character_cast=character_cast,
        hardcore_position_config=position_config,
        location_config=location_config,
        camera_config=shared_camera,
        softcore_camera_config=softcore_camera,
        hardcore_camera_config=hardcore_camera,
    )


def _seed_axis_check(seed: int) -> dict[str, Any]:
    """Check that rerolling only the "pose" seed axis changes the pose and nothing else.

    A base row is built with a plain seed lock on *seed*. Up to nine rerolled
    rows are then built with the same *seed* but a seed lock carrying
    ``reroll_axis="pose"`` and reroll seeds ``seed + 1`` .. ``seed + 9``.

    For each attempt, a change in ``cast_descriptor_text`` or ``scene_text``
    is recorded as an issue. The loop stops at the first attempt whose
    ``position_key``, ``source_role_graph`` or ``item`` differs from the base.
    If no attempt differs, that is reported as an issue too.
    """

    def _build_row(seed_config: Any) -> Any:
        # Only seed_config differs between the base row and the rerolled rows;
        # every other argument is rebuilt on each call.
        return pb.build_prompt(
            category="Hardcore sexual poses",
            subcategory="Penetrative sex",
            row_number=1,
            start_index=1,
            seed=seed,
            clothing="random",
            ethnicity="any",
            poses="random",
            backside_bias=0.0,
            figure="random",
            no_plus_women=False,
            no_black=False,
            minimal_clothing_ratio=-1,
            standard_pose_ratio=-1,
            trigger=TRIGGER,
            prepend_trigger_to_prompt=True,
            extra_positive="",
            extra_negative="",
            seed_config=seed_config,
            women_count=1,
            men_count=1,
            character_cast=_character_cast(),
            hardcore_position_config=_position_filter(
                "penetration_only", "penetration", ["missionary", "doggy", "cowgirl"]
            ),
            location_config=_coworking_location_config(),
        )

    base = _build_row(pb.build_seed_lock_config_json(base_seed=seed))

    # Fields that must stay fixed on a pose reroll, with the word used in the issue text.
    locked_fields = (("cast_descriptor_text", "cast"), ("scene_text", "scene"))
    # Fields where any difference counts as a successful pose change.
    pose_fields = ("position_key", "source_role_graph", "item")

    changed = False
    mismatches: list[str] = []
    first_reroll = seed + 1
    for reroll_seed in range(first_reroll, first_reroll + 9):
        rerolled = _build_row(
            pb.build_seed_lock_config_json(base_seed=seed, reroll_axis="pose", reroll_seed=reroll_seed)
        )
        for field, what in locked_fields:
            if rerolled.get(field) != base.get(field):
                mismatches.append(f"{what} changed on pose reroll {reroll_seed}")
        if any(rerolled.get(field) != base.get(field) for field in pose_fields):
            changed = True
            break

    issues = list(mismatches)
    if not changed:
        issues.append("pose reroll did not change pose/action metadata within 9 attempts")
    return {
        "name": "seed_axis.pose_reroll",
        "base": _row_summary(base),
        "changed": changed,
        "issues": issues,
    }


def run_simulation(seed: int = 3901, *, include_prompts: bool = False) -> dict[str, Any]:
    """Run every simulation case and return the combined report.

    Cases, in order: one regular single-image case (seed), a penetration pair
    (seed + 1), a POV outercourse pair (seed + 2), and one pose-reroll axis
    check (seed + 3).

    The report has a ``"summary"`` with counts, a flat ``"issues"`` list of
    ``{"case", "issue"}`` dicts (case issues first, then axis-check issues),
    and the full ``"cases"`` and ``"axis_checks"`` lists.
    """
    regular_row = _regular_single_case(seed)
    cases: list[dict[str, Any]] = [
        _case_report("regular.single.casual", regular_row, target="single", include_prompts=include_prompts)
    ]

    # (report name, seed offset, pov, position, focus, family)
    pair_specs = (
        ("insta_pair.penetration", 1, False, "doggy", "penetration_only", "penetration"),
        ("insta_pair.pov_outercourse", 2, True, "penis_licking", "outercourse_only", "outercourse"),
    )
    for report_name, offset, pov, position, focus, family in pair_specs:
        pair = _insta_pair_case(seed + offset, pov=pov, position=position, focus=focus, family=family)
        cases.extend(_pair_reports(report_name, pair, include_prompts=include_prompts))

    axis_checks = [_seed_axis_check(seed + 3)]

    flat_issues: list[dict[str, Any]] = []
    for entry in [*cases, *axis_checks]:
        for issue in entry.get("issues", []):
            flat_issues.append({"case": entry["name"], "issue": issue})

    summary = {
        "seed": seed,
        "cases": len(cases),
        "axis_checks": len(axis_checks),
        "issues": len(flat_issues),
    }
    return {
        "summary": summary,
        "issues": flat_issues,
        "cases": cases,
        "axis_checks": axis_checks,
    }


def _print_text_report(report: dict[str, Any]) -> None:
    """Print a human-readable version of *report* to stdout.

    Output is a header line with the summary counts, then one line per case
    showing its non-empty summary fields, then one line per axis check. Each
    entry is followed by its issues as indented ``ISSUE`` lines.
    """

    def _print_issues(entry: dict[str, Any]) -> None:
        for issue in entry.get("issues") or []:
            print(f"  ISSUE {issue}")

    counts = report.get("summary") or {}
    print(
        f"Prompt route simulation: seed={counts.get('seed')} "
        f"cases={counts.get('cases')} axis_checks={counts.get('axis_checks')} issues={counts.get('issues')}"
    )

    for case in report.get("cases") or []:
        fields = case.get("summary") or {}
        # Skip summary fields that carry no information.
        shown = [f"{key}={value}" for key, value in fields.items() if value not in (None, "", [])]
        route = ", ".join(shown)
        print(f"- {case.get('name')} [{case.get('target')}]: {route}")
        _print_issues(case)

    for check in report.get("axis_checks") or []:
        print(f"- {check.get('name')}: changed={check.get('changed')}")
        _print_issues(check)


def main(argv: list[str] | None = None) -> int:
    """Parse command-line options, run the simulation, and print the report.

    *argv* is handed to ``ArgumentParser.parse_args``. The report is printed
    as indented JSON when ``--json`` is given and as text otherwise.

    Returns 1 when ``--fail-on-issues`` is set and the report lists at least
    one issue; otherwise returns 0.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--seed", type=int, default=3901, help="Base seed for deterministic simulations.")
    parser.add_argument("--json", action="store_true", help="Print the full JSON report.")
    parser.add_argument("--include-prompts", action="store_true", help="Include raw and formatted prompt text in the report.")
    parser.add_argument("--fail-on-issues", action="store_true", help="Exit with code 1 when any issue is reported.")
    options = parser.parse_args(argv)

    report = run_simulation(seed=options.seed, include_prompts=options.include_prompts)

    if not options.json:
        _print_text_report(report)
    else:
        print(json.dumps(report, ensure_ascii=True, indent=2, sort_keys=True))

    if options.fail_on_issues and report.get("issues"):
        return 1
    return 0


if __name__ == "__main__":
    raise SystemExit(main())