#!/usr/bin/env python3
"""Run representative prompt-route simulations and report quality issues.

This is a diagnostic tool, not a golden snapshot test. It builds a small set of
metadata rows/pairs, sends them through the Krea2, SDXL, and caption routes, and
reports route/noise/seed-control problems in a JSON-friendly structure.
"""

from __future__ import annotations

import argparse
import json
import re
import sys
from collections import Counter
from pathlib import Path
from typing import Any

# Make the parent directory of this script's folder importable so the
# project modules below resolve when the script is run directly.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))

import caption_naturalizer  # noqa: E402
import krea_formatter  # noqa: E402
import prompt_builder as pb  # noqa: E402
import sdxl_formatter  # noqa: E402
import sdxl_tag_policy  # noqa: E402

TRIGGER = "sxcppnl7"
SDXL_TRIGGER = "mythp0rt"

# Lower-case phrases that are reported as noise when found in a softcore Krea prompt.
SOFTCORE_NOISE_TERMS = (
    "the image focuses",
    "softcore version",
    "non-explicit teaser setup",
    "no sex act",
    "genital contact",
    "keep the softcore version",
    "focused on woman a alone",
)

# Lower-case section labels that are reported as leaks when found in a Krea prompt.
FORMATTER_LABEL_LEAKS = (
    "role graph:",
    "sexual scene:",
    "cast descriptors:",
    "shared cast descriptors:",
)

# Lower-case phrases that are reported as noise when found in any Krea prompt.
HARDCORE_NOISE_TERMS = (
    "softcore visual reference",
    "the same visibly adult",
    "the scene contains",
)


def _json(value: Any) -> str:
    """Serialize *value* to ASCII-only JSON with sorted keys."""
    return json.dumps(value, ensure_ascii=True, sort_keys=True)


def _clean_key(value: Any) -> str:
    """Lower-case *value* and collapse every non-alphanumeric run to one space."""
    return re.sub(r"[^a-z0-9]+", " ", str(value or "").lower()).strip()


def _character_cast(*, pov_man: bool = False) -> str:
    """Build a two-slot character cast (one woman, one man).

    The man's slot is appended to the woman's cast; his ``presence_mode`` is
    ``"pov"`` when *pov_man* is true and ``"visible"`` otherwise.
    """
    cast = pb.build_character_slot_json(
        subject_type="woman",
        label="A",
        age="25-year-old adult",
        ethnicity="western_european",
        figure="balanced",
        body="slim busty",
        hair_color="blonde",
        hair_length="long",
        hair_style="loose_waves",
        descriptor_detail="full",
        expression_intensity=0.55,
        softcore_expression_intensity=0.35,
        hardcore_expression_intensity=0.75,
    )["character_cast"]
    return pb.build_character_slot_json(
        subject_type="man",
        label="A",
        age="40-year-old adult",
        ethnicity="western_european",
        body="average",
        descriptor_detail="compact",
        expression_intensity=0.45,
        softcore_expression_intensity=0.25,
        hardcore_expression_intensity=0.65,
        presence_mode="pov" if pov_man else "visible",
        character_cast=cast,
    )["character_cast"]


def _coworking_location_config() -> str:
    """Build a location pool that contains only the custom coworking location."""
    return pb.build_location_pool_json(
        enabled=True,
        combine_mode="replace",
        preset="custom_only",
        custom_locations=(
            "coworking_sim: coworking lounge with tall windows, warm desks, "
            "laptop tables, glass partition seams, repeated desk rows, plants, "
            "and soft shared-office depth"
        ),
    )


def _orbit_camera(horizontal_angle: int = 45, vertical_angle: int = 0, zoom: float = 6.0) -> str:
    """Build an orbit camera config for the given angles and zoom."""
    return pb.build_camera_orbit_config_json(
        enabled=True,
        camera_mode="standard",
        horizontal_angle=horizontal_angle,
        vertical_angle=vertical_angle,
        zoom=zoom,
        framing="from_zoom",
        subject_focus="action",
        lens="auto",
        orientation="auto",
        phone_visibility="auto",
        priority="soft_hint",
        camera_detail="compact",
        include_degrees=True,
    )


def _position_filter(focus: str, family: str, positions: list[str] | tuple[str, ...] | str) -> str:
    """Build a hardcore action filter restricted to *focus* / *family* / *positions*.

    Each ``allow_*`` flag is enabled only for its matching ``*_only`` focus or
    for ``"keep_pool"``; toys and double actions are always disabled.
    """
    position_config = pb.build_hardcore_position_pool_json(
        combine_mode="replace",
        family=family,
        selected_positions=positions,
    )
    kwargs = {
        "allow_toys": False,
        "allow_double": False,
        "allow_penetration": focus in ("penetration_only", "keep_pool"),
        "allow_foreplay": focus in ("foreplay_only", "keep_pool"),
        "allow_interaction": focus in ("interaction_only", "keep_pool"),
        "allow_manual": focus in ("manual_only", "keep_pool"),
        "allow_oral": focus in ("oral_only", "keep_pool"),
        "allow_outercourse": focus in ("outercourse_only", "keep_pool"),
        "allow_anal": focus in ("anal_only", "keep_pool"),
        "allow_climax": focus in ("climax_only", "keep_pool"),
    }
    return pb.build_hardcore_action_filter_json(
        hardcore_position_config=position_config,
        focus=focus,
        **kwargs,
    )


def _insta_options() -> str:
    """Build the option set used by the softcore/hardcore pair simulations."""
    return pb.build_insta_of_options_json(
        softcore_cast="same_as_hardcore",
        hardcore_cast="couple",
        hardcore_women_count=1,
        hardcore_men_count=1,
        softcore_level="lingerie_tease",
        hardcore_level="hardcore",
        softcore_expression_enabled=True,
        hardcore_expression_enabled=True,
        softcore_expression_intensity=0.35,
        hardcore_expression_intensity=0.75,
        platform_style="hybrid",
        continuity="same_creator_same_room",
        hardcore_clothing_continuity="explicit_nude",
        softcore_camera_mode="from_camera_config",
        hardcore_camera_mode="from_camera_config",
        camera_detail="compact",
        hardcore_detail_density="balanced",
    )


# One entry per single-row hardcore route: the builder inputs plus the route
# metadata and per-formatter terms the resulting row is expected to produce.
HARDCORE_ROUTE_CASES = (
    {
        "name": "hardcore.single.oral",
        "subcategory": "Oral sex",
        "focus": "oral_only",
        "family": "oral",
        "expected_route": {"action_family": "oral", "position_family": "oral"},
        "expected_terms": {
            "krea": ("mouth",),
            "sdxl": ("oral sex",),
            "caption": ("oral action",),
        },
    },
    {
        "name": "hardcore.single.manual",
        "subcategory": "Manual stimulation",
        "focus": "manual_only",
        "family": "manual",
        "expected_route": {"position_family": "manual"},
        "expected_terms": {
            "krea": ("hand",),
            "sdxl": ("manual stimulation",),
            "caption": ("manual action",),
        },
    },
    {
        "name": "hardcore.single.outercourse",
        "subcategory": "Outercourse and genital teasing",
        "focus": "outercourse_only",
        "family": "outercourse",
        "expected_route": {"action_family": "outercourse", "position_family": "outercourse"},
        "expected_terms": {
            "krea": ("penis",),
            "sdxl": ("outercourse",),
            "caption": ("non-penetrative action",),
        },
    },
    {
        "name": "hardcore.single.foreplay",
        "subcategory": "Foreplay and teasing",
        "focus": "foreplay_only",
        "family": "foreplay",
        "expected_route": {"action_family": "foreplay", "position_family": "foreplay"},
        "expected_terms": {
            "krea": ("clothing",),
            "sdxl": ("foreplay",),
            "caption": ("foreplay action",),
        },
    },
    {
        "name": "hardcore.single.anal",
        "subcategory": "Anal and double penetration",
        "focus": "anal_only",
        "family": "anal",
        "expected_route": {"position_family": "anal"},
        "expected_terms": {
            "krea": ("anal",),
            "sdxl": ("anal sex",),
            "caption": ("anal action",),
        },
    },
    {
        "name": "hardcore.single.climax",
        "subcategory": "Cumshot and climax",
        "focus": "climax_only",
        "family": "climax",
        "expected_route": {"action_family": "climax", "position_family": "climax"},
        "expected_terms": {
            "krea": ("ejaculation",),
            "sdxl": ("climax", "semen"),
            "caption": ("climax action",),
        },
    },
)


def _format_metadata(metadata: dict[str, Any], target: str) -> dict[str, Any]:
    """Send *metadata* through the Krea2, SDXL and caption formatters.

    Returns a dict with ``"krea"`` and ``"sdxl"`` (the formatter results as
    returned) and ``"caption"`` (a dict with ``natural_caption``, ``method``
    and ``route_trace_json``).
    """
    metadata_json = _json(metadata)
    krea = krea_formatter.format_krea2_prompt(
        "",
        metadata_json=metadata_json,
        input_hint="metadata_json",
        target=target,
        detail_level="balanced",
        style_mode="preserve",
    )
    sdxl = sdxl_formatter.format_sdxl_prompt(
        "",
        metadata_json=metadata_json,
        input_hint="metadata_json",
        target=target,
        formatter_profile="manual_controls",
        style_preset="flat_vector_pony",
        quality_preset="pony_high",
        trigger=SDXL_TRIGGER,
        prepend_trigger=True,
        preserve_trigger=False,
        nude_weight=1.29,
    )
    caption, caption_method, caption_trace = caption_naturalizer.naturalize_caption_with_trace(
        "",
        metadata_json=metadata_json,
        input_hint="metadata_json",
        target=target,
        trigger=TRIGGER,
        include_trigger=True,
        detail_level="balanced",
        style_policy="drop_style_tail",
        caption_profile="training_dense",
    )
    return {
        "krea": krea,
        "sdxl": sdxl,
        "caption": {
            "natural_caption": caption,
            "method": caption_method,
            "route_trace_json": caption_trace,
        },
    }


def _duplicate_comma_items(value: Any) -> list[str]:
    """Return the sorted normalized comma-separated items that occur more than once."""
    counts = Counter(_clean_key(part) for part in str(value or "").split(","))
    # Items that normalize to the empty string are not considered duplicates.
    return sorted(item for item, count in counts.items() if item and count > 1)


def _text_issues(label: str, value: Any, *, min_len: int = 8) -> list[str]:
    """Report generic text-quality problems in *value*, each prefixed with *label*."""
    text = str(value or "")
    issues: list[str] = []
    if len(text.strip()) < min_len:
        issues.append(f"{label}: empty_or_short")
    if "None" in text:
        issues.append(f"{label}: leaked_None")
    # Two consecutive spaces; a single-space test would flag every multi-word prompt.
    if "  " in text:
        issues.append(f"{label}: repeated_spaces")
    if " ," in text or " ." in text:
        issues.append(f"{label}: bad_punctuation_spacing")
    return issues


def _contains_all(text: str, required: tuple[str, ...]) -> bool:
    """Return True when every term in *required* occurs in *text*, ignoring case."""
    lower = text.lower()
    return all(term.lower() in lower for term in required)


def _formatter_expectation_issues(
    name: str,
    formats: dict[str, Any],
    expected_terms: dict[str, tuple[str, ...]] | None,
) -> list[str]:
    """Report formatters whose prompt text lacks any of its expected route terms."""
    if not expected_terms:
        return []
    prompts = {
        "krea": str(formats["krea"].get("krea_prompt") or ""),
        "sdxl": str(formats["sdxl"].get("sdxl_prompt") or ""),
        "caption": str(formats["caption"].get("natural_caption") or ""),
    }
    issues: list[str] = []
    for formatter_name, required in expected_terms.items():
        if required and not _contains_all(prompts.get(formatter_name, ""), required):
            issues.append(f"{name}.{formatter_name}: missing_route_terms:{required}")
    return issues


def _formatter_issues(
    name: str,
    formats: dict[str, Any],
    *,
    row: dict[str, Any] | None = None,
    expected_terms: dict[str, tuple[str, ...]] | None = None,
    is_pov: bool = False,
) -> list[str]:
    """Collect every formatter-level issue for one case.

    Checks text quality, that each formatter reports a metadata method,
    duplicate negative-prompt items, leaked labels and noise phrases in the
    Krea prompt, SDXL tags incompatible with the row's families, expected
    route terms, and (when *is_pov*) POV wording.
    """
    issues: list[str] = []
    krea = formats["krea"]
    sdxl = formats["sdxl"]
    caption = formats["caption"]
    krea_prompt = str(krea.get("krea_prompt") or "")
    sdxl_prompt = str(sdxl.get("sdxl_prompt") or "")
    caption_text = str(caption.get("natural_caption") or "")

    for label, value in (
        (f"{name}.krea_prompt", krea_prompt),
        (f"{name}.sdxl_prompt", sdxl_prompt),
        (f"{name}.caption", caption_text),
    ):
        issues.extend(_text_issues(label, value, min_len=20))

    for formatter_name, method in (
        ("krea", krea.get("method")),
        ("sdxl", sdxl.get("method")),
        ("caption", caption.get("method")),
    ):
        if "metadata" not in str(method or ""):
            issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")

    for label, value in (
        (f"{name}.krea_negative", krea.get("negative_prompt")),
        (f"{name}.sdxl_negative", sdxl.get("negative_prompt")),
    ):
        duplicates = _duplicate_comma_items(value)
        if duplicates:
            issues.append(f"{label}: duplicate_comma_items:{duplicates[:5]}")

    lower_krea = krea_prompt.lower()
    for leak in FORMATTER_LABEL_LEAKS:
        if leak in lower_krea:
            issues.append(f"{name}.krea_prompt: leaked_label:{leak}")
    for noise in HARDCORE_NOISE_TERMS:
        if noise in lower_krea:
            issues.append(f"{name}.krea_prompt: hardcore_noise:{noise}")

    if isinstance(row, dict):
        # Pad with ", " on both sides so a tag only matches as a whole comma item.
        sdxl_lower = f", {sdxl_prompt.lower()}, "
        for scope, family in (
            ("action", row.get("action_family")),
            ("position", row.get("position_family")),
        ):
            route_key = f"{scope}:{str(family or '').strip()}"
            for tag in sdxl_tag_policy.INCOMPATIBLE_ROUTE_TAGS.get(route_key, ()):
                if f", {tag}, " in sdxl_lower:
                    issues.append(f"{name}.sdxl_prompt: incompatible_family_tag:{route_key}:{tag}")

    issues.extend(_formatter_expectation_issues(name, formats, expected_terms))

    if is_pov:
        if "viewer" not in lower_krea or "first-person" not in lower_krea:
            issues.append(f"{name}.krea_prompt: pov_wording_missing")
        # NOTE(review): this test is case-sensitive, unlike the lower-cased
        # checks above - confirm that only a lower-case "camera:" label matters.
        if "camera:" in krea_prompt:
            issues.append(f"{name}.krea_prompt: pov_emitted_third_person_camera")
    return issues


def _softcore_issues(name: str, text: Any) -> list[str]:
    """Report every softcore noise term found in *text*, ignoring case."""
    lower = str(text or "").lower()
    return [f"{name}: softcore_noise:{term}" for term in SOFTCORE_NOISE_TERMS if term in lower]


def _row_summary(row: dict[str, Any]) -> dict[str, Any]:
    """Extract the route-relevant fields of *row* for the report."""
    return {
        "category": row.get("main_category"),
        "subcategory": row.get("subcategory"),
        "scene": row.get("scene"),
        "scene_profile": row.get("scene_camera_profile_key"),
        "action_family": row.get("action_family"),
        "position_family": row.get("position_family"),
        "position_key": row.get("position_key"),
        "position_keys": row.get("position_keys") or [],
        "pov_labels": row.get("pov_character_labels") or [],
    }


def _method_summary(formats: dict[str, Any]) -> dict[str, Any]:
    """Return the ``method`` reported by each formatter in *formats*."""
    return {
        "krea": formats["krea"].get("method"),
        "sdxl": formats["sdxl"].get("method"),
        "caption": formats["caption"].get("method"),
    }


def _route_metadata_issues(name: str, row: dict[str, Any]) -> list[str]:
    """Report when a configured, available position is not the row's primary position.

    Returns no issues unless ``row["hardcore_position_config"]`` is a dict
    with a non-empty ``positions`` list.
    """
    raw_config = row.get("hardcore_position_config")
    config = raw_config if isinstance(raw_config, dict) else {}
    configured = [str(value) for value in (config.get("positions") or [])]
    if not configured:
        return []
    available = {str(value) for value in (row.get("position_keys") or [])}
    selected_available = [value for value in configured if value in available]
    if selected_available and row.get("position_key") not in selected_available:
        return [
            f"{name}: selected_position_not_primary:{row.get('position_key')} not in {selected_available}"
        ]
    return []


def _route_expectation_issues(name: str, row: dict[str, Any], expected_route: dict[str, Any] | None) -> list[str]:
    """Compare *row* against the family/position values listed in *expected_route*."""
    if not expected_route:
        return []
    issues: list[str] = []
    for key in ("action_family", "position_family", "position_key"):
        expected = expected_route.get(key)
        if expected and row.get(key) != expected:
            issues.append(f"{name}: {key}_mismatch:{row.get(key)} != {expected}")
    current = {str(value) for value in (row.get("position_keys") or [])}
    for value in expected_route.get("position_keys") or ():
        if str(value) not in current:
            issues.append(f"{name}: missing_position_keys:{value}")
    return issues


def _case_report(
    name: str,
    metadata: dict[str, Any],
    *,
    target: str,
    include_prompts: bool,
    expected_route: dict[str, Any] | None = None,
    expected_terms: dict[str, tuple[str, ...]] | None = None,
    is_pov: bool = False,
) -> dict[str, Any]:
    """Format one single-row case and return its report entry.

    The entry holds ``name``, ``target``, ``summary``, ``methods`` and
    ``issues``; ``prompts`` is added when *include_prompts* is true.
    """
    formats = _format_metadata(metadata, target)
    issues = _formatter_issues(name, formats, row=metadata, expected_terms=expected_terms, is_pov=is_pov)
    issues.extend(_route_metadata_issues(name, metadata))
    issues.extend(_route_expectation_issues(name, metadata, expected_route))
    if target == "softcore":
        issues.extend(_softcore_issues(f"{name}.krea_prompt", formats["krea"].get("krea_prompt")))
    report = {
        "name": name,
        "target": target,
        "summary": _row_summary(metadata),
        "methods": _method_summary(formats),
        "issues": issues,
    }
    if include_prompts:
        report["prompts"] = {
            "raw": metadata.get("prompt", ""),
            "krea": formats["krea"].get("krea_prompt", ""),
            "sdxl": formats["sdxl"].get("sdxl_prompt", ""),
            "caption": formats["caption"].get("natural_caption", ""),
        }
    return report


def _pair_reports(name: str, pair: dict[str, Any], *, include_prompts: bool) -> list[dict[str, Any]]:
    """Format a softcore/hardcore pair and return one report entry per side.

    The whole *pair* is sent to the formatters once per target; the per-side
    rows (``softcore_row`` / ``hardcore_row``) are used for the route checks
    and summaries.
    """
    soft_name = f"{name}.softcore"
    hard_name = f"{name}.hardcore"
    soft_row = dict(pair.get("softcore_row") or {})
    hard_row = dict(pair.get("hardcore_row") or {})
    soft_formats = _format_metadata(pair, "softcore")
    hard_formats = _format_metadata(pair, "hardcore")

    soft_issues = _formatter_issues(soft_name, soft_formats, row=soft_row)
    soft_issues.extend(_route_metadata_issues(soft_name, soft_row))
    soft_issues.extend(_softcore_issues(f"{soft_name}.krea_prompt", soft_formats["krea"].get("krea_prompt")))

    hard_is_pov = bool(hard_row.get("pov_character_labels"))
    hard_issues = _formatter_issues(hard_name, hard_formats, row=hard_row, is_pov=hard_is_pov)
    hard_issues.extend(_route_metadata_issues(hard_name, hard_row))

    reports = [
        {
            "name": soft_name,
            "target": "softcore",
            "summary": _row_summary(soft_row),
            "methods": _method_summary(soft_formats),
            "issues": soft_issues,
        },
        {
            "name": hard_name,
            "target": "hardcore",
            "summary": _row_summary(hard_row),
            "methods": _method_summary(hard_formats),
            "issues": hard_issues,
        },
    ]
    if include_prompts:
        # Prefer the target-specific prompt key and fall back to the generic one.
        reports[0]["prompts"] = {
            "raw": pair.get("softcore_prompt", ""),
            "krea": soft_formats["krea"].get("krea_softcore_prompt", "")
            or soft_formats["krea"].get("krea_prompt", ""),
            "sdxl": soft_formats["sdxl"].get("sdxl_softcore_prompt", "")
            or soft_formats["sdxl"].get("sdxl_prompt", ""),
            "caption": soft_formats["caption"].get("natural_caption", ""),
        }
        reports[1]["prompts"] = {
            "raw": pair.get("hardcore_prompt", ""),
            "krea": hard_formats["krea"].get("krea_hardcore_prompt", "")
            or hard_formats["krea"].get("krea_prompt", ""),
            "sdxl": hard_formats["sdxl"].get("sdxl_hardcore_prompt", "")
            or hard_formats["sdxl"].get("sdxl_prompt", ""),
            "caption": hard_formats["caption"].get("natural_caption", ""),
        }
    return reports


def _regular_single_case(seed: int) -> dict[str, Any]:
    """Build the non-hardcore single-row case (solo woman, casual clothes)."""
    return pb.build_prompt_from_configs(
        row_number=1,
        start_index=1,
        seed=seed,
        category_config=pb.build_category_config_json("Casual clothes", "Casual clothes / Streetwear"),
        cast_config=pb.build_cast_config_json("solo_woman", 1, 0),
        seed_config=pb.build_seed_lock_config_json(base_seed=seed),
        camera_config=_orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=5.5),
        character_cast=_character_cast(),
        location_config=_coworking_location_config(),
        extra_positive="simulation marker",
    )


def _hardcore_single_case(seed: int, subcategory: str, focus: str, family: str) -> dict[str, Any]:
    """Build one hardcore single-row case for *subcategory*, filtered by *focus* / *family*."""
    return pb.build_prompt(
        category="Hardcore sexual poses",
        subcategory=subcategory,
        row_number=1,
        start_index=1,
        seed=seed,
        clothing="random",
        ethnicity="any",
        poses="random",
        backside_bias=0.0,
        figure="random",
        no_plus_women=False,
        no_black=False,
        minimal_clothing_ratio=-1,
        standard_pose_ratio=-1,
        trigger=TRIGGER,
        prepend_trigger_to_prompt=True,
        extra_positive="",
        extra_negative="",
        seed_config=pb.build_seed_lock_config_json(base_seed=seed),
        women_count=1,
        men_count=1,
        character_cast=_character_cast(),
        hardcore_position_config=_position_filter(focus, family, []),
        location_config=_coworking_location_config(),
        camera_config=_orbit_camera(horizontal_angle=35, vertical_angle=0, zoom=6.5),
    )


def _insta_pair_case(seed: int, *, pov: bool, position: str, focus: str, family: str) -> dict[str, Any]:
    """Build a softcore/hardcore pair limited to a single hardcore *position*."""
    return pb.build_insta_of_pair(
        row_number=1,
        start_index=1,
        seed=seed,
        ethnicity="any",
        figure="random",
        no_plus_women=False,
        no_black=False,
        trigger=TRIGGER,
        prepend_trigger_to_prompt=True,
        seed_config=pb.build_seed_lock_config_json(base_seed=seed),
        options_json=_insta_options(),
        character_cast=_character_cast(pov_man=pov),
        hardcore_position_config=_position_filter(focus, family, [position]),
        location_config=_coworking_location_config(),
        camera_config=_orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=6.0),
        softcore_camera_config=_orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=5.5),
        hardcore_camera_config=_orbit_camera(horizontal_angle=68 if pov else 135, vertical_angle=20, zoom=7.5),
    )


def _penetration_seed_row(seed: int, seed_config: Any) -> dict[str, Any]:
    """Build the penetration row used by the seed-axis check with *seed_config*."""
    return pb.build_prompt(
        category="Hardcore sexual poses",
        subcategory="Penetrative sex",
        row_number=1,
        start_index=1,
        seed=seed,
        clothing="random",
        ethnicity="any",
        poses="random",
        backside_bias=0.0,
        figure="random",
        no_plus_women=False,
        no_black=False,
        minimal_clothing_ratio=-1,
        standard_pose_ratio=-1,
        trigger=TRIGGER,
        prepend_trigger_to_prompt=True,
        extra_positive="",
        extra_negative="",
        seed_config=seed_config,
        women_count=1,
        men_count=1,
        character_cast=_character_cast(),
        hardcore_position_config=_position_filter(
            "penetration_only", "penetration", ["missionary", "doggy", "cowgirl"]
        ),
        location_config=_coworking_location_config(),
    )


def _seed_axis_check(seed: int) -> dict[str, Any]:
    """Check that a pose reroll changes pose metadata but not cast or scene.

    Builds a base row, then up to nine rows that keep the same base seed and
    only vary the ``pose`` reroll seed. Cast/scene differences are reported as
    issues; the loop stops at the first reroll whose ``position_key``,
    ``source_role_graph`` or ``item`` differs from the base row.
    """
    base = _penetration_seed_row(seed, pb.build_seed_lock_config_json(base_seed=seed))
    changed = False
    mismatches: list[str] = []
    for reroll_seed in range(seed + 1, seed + 10):
        rerolled = _penetration_seed_row(
            seed,
            pb.build_seed_lock_config_json(base_seed=seed, reroll_axis="pose", reroll_seed=reroll_seed),
        )
        if rerolled.get("cast_descriptor_text") != base.get("cast_descriptor_text"):
            mismatches.append(f"cast changed on pose reroll {reroll_seed}")
        if rerolled.get("scene_text") != base.get("scene_text"):
            mismatches.append(f"scene changed on pose reroll {reroll_seed}")
        if (
            rerolled.get("position_key") != base.get("position_key")
            or rerolled.get("source_role_graph") != base.get("source_role_graph")
            or rerolled.get("item") != base.get("item")
        ):
            changed = True
            break
    issues = list(mismatches)
    if not changed:
        issues.append("pose reroll did not change pose/action metadata within 9 attempts")
    return {
        "name": "seed_axis.pose_reroll",
        "base": _row_summary(base),
        "changed": changed,
        "issues": issues,
    }


def run_simulation(seed: int = 3901, *, include_prompts: bool = False) -> dict[str, Any]:
    """Run every simulated case and return the aggregated report.

    Args:
        seed: Base seed; each case uses a fixed offset from it.
        include_prompts: When true, each case entry also carries its raw and
            formatted prompt text.

    Returns:
        A dict with ``summary`` (seed and counts), ``issues`` (flat list of
        ``{"case", "issue"}`` dicts), ``cases`` and ``axis_checks``.
    """
    cases: list[dict[str, Any]] = []
    regular = _regular_single_case(seed)
    cases.append(_case_report("regular.single.casual", regular, target="single", include_prompts=include_prompts))

    for offset, route_case in enumerate(HARDCORE_ROUTE_CASES, start=10):
        row = _hardcore_single_case(
            seed + offset,
            str(route_case["subcategory"]),
            str(route_case["focus"]),
            str(route_case["family"]),
        )
        cases.append(
            _case_report(
                str(route_case["name"]),
                row,
                target="single",
                include_prompts=include_prompts,
                expected_route=route_case.get("expected_route"),
                expected_terms=route_case.get("expected_terms"),
            )
        )

    penetration_pair = _insta_pair_case(
        seed + 1, pov=False, position="doggy", focus="penetration_only", family="penetration"
    )
    cases.extend(_pair_reports("insta_pair.penetration", penetration_pair, include_prompts=include_prompts))

    pov_pair = _insta_pair_case(
        seed + 2, pov=True, position="penis_licking", focus="outercourse_only", family="outercourse"
    )
    cases.extend(_pair_reports("insta_pair.pov_outercourse", pov_pair, include_prompts=include_prompts))

    axis_checks = [_seed_axis_check(seed + 3)]

    issues = [
        {"case": case["name"], "issue": issue}
        for case in cases
        for issue in case.get("issues", [])
    ]
    issues.extend(
        {"case": check["name"], "issue": issue}
        for check in axis_checks
        for issue in check.get("issues", [])
    )
    return {
        "summary": {
            "seed": seed,
            "cases": len(cases),
            "axis_checks": len(axis_checks),
            "issues": len(issues),
        },
        "issues": issues,
        "cases": cases,
        "axis_checks": axis_checks,
    }


def _print_text_report(report: dict[str, Any]) -> None:
    """Print a compact, line-oriented rendering of *report* to stdout."""
    summary = report.get("summary") or {}
    print(
        f"Prompt route simulation: seed={summary.get('seed')} "
        f"cases={summary.get('cases')} axis_checks={summary.get('axis_checks')} issues={summary.get('issues')}"
    )
    for case in report.get("cases") or []:
        row_summary = case.get("summary") or {}
        # Empty summary fields are skipped to keep each line short.
        route = ", ".join(f"{key}={value}" for key, value in row_summary.items() if value not in (None, "", []))
        print(f"- {case.get('name')} [{case.get('target')}]: {route}")
        for issue in case.get("issues") or []:
            print(f" ISSUE {issue}")
    for check in report.get("axis_checks") or []:
        print(f"- {check.get('name')}: changed={check.get('changed')}")
        for issue in check.get("issues") or []:
            print(f" ISSUE {issue}")


def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments, run the simulation and print the report.

    Returns 1 when ``--fail-on-issues`` is set and at least one issue was
    reported, otherwise 0.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--seed", type=int, default=3901, help="Base seed for deterministic simulations.")
    parser.add_argument("--json", action="store_true", help="Print the full JSON report.")
    parser.add_argument(
        "--include-prompts",
        action="store_true",
        help="Include raw and formatted prompt text in the report.",
    )
    parser.add_argument(
        "--fail-on-issues",
        action="store_true",
        help="Exit with code 1 when any issue is reported.",
    )
    args = parser.parse_args(argv)
    report = run_simulation(seed=args.seed, include_prompts=args.include_prompts)
    if args.json:
        print(json.dumps(report, ensure_ascii=True, indent=2, sort_keys=True))
    else:
        _print_text_report(report)
    return 1 if args.fail_on_issues and report.get("issues") else 0


if __name__ == "__main__":
    raise SystemExit(main())