ComfyUI-Ethanfel-Prompt-Bui…/scene_camera_adapters.py

from __future__ import annotations

from typing import Any, Mapping


# Direction labels that camera_direction_from_text() looks for (as lowercase
# substrings) inside free-form camera text. The tuple is scanned in order and
# the first label found is returned.
CAMERA_DIRECTIONS = (
    "front-right quarter view",
    "right side view",
    "back-right quarter view",
    "back view",
    "back-left quarter view",
    "left side view",
    "front-left quarter view",
    "front view",
)

# Elevation labels scanned by camera_elevation_from_text().
CAMERA_ELEVATIONS = ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot")
# Distance labels scanned by camera_distance_from_text().
# NOTE: "close-up" is a substring of "extreme close-up", so any substring scan
# over this tuple has to account for that overlap.
CAMERA_DISTANCES = (
    "wide shot",
    "full-body shot",
    "three-quarter body shot",
    "medium shot",
    "close-up",
    "extreme close-up",
)


# Built-in scene camera profiles.
#
# Each profile is a dict with:
#   key          - identifier used for lookups via SCENE_CAMERA_PROFILE_KEYS
#   family       - grouping label (is_coworking_scene() checks for "coworking")
#   terms        - lowercase substrings; scene_camera_profile() returns the
#                  first profile (in tuple order) with any term found in the
#                  lowercased scene text, so declaration order is significant
#   layout_label - prefix of the directive built by scene_camera_directive()
#   place / foreground / midground / background / detail_label
#                - text fragments interpolated into the scene_*_detail() sentences
#   composition  - per-subject composition text ("woman", "man", "default")
#                  read by profile_composition_text()
SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
    {
        "key": "business_cafe",
        "family": "coworking",
        "terms": ("business cafe", "work cafe", "cafe"),
        "layout_label": "Business cafe camera layout",
        "place": "business cafe coworking counter",
        "foreground": "counter edge, laptop corner, and small plant",
        "midground": "bar stools, warm desk lamps, and coffee-counter work spots",
        "background": "plants, mirror strip, menu wall, and repeated cafe work tables",
        "detail_label": "cafe details",
        "composition": {
            "woman": "business-cafe selfie frame with the woman near a counter edge and warm work-table depth behind her",
            "man": "business-cafe portrait frame with the man near a counter edge and warm work-table depth behind him",
            "default": "business-cafe frame with the subjects near a counter edge and warm work-table depth behind them",
        },
    },
    {
        "key": "office_after_hours",
        "family": "coworking",
        "terms": ("corporate office", "office after hours", "copier", "office lounge"),
        "layout_label": "Office camera layout",
        "place": "empty after-hours office",
        "foreground": "copier alcove edge, chair backs, and nearest desk corner",
        "midground": "repeating desks, glass partition seams, and muted monitor glow",
        "background": "rows of empty workstations, city-light windows, and quiet office depth",
        "detail_label": "office details",
        "composition": {
            "woman": "after-hours office frame with the woman near a desk edge and glass-partition depth behind her",
            "man": "after-hours office frame with the man near a desk edge and glass-partition depth behind him",
            "default": "after-hours office frame with the subjects near a desk edge and glass-partition depth behind them",
        },
    },
    {
        "key": "coworking_lounge",
        "family": "coworking",
        "terms": (
            "coworking",
            "cowork",
            "shared office",
            "laptops",
            "warm desks",
            "repeating desks",
            "glass partitions",
        ),
        "layout_label": "Coworking camera layout",
        "place": "coworking lounge",
        "foreground": "near desk edge, laptop corner, and chair back",
        "midground": "warm work desks, laptop tables, and glass partition seams",
        "background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
        "detail_label": "coworking details",
        "composition": {
            "woman": "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her",
            "man": "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him",
            "default": "coworking lounge frame with the subjects near a desk edge and tall-window depth behind them",
        },
    },
    {
        "key": "classical_library",
        "family": "library",
        "terms": (
            "classical library",
            "library stacks",
            "large library",
            "grand library",
            "reading room",
            "bookshelves",
            "book shelves",
            "book stacks",
            "rare-books",
            "rare books",
            "rolling ladders",
        ),
        "layout_label": "Library camera layout",
        "place": "classical library",
        "foreground": "near bookshelf edge, reading-table corner, and brass lamp",
        "midground": "towering bookshelves, rolling ladders, carved columns, and marble floor lines",
        "background": "arched windows, repeated book aisles, warm brass lamps, and deep quiet library depth",
        "detail_label": "library details",
        "composition": {
            "woman": "classical library frame with the woman near a bookshelf edge and long shelf depth behind her",
            "man": "classical library frame with the man near a bookshelf edge and long shelf depth behind him",
            "default": "classical library frame with the subjects near a bookshelf edge and long shelf depth behind them",
        },
    },
)

# Lookup table: profile key -> shallow copy of the matching built-in profile.
SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES}

# Alias table consulted by _profile_by_key() when a value is not itself a
# profile key: alias -> profile key.
THEME_PROFILE_KEYS = {
    "classical_library": "classical_library",
}

# Profile fields that normalize_scene_camera_profile() copies through
# _clean_text() when building a normalized profile.
PROFILE_TEXT_FIELDS = (
    "key",
    "family",
    "layout_label",
    "place",
    "foreground",
    "midground",
    "background",
    "detail_label",
)

# Lowercase substrings that contextual_composition_prompt() treats as a sign
# that a composition text does not fit the resolved scene profile.
MISMATCHED_COMPOSITION_TERMS = (
    "outfit-check",
    "outfit check",
    "mirror view",
    "mirror pose",
    "bag",
    "shoes",
    "footwear",
)


def _clean_text(value: Any) -> str:
    return " ".join(str(value or "").strip().split())


def _profile_by_key(value: Any) -> dict[str, Any]:
    """Look up a built-in profile by key or by theme alias.

    *value* is stringified and stripped. A direct hit in
    ``SCENE_CAMERA_PROFILE_KEYS`` wins; otherwise the value is translated
    through ``THEME_PROFILE_KEYS``. Returns a shallow copy of the profile, or
    an empty dict when nothing matches.
    """
    lookup = str(value or "").strip()
    if lookup:
        if lookup in SCENE_CAMERA_PROFILE_KEYS:
            resolved = lookup
        else:
            resolved = THEME_PROFILE_KEYS.get(lookup)
        if resolved and resolved in SCENE_CAMERA_PROFILE_KEYS:
            return dict(SCENE_CAMERA_PROFILE_KEYS[resolved])
    return {}


def _profile_title(value: str) -> str:
    """Turn a slug-like string into a title with each word's first letter upper-cased.

    Underscores and hyphens are treated as word separators. The remainder of
    each word keeps its original casing. An empty input yields ``"Scene"``.
    """
    cleaned = _clean_text(value)
    for separator in ("_", "-"):
        cleaned = cleaned.replace(separator, " ")
    if cleaned == "":
        return "Scene"
    titled = []
    for word in cleaned.split():
        titled.append(word[:1].upper() + word[1:])
    return " ".join(titled)


def _default_composition(profile: dict[str, Any]) -> dict[str, str]:
    """Build fallback composition sentences from a profile's text fields.

    Uses the cleaned ``place``, ``foreground`` and ``background`` values,
    substituting generic wording for any that are empty, and returns one
    sentence each for the ``"woman"``, ``"man"`` and ``"default"`` keys.
    """

    def field_or(name: str, fallback: str) -> str:
        return _clean_text(profile.get(name)) or fallback

    place = field_or("place", "scene")
    foreground = field_or("foreground", "foreground anchor")
    background = field_or("background", "environment depth")

    sentences: dict[str, str] = {}
    sentences["woman"] = f"{place} frame with the woman near {foreground} and {background} behind her"
    sentences["man"] = f"{place} frame with the man near {foreground} and {background} behind him"
    sentences["default"] = f"{place} frame with the subjects near {foreground} and {background} behind them"
    return sentences


def normalize_scene_camera_profile(value: Any) -> dict[str, Any]:
    """Normalize an inline profile dict into a complete scene camera profile.

    *value* may name a built-in profile to inherit from via
    ``base_profile_key`` or ``extends``; every other entry overrides the
    inherited one. The result always carries the ``PROFILE_TEXT_FIELDS`` plus a
    non-empty ``composition`` dict.

    Returns an empty dict when *value* is not a dict, or when the merged data
    has no usable ``layout_label``/``place``/``foreground``/``midground``/
    ``background`` text.
    """
    if not isinstance(value, dict):
        return {}

    def first_text(*candidates: Any) -> str:
        # Pick the first candidate that is non-empty AFTER cleaning, so a
        # whitespace-only value falls through to the next fallback instead of
        # winning the chain and then collapsing to "".
        for candidate in candidates:
            cleaned = _clean_text(candidate)
            if cleaned:
                return cleaned
        return ""

    base = _profile_by_key(value.get("base_profile_key") or value.get("extends"))
    merged = dict(base)
    for field, raw_value in value.items():
        if field in ("base_profile_key", "extends"):
            continue
        merged[field] = raw_value

    has_profile_fields = any(
        _clean_text(merged.get(field))
        for field in ("layout_label", "place", "foreground", "midground", "background")
    )
    if not has_profile_fields:
        return {}

    key = first_text(
        merged.get("key"),
        merged.get("slug"),
        merged.get("name"),
        base.get("key"),
        "custom_scene",
    )
    place = first_text(merged.get("place"), merged.get("name"), key.replace("_", " "))

    profile: dict[str, Any] = {field: _clean_text(merged.get(field)) for field in PROFILE_TEXT_FIELDS}
    profile["key"] = key
    profile["family"] = profile["family"] or "custom"
    profile["place"] = place
    profile["layout_label"] = profile["layout_label"] or f"{_profile_title(place)} camera layout"
    profile["foreground"] = profile["foreground"] or base.get("foreground", "foreground anchor")
    profile["midground"] = profile["midground"] or base.get("midground", "midground environment anchors")
    profile["background"] = profile["background"] or base.get("background", "background depth")
    profile["detail_label"] = profile["detail_label"] or f"{place} details"

    composition = merged.get("composition")
    if isinstance(composition, dict):
        # Keep only entries that still have text after cleaning.
        profile["composition"] = {
            str(kind): _clean_text(text)
            for kind, text in composition.items()
            if _clean_text(text)
        }
    else:
        base_composition = base.get("composition") if isinstance(base.get("composition"), dict) else {}
        profile["composition"] = dict(base_composition) if base_composition else _default_composition(profile)
    if not profile["composition"]:
        # An override dict whose entries were all blank ends up here.
        profile["composition"] = _default_composition(profile)
    return profile


def _scene_entry_text(scene_entry: Any) -> str:
    if not isinstance(scene_entry, dict):
        return ""
    return str(
        scene_entry.get("prompt")
        or scene_entry.get("description")
        or scene_entry.get("text")
        or scene_entry.get("name")
        or ""
    ).strip()


def _scene_entry_profile_key(scene_entry: Any) -> str:
    if not isinstance(scene_entry, dict):
        return ""
    return str(
        scene_entry.get("scene_camera_profile_key")
        or scene_entry.get("camera_profile_key")
        or scene_entry.get("camera_profile")
        or scene_entry.get("profile")
        or ""
    ).strip()


def _scene_entry_profile(scene_entry: Any) -> dict[str, Any]:
    """Extract an inline camera profile from a scene entry dict.

    Tries the nested ``scene_camera_profile``, ``camera_profile`` and
    ``profile`` values in turn; if none normalizes to a profile, the entry
    itself is normalized as a profile. Non-dict input yields an empty dict.
    """
    if not isinstance(scene_entry, dict):
        return {}
    for field in ("scene_camera_profile", "camera_profile", "profile"):
        nested = normalize_scene_camera_profile(scene_entry.get(field))
        if nested:
            return nested
    # Last resort: the entry may carry the profile fields at its top level.
    return normalize_scene_camera_profile(scene_entry)


def scene_camera_profile(
    scene_text: Any = "",
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
) -> dict[str, Any]:
    """Resolve the camera profile that applies to a scene.

    Sources are tried in this order and the first non-empty result wins:

    1. ``profile_key`` given as an inline profile dict
    2. ``profile_key`` given as a built-in key or alias
    3. an inline profile carried by ``scene_entry``
    4. a profile key referenced by ``scene_entry``
    5. ``theme``
    6. ``scene_entry["theme"]``
    7. term matching against ``scene_text`` plus the entry's own text

    Returns an empty dict when nothing applies.
    """
    # Each resolver is deferred so later sources are only consulted when the
    # earlier ones came back empty.
    resolvers = (
        lambda: normalize_scene_camera_profile(profile_key),
        lambda: _profile_by_key(profile_key),
        lambda: _scene_entry_profile(scene_entry),
        lambda: _profile_by_key(_scene_entry_profile_key(scene_entry)),
        lambda: _profile_by_key(theme),
        lambda: _profile_by_key(scene_entry.get("theme")) if isinstance(scene_entry, dict) else {},
    )
    for resolve in resolvers:
        found = resolve()
        if found:
            return found

    pieces = (str(scene_text or ""), _scene_entry_text(scene_entry))
    haystack = " ".join(piece for piece in pieces if piece).lower()
    if not haystack:
        return {}
    matches = (
        dict(candidate)
        for candidate in SCENE_CAMERA_PROFILES
        if any(term in haystack for term in candidate["terms"])
    )
    return next(matches, {})


def is_coworking_scene(scene_text: Any) -> bool:
    """Return True when the profile resolved from *scene_text* has family ``"coworking"``."""
    family = scene_camera_profile(scene_text).get("family")
    return family == "coworking"


def is_scene_camera_aware(scene_text: Any) -> bool:
    """Return True when *scene_text* resolves to any camera profile."""
    if scene_camera_profile(scene_text):
        return True
    return False


def _compact_label(value: Any, compact_labels: Mapping[str, str] | None = None) -> str:
    text = str(value or "")
    if compact_labels and text in compact_labels:
        return compact_labels[text]
    return text.replace("_", " ")


def camera_geometry_phrase(parsed: dict[str, Any], compact_labels: Mapping[str, str] | None = None) -> str:
    """Summarize the camera geometry in *parsed* as a comma-separated phrase.

    Preference order:

    * the custom camera prompt, when no orbit field is set at all;
    * the orbit direction/elevation/distance labels, skipping ``"auto"``;
    * the compact ``shot_size``/``angle``/``distance`` labels, skipping
      empty and ``"auto"`` values.
    """

    def stripped(name: str) -> str:
        return str(parsed.get(name) or "").strip()

    orbit = [
        stripped("orbit_direction"),
        stripped("orbit_elevation_label"),
        stripped("orbit_distance_label"),
    ]
    custom = stripped("custom_camera_prompt")
    if custom and not any(orbit):
        return custom

    named = [label for label in orbit if label and label != "auto"]
    if named:
        return ", ".join(named)

    compact = []
    for field in ("shot_size", "angle", "distance"):
        label = _compact_label(parsed.get(field), compact_labels)
        if label and label != "auto":
            compact.append(label)
    return ", ".join(compact)


def camera_direction_from_text(text: Any) -> str:
    """Return the first ``CAMERA_DIRECTIONS`` label found in *text*, or ``""``.

    Matching is a case-insensitive substring test in tuple order.
    """
    haystack = str(text or "").lower()
    return next((label for label in CAMERA_DIRECTIONS if label in haystack), "")


def camera_elevation_from_text(text: Any) -> str:
    """Return the first ``CAMERA_ELEVATIONS`` label found in *text*, or ``""``.

    Matching is a case-insensitive substring test in tuple order.
    """
    haystack = str(text or "").lower()
    hits = [label for label in CAMERA_ELEVATIONS if label in haystack]
    return hits[0] if hits else ""


def camera_distance_from_text(text: Any) -> str:
    """Return the ``CAMERA_DISTANCES`` label mentioned in *text*, or ``""``.

    Matching is a case-insensitive substring test. Labels are considered in
    tuple order, but a label that is merely a fragment of another matched
    label is skipped: ``"close-up"`` is contained in ``"extreme close-up"``,
    so a plain first-match scan could never report the latter.
    """
    source = str(text or "").lower()
    found = [label for label in CAMERA_DISTANCES if label in source]
    for label in found:
        # Prefer the more specific label when this one is only a piece of it.
        shadowed = any(label != other and label in other for other in found)
        if not shadowed:
            return label
    return ""


def coworking_location_profile(scene_text: Any) -> dict[str, str]:
    """Return a coworking-family profile for *scene_text*.

    If the text resolves to a profile outside the coworking family (or to
    nothing), the profile resolved from ``"coworking lounge"`` is returned
    instead.
    """
    candidate = scene_camera_profile(scene_text)
    if candidate.get("family") != "coworking":
        candidate = scene_camera_profile("coworking lounge")
    return candidate


def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]:
    """Return the ``(noun phrase, object pronoun)`` pair used to describe the subject.

    Any non-empty *pov_labels* takes priority and yields the visible-partner
    wording. Otherwise the pair is chosen by *subject_kind*, with a plural
    fallback for unrecognized kinds.
    """
    if pov_labels:
        return "the visible partner", "them"
    known_kinds = (
        ("woman", "the woman", "her"),
        ("man", "the man", "him"),
        ("couple", "the couple", "them"),
    )
    for kind, noun, pronoun in known_kinds:
        if subject_kind == kind:
            return noun, pronoun
    return "the subjects", "them"


def scene_direction_detail(
    direction: str,
    profile: dict[str, str],
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
) -> str:
    """Describe how the subject and scene layers line up for a camera direction.

    *profile* must provide ``foreground``, ``midground`` and ``background``;
    ``detail_label`` is optional. When *pov_labels* is non-empty the sentence
    is phrased for a first-person view, otherwise for an external camera.
    Directions that match none of the side/back/front-quarter cases fall back
    to a front-facing description.
    """
    heading = str(direction or "").strip().lower()
    foreground = profile["foreground"]
    midground = profile["midground"]
    background = profile["background"]
    detail_label = profile.get("detail_label") or "location details"
    subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)

    right_side = "right side" in heading
    left_side = "left side" in heading
    rear_quarter = "back-right" in heading or "back-left" in heading
    straight_back = heading == "back view"
    front_quarter = "front-right" in heading or "front-left" in heading

    if not pov_labels:
        if right_side or left_side:
            return f"{subject} is held in side profile along the {foreground}; {midground} run laterally behind {pronoun}, with {background} still readable"
        if rear_quarter:
            return f"{subject} is viewed from a rear-quarter angle, partly turning back toward camera; the {foreground} stays low in frame while {midground} lead into {background}"
        if straight_back:
            return f"{subject} is seen from behind with the {foreground} at camera side, facing into {midground} and {background}"
        if front_quarter:
            return f"{subject} is placed beside the {foreground}; {midground} recede diagonally behind {pronoun} toward {background}"
        return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}"

    # First-person phrasing from here on.
    if right_side:
        return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with {detail_label} kept at the frame edges"
    if left_side:
        return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with {detail_label} kept at the frame edges"
    if rear_quarter:
        return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground"
    if straight_back:
        return f"the viewer looks past {subject}'s back toward {midground}, then into {background}; only POV body cues sit low in frame"
    if front_quarter:
        return f"{subject} fills the first-person front-quarter view; {midground} recede diagonally behind {pronoun} toward {background}"
    return f"{subject} faces the viewer in first-person view; {midground} and {background} stay behind {pronoun}, not between viewer and body"


def coworking_direction_detail(
    direction: str,
    profile: dict[str, str],
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
) -> str:
    """Thin wrapper that forwards all arguments to ``scene_direction_detail``."""
    return scene_direction_detail(
        direction,
        profile,
        pov_labels=pov_labels,
        subject_kind=subject_kind,
    )


def scene_distance_detail(
    distance: str,
    profile: dict[str, str],
    subject_kind: str,
    pov_labels: list[str] | None = None,
) -> str:
    """Describe what a given camera distance keeps readable in the frame.

    Distances containing "wide" or "full-body"/"full body" are treated as
    wide, those containing "close" as close, and everything else as medium.
    Non-empty *pov_labels* switches to first-person wording.
    """
    span = str(distance or "").strip().lower()
    subject, _unused_pronoun = coworking_subject_terms(subject_kind, pov_labels)
    is_wide = any(token in span for token in ("wide", "full-body", "full body"))
    is_close = "close" in span

    if not pov_labels:
        if is_wide:
            return f"wide crop keeps the {profile['foreground']}, {profile['midground']}, and {profile['background']} readable"
        if is_close:
            return f"close crop keeps one anchor from the {profile['foreground']} visible"
        return f"medium crop keeps {subject} dominant"

    if is_wide:
        return f"wide POV keeps {subject} readable with {profile['place']} context behind them"
    if is_close:
        return f"close POV keeps {subject} dominant with {profile['place']} context only at the sides or background"
    return f"medium POV keeps {subject} dominant with room context behind them"


def coworking_distance_detail(
    distance: str,
    profile: dict[str, str],
    subject_kind: str,
    pov_labels: list[str] | None = None,
) -> str:
    """Thin wrapper that forwards all arguments to ``scene_distance_detail``."""
    return scene_distance_detail(
        distance,
        profile,
        subject_kind,
        pov_labels=pov_labels,
    )


def scene_elevation_detail(
    elevation: str,
    profile: dict[str, str],
    subject_kind: str,
    pov_labels: list[str] | None = None,
) -> str:
    """Describe how a camera elevation relates the subject to the scene layers.

    Recognizes "low-angle", "elevated" and "high-angle" (checked in that
    order) inside *elevation*; anything else is described as eye level.
    Non-empty *pov_labels* switches to first-person wording.
    """
    height = str(elevation or "").strip().lower()
    subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
    is_low = "low-angle" in height
    is_elevated = "elevated" in height
    is_high = "high-angle" in height

    if not pov_labels:
        if is_low:
            return f"low angle keeps the {profile['foreground']} low while {profile['background']} rises behind {pronoun}"
        if is_elevated:
            return f"elevated angle shows the {profile['foreground']} and {profile['midground']} around {pronoun}"
        if is_high:
            return f"high angle shows the {profile['place']} layout and placement of {pronoun}"
        return f"eye-level angle keeps {profile['midground']} visually stable"

    if is_low:
        return f"low angle keeps POV body cues low while the {profile['background']} rises behind {pronoun}"
    if is_elevated:
        return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with {profile['foreground']} only behind or at the side edges"
    if is_high:
        return f"high angle looks down from the viewer's position with {profile['midground']} only in the background"
    return f"eye-level angle keeps {profile['midground']} behind {pronoun}"


def coworking_elevation_detail(
    elevation: str,
    profile: dict[str, str],
    subject_kind: str,
    pov_labels: list[str] | None = None,
) -> str:
    """Thin wrapper that forwards all arguments to ``scene_elevation_detail``."""
    return scene_elevation_detail(
        elevation,
        profile,
        subject_kind,
        pov_labels=pov_labels,
    )


def scene_camera_directive(
    scene_text: Any,
    parsed: dict[str, Any],
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
) -> str:
    """Build the scene-aware camera directive sentence for a parsed camera config.

    Returns ``""`` when no profile can be resolved for the scene, or when
    *parsed* carries neither orbit labels nor a custom camera prompt. Orbit
    labels missing from *parsed* are recovered from the custom prompt text
    where possible. With non-empty *pov_labels* the directive is phrased for
    a first-person view; otherwise the geometry phrase is added in
    parentheses after the layout label.
    """
    profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
    if not profile:
        return ""

    def stripped(name: str) -> str:
        return str(parsed.get(name) or "").strip()

    custom_prompt = stripped("custom_camera_prompt")
    # Explicit orbit labels win; the custom prompt is only mined as a fallback.
    direction = stripped("orbit_direction") or camera_direction_from_text(custom_prompt)
    elevation = stripped("orbit_elevation_label") or camera_elevation_from_text(custom_prompt)
    distance = stripped("orbit_distance_label") or camera_distance_from_text(custom_prompt)
    if not (direction or elevation or distance or custom_prompt):
        return ""

    direction_detail = scene_direction_detail(direction, profile, pov_labels, subject_kind)
    distance_detail = scene_distance_detail(distance, profile, subject_kind, pov_labels)
    elevation_detail = scene_elevation_detail(elevation, profile, subject_kind, pov_labels)

    if not pov_labels:
        geometry = camera_geometry_phrase(parsed, compact_labels)
        geometry_clause = f" ({geometry})" if geometry else ""
        return (
            f"{profile['layout_label']}{geometry_clause}: {direction_detail}; "
            f"{distance_detail}; {elevation_detail}."
        )
    return (
        f"{profile['layout_label']} from POV: {direction_detail}. "
        f"{distance_detail}; {elevation_detail}; use the multiangle camera only as first-person spatial geometry."
    )


def coworking_camera_scene_directive(
    scene_text: Any,
    parsed: dict[str, Any],
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
) -> str:
    """Return the camera directive only for coworking-family scenes.

    Scenes whose text does not resolve to a coworking profile yield ``""``.
    """
    if is_coworking_scene(scene_text):
        return scene_camera_directive(scene_text, parsed, pov_labels, subject_kind, compact_labels)
    return ""


def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
    """Pick the composition sentence of *profile* for the given subject kind.

    A ``"woman"`` or ``"man"`` subject gets the matching entry verbatim when
    it exists. Every other case uses the ``"default"`` entry (or a generic
    sentence built from ``profile["place"]``), swaps "the subjects" for
    "the couple" when the subject is a couple, and appends " composition"
    unless the text already mentions it.
    """
    raw = profile.get("composition")
    composition = raw if isinstance(raw, dict) else {}

    if subject_kind in ("woman", "man"):
        specific = composition.get(subject_kind)
        if specific:
            return str(specific)

    text = str(composition.get("default") or f"{profile['place']} frame with the subjects clearly placed in the room")
    if subject_kind == "couple":
        text = text.replace("the subjects", "the couple")
    if "composition" in text.lower():
        return text
    return f"{text} composition"


def contextual_composition_prompt(
    scene_text: Any,
    composition: Any,
    subject_kind: str = "subjects",
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
) -> str:
    """Return *composition*, or the profile's own composition when it does not fit.

    The given text is kept when it is empty, when no profile resolves for the
    scene, or when it is not flagged and either already mentions a word from
    the profile's place/foreground/midground/background or the profile is not
    in the coworking family. It is flagged when it contains a term from
    ``MISMATCHED_COMPOSITION_TERMS`` or one of the generic office phrases.
    In every other case the profile's composition text for *subject_kind* is
    returned.
    """
    import re  # function-scope import: only this block needs it

    text = str(composition or "").strip()
    if not text:
        return text
    profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
    if not profile:
        return text
    lower = text.lower()
    profile_lower = " ".join(
        str(profile.get(key, "")).lower()
        for key in ("place", "foreground", "midground", "background")
    )
    # Filler words appear in every profile ("..., and small plant") and say
    # nothing about the scene, so they must not count as a match.
    filler_words = {"a", "an", "and", "at", "in", "of", "on", "or", "the", "to", "with"}
    profile_terms = {
        term
        for term in profile_lower.replace(",", " ").split()
        if term not in filler_words
    }
    # Whole-word match: a bare substring test would let "bar" hit "barefoot"
    # or "and" hit "standing". Hyphens still count as word breaks.
    already_matches = any(
        re.search(rf"(?<!\w){re.escape(term)}(?!\w)", lower)
        for term in profile_terms
    )
    mismatched = any(term in lower for term in MISMATCHED_COMPOSITION_TERMS)
    office_generic = any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check"))
    if not mismatched and not office_generic and already_matches:
        return text
    if not mismatched and not office_generic and profile.get("family") != "coworking":
        return text
    return profile_composition_text(profile, subject_kind)


def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
    """Thin wrapper around ``contextual_composition_prompt`` using only the scene text."""
    return contextual_composition_prompt(
        scene_text,
        composition,
        subject_kind=subject_kind,
    )


def camera_scene_directive_for_context(
    scene_text: Any,
    parsed_camera_config: dict[str, Any],
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
) -> str:
    """Return the scene camera directive unless the camera config switches it off.

    A config with ``camera_detail == "off"`` or ``camera_mode == "disabled"``
    yields ``""``; otherwise the call is forwarded to
    ``scene_camera_directive`` with the same arguments.
    """
    detail_enabled = parsed_camera_config.get("camera_detail") != "off"
    mode_enabled = parsed_camera_config.get("camera_mode") != "disabled"
    if not (detail_enabled and mode_enabled):
        return ""
    profile_sources = {
        "scene_entry": scene_entry,
        "theme": theme,
        "profile_key": profile_key,
    }
    return scene_camera_directive(
        scene_text,
        parsed_camera_config,
        pov_labels,
        subject_kind,
        compact_labels,
        **profile_sources,
    )