Extract scene camera adapters

2026-06-26 15:10:05 +02:00
parent 97c49fffed
commit b82cf3fbbf
4 changed files with 307 additions and 236 deletions
@@ -0,0 +1,286 @@
+from __future__ import annotations
+
+from typing import Any, Mapping
+
+
+# Direction labels that camera_direction_from_text looks for as lower-case
+# substrings of a free-form camera prompt; the first entry found wins.
+# NOTE(review): the order looks like a walk around the subject in eight
+# steps -- presumably mirroring an orbit control; confirm against the caller.
+CAMERA_DIRECTIONS = (
+    "front-right quarter view",
+    "right side view",
+    "back-right quarter view",
+    "back view",
+    "back-left quarter view",
+    "left side view",
+    "front-left quarter view",
+    "front view",
+)
+
+# Elevation labels that camera_elevation_from_text looks for as lower-case
+# substrings of a free-form camera prompt.
+CAMERA_ELEVATIONS = ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot")
+# Distance labels that camera_distance_from_text looks for as lower-case
+# substrings of a free-form camera prompt.
+# NOTE: "close-up" is itself a substring of "extreme close-up", so a matcher
+# that returns the first substring hit in tuple order has to disambiguate them.
+CAMERA_DISTANCES = (
+    "wide shot",
+    "full-body shot",
+    "three-quarter body shot",
+    "medium shot",
+    "close-up",
+    "extreme close-up",
+)
+
+
+def is_coworking_scene(scene_text: Any) -> bool:
+    text = str(scene_text or "").lower()
+    return any(
+        term in text
+        for term in (
+            "coworking",
+            "cowork",
+            "office lounge",
+            "business cafe",
+            "work cafe",
+            "shared office",
+            "corporate office",
+            "office after hours",
+            "laptops",
+            "warm desks",
+            "repeating desks",
+            "glass partitions",
+            "copier alcove",
+        )
+    )
+
+
+def _compact_label(value: Any, compact_labels: Mapping[str, str] | None = None) -> str:
+    text = str(value or "")
+    if compact_labels and text in compact_labels:
+        return compact_labels[text]
+    return text.replace("_", " ")
+
+
+def camera_geometry_phrase(parsed: dict[str, Any], compact_labels: Mapping[str, str] | None = None) -> str:
+    direction = str(parsed.get("orbit_direction") or "").strip()
+    elevation = str(parsed.get("orbit_elevation_label") or "").strip()
+    distance = str(parsed.get("orbit_distance_label") or "").strip()
+    custom = str(parsed.get("custom_camera_prompt") or "").strip()
+    if not any((direction, elevation, distance)) and custom:
+        return custom
+    parts = [part for part in (direction, elevation, distance) if part and part != "auto"]
+    if parts:
+        return ", ".join(parts)
+    compact_parts = [
+        _compact_label(parsed.get(key), compact_labels)
+        for key in ("shot_size", "angle", "distance")
+    ]
+    compact_parts = [part for part in compact_parts if part and part != "auto"]
+    return ", ".join(compact_parts)
+
+
+def camera_direction_from_text(text: Any) -> str:
+    source = str(text or "").lower()
+    for label in CAMERA_DIRECTIONS:
+        if label in source:
+            return label
+    return ""
+
+
+def camera_elevation_from_text(text: Any) -> str:
+    source = str(text or "").lower()
+    for label in CAMERA_ELEVATIONS:
+        if label in source:
+            return label
+    return ""
+
+
+def camera_distance_from_text(text: Any) -> str:
+    source = str(text or "").lower()
+    for label in CAMERA_DISTANCES:
+        if label in source:
+            return label
+    return ""
+
+
+def coworking_location_profile(scene_text: Any) -> dict[str, str]:
+    text = str(scene_text or "").lower()
+    if "business cafe" in text or "work cafe" in text or "cafe" in text:
+        return {
+            "layout_label": "Business cafe camera layout",
+            "place": "business cafe coworking counter",
+            "foreground": "counter edge, laptop corner, and small plant",
+            "midground": "bar stools, warm desk lamps, and coffee-counter work spots",
+            "background": "plants, mirror strip, menu wall, and repeated cafe work tables",
+        }
+    if "corporate office" in text or "office after hours" in text or "copier" in text:
+        return {
+            "layout_label": "Office camera layout",
+            "place": "empty after-hours office",
+            "foreground": "copier alcove edge, chair backs, and nearest desk corner",
+            "midground": "repeating desks, glass partition seams, and muted monitor glow",
+            "background": "rows of empty workstations, city-light windows, and quiet office depth",
+        }
+    return {
+        "layout_label": "Coworking camera layout",
+        "place": "coworking lounge",
+        "foreground": "near desk edge, laptop corner, and chair back",
+        "midground": "warm work desks, laptop tables, and glass partition seams",
+        "background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
+    }
+
+
+def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]:
+    if pov_labels:
+        return "the visible partner", "them"
+    if subject_kind == "woman":
+        return "the woman", "her"
+    if subject_kind == "man":
+        return "the man", "him"
+    if subject_kind == "couple":
+        return "the couple", "them"
+    return "the subjects", "them"
+
+
+def coworking_direction_detail(
+    direction: str,
+    profile: dict[str, str],
+    pov_labels: list[str] | None = None,
+    subject_kind: str = "subjects",
+) -> str:
+    direction = str(direction or "").strip().lower()
+    foreground = profile["foreground"]
+    midground = profile["midground"]
+    background = profile["background"]
+    subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
+    if pov_labels:
+        if "right side" in direction:
+            return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
+        if "left side" in direction:
+            return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
+        if "back-right" in direction or "back-left" in direction:
+            return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground"
+        if direction == "back view":
+            return f"the viewer looks past {subject}'s back toward {midground}, then into {background}; only POV body cues sit low in frame"
+        if "front-right" in direction or "front-left" in direction:
+            return f"{subject} fills the first-person front-quarter view; {midground} recede diagonally behind {pronoun} toward {background}"
+        return f"{subject} faces the viewer in first-person view; {midground} and {background} stay behind {pronoun}, not between viewer and body"
+    if "right side" in direction or "left side" in direction:
+        return f"{subject} is held in side profile along the {foreground}; {midground} run laterally behind {pronoun}, with {background} still readable"
+    if "back-right" in direction or "back-left" in direction:
+        return f"{subject} is viewed from a rear-quarter angle, partly turning back toward camera; the {foreground} stays low in frame while {midground} lead into {background}"
+    if direction == "back view":
+        return f"{subject} is seen from behind with the {foreground} at camera side, facing into {midground} and {background}"
+    if "front-right" in direction or "front-left" in direction:
+        return f"{subject} is placed beside the {foreground}; {midground} recede diagonally behind {pronoun} toward {background}"
+    return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}"
+
+
+def coworking_distance_detail(
+    distance: str,
+    profile: dict[str, str],
+    subject_kind: str,
+    pov_labels: list[str] | None = None,
+) -> str:
+    distance = str(distance or "").strip().lower()
+    subject, _pronoun = coworking_subject_terms(subject_kind, pov_labels)
+    if pov_labels:
+        if "wide" in distance or "full-body" in distance or "full body" in distance:
+            return f"wide POV keeps {subject} readable with coworking context behind them"
+        if "close" in distance:
+            return f"close POV keeps {subject} dominant with coworking context only at the sides or background"
+        return f"medium POV keeps {subject} dominant with room context behind them"
+    if "wide" in distance or "full-body" in distance or "full body" in distance:
+        return "wide crop keeps floor aisle, table rows, and window depth readable"
+    if "close" in distance:
+        return "close crop keeps one desk or counter anchor visible"
+    return f"medium crop keeps {subject} dominant"
+
+
+def coworking_elevation_detail(
+    elevation: str,
+    profile: dict[str, str],
+    subject_kind: str,
+    pov_labels: list[str] | None = None,
+) -> str:
+    elevation = str(elevation or "").strip().lower()
+    subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
+    if pov_labels:
+        if "low-angle" in elevation:
+            return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}"
+        if "elevated" in elevation:
+            return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges"
+        if "high-angle" in elevation:
+            return f"high angle looks down from the viewer's position with desks and aisle only in the background"
+        return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}"
+    if "low-angle" in elevation:
+        return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}"
+    if "elevated" in elevation:
+        return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}"
+    if "high-angle" in elevation:
+        return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}"
+    return f"eye-level angle keeps tabletop lines and glass seams straight"
+
+
+def coworking_camera_scene_directive(
+    scene_text: Any,
+    parsed: dict[str, Any],
+    pov_labels: list[str] | None = None,
+    subject_kind: str = "subjects",
+    compact_labels: Mapping[str, str] | None = None,
+) -> str:
+    if not is_coworking_scene(scene_text):
+        return ""
+    direction = str(parsed.get("orbit_direction") or "").strip()
+    elevation = str(parsed.get("orbit_elevation_label") or "").strip()
+    distance = str(parsed.get("orbit_distance_label") or "").strip()
+    custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip()
+    direction = direction or camera_direction_from_text(custom_prompt)
+    elevation = elevation or camera_elevation_from_text(custom_prompt)
+    distance = distance or camera_distance_from_text(custom_prompt)
+    if not any((direction, elevation, distance, custom_prompt)):
+        return ""
+    profile = coworking_location_profile(scene_text)
+    direction_detail = coworking_direction_detail(direction, profile, pov_labels, subject_kind)
+    distance_detail = coworking_distance_detail(distance, profile, subject_kind, pov_labels)
+    elevation_detail = coworking_elevation_detail(elevation, profile, subject_kind, pov_labels)
+    if pov_labels:
+        return (
+            f"{profile['layout_label']} from POV: {direction_detail}. "
+            f"{distance_detail}; {elevation_detail}; use the multiangle camera only as first-person spatial geometry."
+        )
+    geometry = camera_geometry_phrase(parsed, compact_labels)
+    geometry_clause = f" ({geometry})" if geometry else ""
+    return (
+        f"{profile['layout_label']}{geometry_clause}: {direction_detail}; "
+        f"{distance_detail}; {elevation_detail}."
+    )
+
+
+def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+    text = str(composition or "").strip()
+    if not text or not is_coworking_scene(scene_text):
+        return text
+    lower = text.lower()
+    if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")):
+        return text
+    subject, _pronoun = coworking_subject_terms(subject_kind)
+    if subject_kind == "woman":
+        return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her"
+    if subject_kind == "man":
+        return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him"
+    return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them"
+
+
+def camera_scene_directive_for_context(
+    scene_text: Any,
+    parsed_camera_config: dict[str, Any],
+    pov_labels: list[str] | None = None,
+    subject_kind: str = "subjects",
+    compact_labels: Mapping[str, str] | None = None,
+) -> str:
+    if (
+        parsed_camera_config.get("camera_detail") == "off"
+        or parsed_camera_config.get("camera_mode") == "disabled"
+    ):
+        return ""
+    return coworking_camera_scene_directive(
+        scene_text,
+        parsed_camera_config,
+        pov_labels,
+        subject_kind,
+        compact_labels,
+    )