Generalize scene camera profiles

2026-06-27 13:13:31 +02:00
parent 616d1132ff
commit 63e8489fb2
4 changed files with 252 additions and 83 deletions
@@ -25,26 +25,120 @@ CAMERA_DISTANCES = (
 )


-def is_coworking_scene(scene_text: Any) -> bool:
-    text = str(scene_text or "").lower()
-    return any(
-        term in text
-        for term in (
+# Ordered registry of scene camera profiles.
+#
+# scene_camera_profile() walks this tuple from the top and returns the first
+# entry for which any "terms" string occurs as a substring of the lowercased
+# scene text, so the order of the entries is significant (first match wins).
+#
+# Entry fields:
+#   key           - identifier of the profile
+#   family        - grouping label; is_coworking_scene() checks for "coworking"
+#   terms         - substrings that select this profile
+#   layout_label  - prefix used when the camera directive sentence is built
+#   place / foreground / midground / background
+#                 - text fragments interpolated into the camera detail strings
+#   detail_label  - short label used in the side-profile POV sentences
+#   composition   - composition sentence per subject kind
+#                   ("woman", "man", "default")
+SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
+    {
+        "key": "business_cafe",
+        "family": "coworking",
+        # NOTE(review): the bare "cafe" term already contains the two longer
+        # terms, so any scene text mentioning "cafe" selects this profile.
+        "terms": ("business cafe", "work cafe", "cafe"),
+        "layout_label": "Business cafe camera layout",
+        "place": "business cafe coworking counter",
+        "foreground": "counter edge, laptop corner, and small plant",
+        "midground": "bar stools, warm desk lamps, and coffee-counter work spots",
+        "background": "plants, mirror strip, menu wall, and repeated cafe work tables",
+        "detail_label": "cafe details",
+        "composition": {
+            "woman": "business-cafe selfie frame with the woman near a counter edge and warm work-table depth behind her",
+            "man": "business-cafe portrait frame with the man near a counter edge and warm work-table depth behind him",
+            "default": "business-cafe frame with the subjects near a counter edge and warm work-table depth behind them",
+        },
+    },
+    {
+        "key": "office_after_hours",
+        "family": "coworking",
+        "terms": ("corporate office", "office after hours", "copier", "office lounge"),
+        "layout_label": "Office camera layout",
+        "place": "empty after-hours office",
+        "foreground": "copier alcove edge, chair backs, and nearest desk corner",
+        "midground": "repeating desks, glass partition seams, and muted monitor glow",
+        "background": "rows of empty workstations, city-light windows, and quiet office depth",
+        "detail_label": "office details",
+        "composition": {
+            "woman": "after-hours office frame with the woman near a desk edge and glass-partition depth behind her",
+            "man": "after-hours office frame with the man near a desk edge and glass-partition depth behind him",
+            "default": "after-hours office frame with the subjects near a desk edge and glass-partition depth behind them",
+        },
+    },
+    {
+        "key": "coworking_lounge",
+        "family": "coworking",
+        "terms": (
            "coworking",
            "cowork",
-            "office lounge",
-            "business cafe",
-            "work cafe",
            "shared office",
-            "corporate office",
-            "office after hours",
            "laptops",
            "warm desks",
            "repeating desks",
            "glass partitions",
-            "copier alcove",
-        )
-    )
+        ),
+        "layout_label": "Coworking camera layout",
+        "place": "coworking lounge",
+        "foreground": "near desk edge, laptop corner, and chair back",
+        "midground": "warm work desks, laptop tables, and glass partition seams",
+        "background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
+        "detail_label": "coworking details",
+        "composition": {
+            "woman": "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her",
+            "man": "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him",
+            "default": "coworking lounge frame with the subjects near a desk edge and tall-window depth behind them",
+        },
+    },
+    {
+        "key": "classical_library",
+        "family": "library",
+        "terms": (
+            "classical library",
+            "library stacks",
+            "large library",
+            "grand library",
+            "reading room",
+            "bookshelves",
+            "book shelves",
+            "book stacks",
+            "rare-books",
+            "rare books",
+            "rolling ladders",
+        ),
+        "layout_label": "Library camera layout",
+        "place": "classical library",
+        "foreground": "near bookshelf edge, reading-table corner, and brass lamp",
+        "midground": "towering bookshelves, rolling ladders, carved columns, and marble floor lines",
+        "background": "arched windows, repeated book aisles, warm brass lamps, and deep quiet library depth",
+        "detail_label": "library details",
+        "composition": {
+            "woman": "classical library frame with the woman near a bookshelf edge and long shelf depth behind her",
+            "man": "classical library frame with the man near a bookshelf edge and long shelf depth behind him",
+            "default": "classical library frame with the subjects near a bookshelf edge and long shelf depth behind them",
+        },
+    },
+)
+
+# Substrings looked up in the lowercased composition text by
+# contextual_composition_prompt(). When any of them is present and the scene
+# has a camera profile, the composition is replaced by the profile's own
+# composition sentence.
+MISMATCHED_COMPOSITION_TERMS = (
+    "outfit-check",
+    "outfit check",
+    "mirror view",
+    "mirror pose",
+    "bag",
+    "shoes",
+    "footwear",
+)
+
+
+def scene_camera_profile(scene_text: Any) -> dict[str, Any]:
+    """Return an independent copy of the first profile matching *scene_text*.
+
+    The scene text is stringified and lowercased, then SCENE_CAMERA_PROFILES
+    is scanned in order; the first entry with any "terms" string occurring as
+    a substring wins.
+
+    Args:
+        scene_text: Scene description; falsy values are treated as empty.
+
+    Returns:
+        A deep copy of the matching profile, or an empty dict when the text
+        is empty or no profile matches.
+    """
+    from copy import deepcopy
+
+    text = str(scene_text or "").lower()
+    if not text:
+        return {}
+    for profile in SCENE_CAMERA_PROFILES:
+        if any(term in text for term in profile["terms"]):
+            # dict(profile) would still share the nested "composition" dict
+            # with the module-level registry; a deep copy keeps any caller
+            # mutation away from SCENE_CAMERA_PROFILES.
+            return deepcopy(profile)
+    return {}
+
+
+def is_coworking_scene(scene_text: Any) -> bool:
+    """Tell whether the profile matched for *scene_text* has family "coworking"."""
+    family = scene_camera_profile(scene_text).get("family")
+    return family == "coworking"
+
+
+def is_scene_camera_aware(scene_text: Any) -> bool:
+    """Tell whether any camera profile matches *scene_text*."""
+    matched = scene_camera_profile(scene_text)
+    return len(matched) > 0


 def _compact_label(value: Any, compact_labels: Mapping[str, str] | None = None) -> str:
@@ -97,30 +191,10 @@ def camera_distance_from_text(text: Any) -> str:


 def coworking_location_profile(scene_text: Any) -> dict[str, str]:
-    text = str(scene_text or "").lower()
-    if "business cafe" in text or "work cafe" in text or "cafe" in text:
-        return {
-            "layout_label": "Business cafe camera layout",
-            "place": "business cafe coworking counter",
-            "foreground": "counter edge, laptop corner, and small plant",
-            "midground": "bar stools, warm desk lamps, and coffee-counter work spots",
-            "background": "plants, mirror strip, menu wall, and repeated cafe work tables",
-        }
-    if "corporate office" in text or "office after hours" in text or "copier" in text:
-        return {
-            "layout_label": "Office camera layout",
-            "place": "empty after-hours office",
-            "foreground": "copier alcove edge, chair backs, and nearest desk corner",
-            "midground": "repeating desks, glass partition seams, and muted monitor glow",
-            "background": "rows of empty workstations, city-light windows, and quiet office depth",
-        }
-    return {
-        "layout_label": "Coworking camera layout",
-        "place": "coworking lounge",
-        "foreground": "near desk edge, laptop corner, and chair back",
-        "midground": "warm work desks, laptop tables, and glass partition seams",
-        "background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
-    }
+    """Return the coworking-family profile for *scene_text*.
+
+    When the scene matches a profile whose family is "coworking" that profile
+    is returned; otherwise the profile matched by the text "coworking lounge"
+    is used as the fallback. The returned dict is the complete profile entry,
+    so besides the text fragments it also carries the "terms" and
+    "composition" values.
+    """
+    matched = scene_camera_profile(scene_text)
+    if matched.get("family") != "coworking":
+        # Non-coworking or unknown scene: fall back to the generic lounge.
+        return scene_camera_profile("coworking lounge")
+    return matched


 def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]:
@@ -135,7 +209,7 @@ def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = No
    return "the subjects", "them"


-def coworking_direction_detail(
+def scene_direction_detail(
    direction: str,
    profile: dict[str, str],
    pov_labels: list[str] | None = None,
@@ -145,12 +219,13 @@ def coworking_direction_detail(
    foreground = profile["foreground"]
    midground = profile["midground"]
    background = profile["background"]
+    detail_label = profile.get("detail_label") or "location details"
    subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
    if pov_labels:
        if "right side" in direction:
-            return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
+            return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with {detail_label} kept at the frame edges"
        if "left side" in direction:
-            return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
+            return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with {detail_label} kept at the frame edges"
        if "back-right" in direction or "back-left" in direction:
            return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground"
        if direction == "back view":
@@ -169,7 +244,16 @@ def coworking_direction_detail(
    return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}"


-def coworking_distance_detail(
+def coworking_direction_detail(
+    direction: str,
+    profile: dict[str, str],
+    pov_labels: list[str] | None = None,
+    subject_kind: str = "subjects",
+) -> str:
+    """Forward to scene_direction_detail under the earlier coworking name.
+
+    All four arguments are passed through positionally and the result is
+    returned unchanged.
+    """
+    return scene_direction_detail(direction, profile, pov_labels, subject_kind)
+
+
+def scene_distance_detail(
    distance: str,
    profile: dict[str, str],
    subject_kind: str,
@@ -179,18 +263,27 @@ def coworking_distance_detail(
    subject, _pronoun = coworking_subject_terms(subject_kind, pov_labels)
    if pov_labels:
        if "wide" in distance or "full-body" in distance or "full body" in distance:
-            return f"wide POV keeps {subject} readable with coworking context behind them"
+            return f"wide POV keeps {subject} readable with {profile['place']} context behind them"
        if "close" in distance:
-            return f"close POV keeps {subject} dominant with coworking context only at the sides or background"
+            return f"close POV keeps {subject} dominant with {profile['place']} context only at the sides or background"
        return f"medium POV keeps {subject} dominant with room context behind them"
    if "wide" in distance or "full-body" in distance or "full body" in distance:
-        return "wide crop keeps floor aisle, table rows, and window depth readable"
+        return f"wide crop keeps the {profile['foreground']}, {profile['midground']}, and {profile['background']} readable"
    if "close" in distance:
-        return "close crop keeps one desk or counter anchor visible"
+        return f"close crop keeps one anchor from the {profile['foreground']} visible"
    return f"medium crop keeps {subject} dominant"


-def coworking_elevation_detail(
+def coworking_distance_detail(
+    distance: str,
+    profile: dict[str, str],
+    subject_kind: str,
+    pov_labels: list[str] | None = None,
+) -> str:
+    """Forward to scene_distance_detail under the earlier coworking name.
+
+    All four arguments are passed through positionally and the result is
+    returned unchanged.
+    """
+    return scene_distance_detail(distance, profile, subject_kind, pov_labels)
+
+
+def scene_elevation_detail(
    elevation: str,
    profile: dict[str, str],
    subject_kind: str,
@@ -200,29 +293,39 @@ def coworking_elevation_detail(
    subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
    if pov_labels:
        if "low-angle" in elevation:
-            return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}"
+            return f"low angle keeps POV body cues low while the {profile['background']} rises behind {pronoun}"
        if "elevated" in elevation:
-            return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges"
+            return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with {profile['foreground']} only behind or at the side edges"
        if "high-angle" in elevation:
-            return f"high angle looks down from the viewer's position with desks and aisle only in the background"
-        return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}"
+            return f"high angle looks down from the viewer's position with {profile['midground']} only in the background"
+        return f"eye-level angle keeps {profile['midground']} behind {pronoun}"
    if "low-angle" in elevation:
-        return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}"
+        return f"low angle keeps the {profile['foreground']} low while {profile['background']} rises behind {pronoun}"
    if "elevated" in elevation:
-        return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}"
+        return f"elevated angle shows the {profile['foreground']} and {profile['midground']} around {pronoun}"
    if "high-angle" in elevation:
-        return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}"
-    return f"eye-level angle keeps tabletop lines and glass seams straight"
+        return f"high angle shows the {profile['place']} layout and placement of {pronoun}"
+    return f"eye-level angle keeps {profile['midground']} visually stable"


-def coworking_camera_scene_directive(
+def coworking_elevation_detail(
+    elevation: str,
+    profile: dict[str, str],
+    subject_kind: str,
+    pov_labels: list[str] | None = None,
+) -> str:
+    """Forward to scene_elevation_detail under the earlier coworking name.
+
+    All four arguments are passed through positionally and the result is
+    returned unchanged.
+    """
+    return scene_elevation_detail(elevation, profile, subject_kind, pov_labels)
+
+
+def scene_camera_directive(
    scene_text: Any,
    parsed: dict[str, Any],
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
 ) -> str:
-    if not is_coworking_scene(scene_text):
+    profile = scene_camera_profile(scene_text)
+    if not profile:
        return ""
    direction = str(parsed.get("orbit_direction") or "").strip()
    elevation = str(parsed.get("orbit_elevation_label") or "").strip()
@@ -233,10 +336,9 @@ def coworking_camera_scene_directive(
    distance = distance or camera_distance_from_text(custom_prompt)
    if not any((direction, elevation, distance, custom_prompt)):
        return ""
-    profile = coworking_location_profile(scene_text)
-    direction_detail = coworking_direction_detail(direction, profile, pov_labels, subject_kind)
-    distance_detail = coworking_distance_detail(distance, profile, subject_kind, pov_labels)
-    elevation_detail = coworking_elevation_detail(elevation, profile, subject_kind, pov_labels)
+    direction_detail = scene_direction_detail(direction, profile, pov_labels, subject_kind)
+    distance_detail = scene_distance_detail(distance, profile, subject_kind, pov_labels)
+    elevation_detail = scene_elevation_detail(elevation, profile, subject_kind, pov_labels)
    if pov_labels:
        return (
            f"{profile['layout_label']} from POV: {direction_detail}. "
@@ -250,19 +352,56 @@ def coworking_camera_scene_directive(
    )


-def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+def coworking_camera_scene_directive(
+    scene_text: Any,
+    parsed: dict[str, Any],
+    pov_labels: list[str] | None = None,
+    subject_kind: str = "subjects",
+    compact_labels: Mapping[str, str] | None = None,
+) -> str:
+    """Build the camera directive only for coworking-family scenes.
+
+    Delegates to scene_camera_directive when is_coworking_scene() accepts
+    *scene_text*; every other scene yields an empty string.
+    """
+    if is_coworking_scene(scene_text):
+        return scene_camera_directive(scene_text, parsed, pov_labels, subject_kind, compact_labels)
+    return ""
+
+
+def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
+    """Pick the composition sentence of *profile* for *subject_kind*.
+
+    Args:
+        profile: Camera profile; its "composition" entry is used only when it
+            is a dict, and "place" feeds the generic fallback sentence.
+        subject_kind: "woman" and "man" select the matching entry verbatim;
+            "couple" rewrites "the subjects" in the default sentence to
+            "the couple"; any other value uses the default sentence as is.
+
+    Returns:
+        The gendered entry unchanged when present, otherwise the default (or
+        generic fallback) sentence with " composition" appended unless the
+        word already occurs in it.
+    """
+    raw = profile.get("composition")
+    composition = raw if isinstance(raw, dict) else {}
+    if subject_kind in ("woman", "man") and composition.get(subject_kind):
+        return str(composition[subject_kind])
+    # Look the place up defensively so a profile without "place" produces a
+    # neutral sentence instead of raising KeyError.
+    place = profile.get("place") or "location"
+    text = str(composition.get("default") or f"{place} frame with the subjects clearly placed in the room")
+    if subject_kind == "couple":
+        text = text.replace("the subjects", "the couple")
+    if "composition" not in text.lower():
+        text = f"{text} composition"
+    return text
+
+
+def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+    """Return *composition*, or the scene profile's composition when it clashes.
+
+    The stripped composition text is returned unchanged when it is empty or
+    when no camera profile matches *scene_text*. Otherwise it is replaced by
+    profile_composition_text() if it contains a term from
+    MISMATCHED_COMPOSITION_TERMS or one of the generic office phrases, or if
+    the profile family is "coworking" and the composition shares no word with
+    the profile's place/foreground/midground/background text.
+    """
     text = str(composition or "").strip()
-    if not text or not is_coworking_scene(scene_text):
+    if not text:
+        return text
+    profile = scene_camera_profile(scene_text)
+    if not profile:
         return text
     lower = text.lower()
-    if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")):
+    # Compare whole words and ignore connectives: a substring test against
+    # every profile token would let "and" (also inside "standing", "hand", ...)
+    # mark almost any composition as already matching the scene.
+    connectives = {"a", "an", "and", "of", "the", "with"}
+    profile_words = {
+        word
+        for key in ("place", "foreground", "midground", "background")
+        for word in str(profile.get(key, "")).lower().replace(",", " ").replace("-", " ").split()
+        if word not in connectives
+    }
+    composition_words = {word.strip(".,;:!?\"'()") for word in lower.replace("-", " ").split()}
+    already_matches = not profile_words.isdisjoint(composition_words)
+    mismatched = any(term in lower for term in MISMATCHED_COMPOSITION_TERMS)
+    office_generic = any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check"))
+    # Keep the caller's text unless it clashes with the scene; only coworking
+    # scenes additionally require an overlap with the profile wording.
+    if not mismatched and not office_generic and (already_matches or profile.get("family") != "coworking"):
         return text
-    subject, _pronoun = coworking_subject_terms(subject_kind)
-    if subject_kind == "woman":
-        return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her"
-    if subject_kind == "man":
-        return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him"
-    return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them"
+    return profile_composition_text(profile, subject_kind)
+
+
+def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+    """Forward to contextual_composition_prompt under the earlier coworking name."""
+    return contextual_composition_prompt(scene_text, composition, subject_kind)


 def camera_scene_directive_for_context(
@@ -277,7 +416,7 @@ def camera_scene_directive_for_context(
        or parsed_camera_config.get("camera_mode") == "disabled"
    ):
        return ""
-    return coworking_camera_scene_directive(
+    return scene_camera_directive(
        scene_text,
        parsed_camera_config,
        pov_labels,