Add coworking camera-aware scene prompts

2026-06-25 23:07:31 +02:00
parent 9434070877
commit ec5640fa22
4 changed files with 282 additions and 6 deletions
@@ -3440,15 +3440,223 @@ def _camera_caption_text(parsed: dict[str, Any]) -> str:
    return f"{camera_mode} camera framing"


+def _is_coworking_scene(scene_text: Any) -> bool:
+    """Report whether a scene description mentions a coworking-style location.
+
+    The description is lower-cased and scanned for a fixed list of location
+    keywords; a falsy ``scene_text`` is treated as an empty description.
+    """
+    keywords = (
+        "coworking",
+        "cowork",
+        "office lounge",
+        "business cafe",
+        "work cafe",
+        "shared office",
+        "corporate office",
+        "office after hours",
+        "laptops",
+        "warm desks",
+        "repeating desks",
+        "glass partitions",
+        "copier alcove",
+    )
+    haystack = str(scene_text or "").lower()
+    for keyword in keywords:
+        if keyword in haystack:
+            return True
+    return False
+
+
+def _camera_geometry_phrase(parsed: dict[str, Any]) -> str:
+    """Build a short comma-separated description of the camera geometry.
+
+    Preference order:
+    1. the custom camera prompt, when none of the three orbit labels is set;
+    2. the orbit direction / elevation / distance labels (skipping "auto");
+    3. compact labels for ``shot_size``, ``angle`` and ``distance``, looked up
+       in ``CAMERA_COMPACT_LABELS`` with the raw value (underscores replaced
+       by spaces) as fallback, again skipping "auto".
+    Returns an empty string when nothing usable is present.
+    """
+    orbit_labels = [
+        str(parsed.get(key) or "").strip()
+        for key in ("orbit_direction", "orbit_elevation_label", "orbit_distance_label")
+    ]
+    custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip()
+    if custom_prompt and not any(orbit_labels):
+        return custom_prompt
+    usable_orbit = [label for label in orbit_labels if label and label != "auto"]
+    if usable_orbit:
+        return ", ".join(usable_orbit)
+    compact_labels = []
+    for key in ("shot_size", "angle", "distance"):
+        raw_value = str(parsed.get(key) or "")
+        label = CAMERA_COMPACT_LABELS.get(raw_value, raw_value.replace("_", " "))
+        if label and label != "auto":
+            compact_labels.append(label)
+    return ", ".join(compact_labels)
+
+
+def _camera_direction_from_text(text: Any) -> str:
+    """Return the first known orbit-direction label found in free-form text.
+
+    Matching is a case-insensitive substring search. Labels are tried in the
+    fixed order below and the first hit is returned in lower case; an empty
+    string is returned when no label occurs or ``text`` is falsy.
+    """
+    known_directions = (
+        "front-right quarter view",
+        "right side view",
+        "back-right quarter view",
+        "back view",
+        "back-left quarter view",
+        "left side view",
+        "front-left quarter view",
+        "front view",
+    )
+    haystack = str(text or "").lower()
+    return next((name for name in known_directions if name in haystack), "")
+
+
+def _camera_elevation_from_text(text: Any) -> str:
+    """Return the first known elevation label found in free-form text.
+
+    The search is case-insensitive and follows the fixed label order below;
+    an empty string means no label was found (or ``text`` was falsy).
+    """
+    known_elevations = ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot")
+    haystack = str(text or "").lower()
+    hits = [name for name in known_elevations if name in haystack]
+    return hits[0] if hits else ""
+
+
+def _camera_distance_from_text(text: Any) -> str:
+    """Return the first known shot-distance label found in free-form text.
+
+    The search is a case-insensitive substring match over the fixed label
+    order below. Returns the matched label in lower case, or an empty string
+    when nothing matches or ``text`` is falsy.
+    """
+    source = str(text or "").lower()
+    # "extreme close-up" contains "close-up", so the longer label must be
+    # tested first; otherwise it can never be returned.
+    for label in (
+        "wide shot",
+        "full-body shot",
+        "three-quarter body shot",
+        "medium shot",
+        "extreme close-up",
+        "close-up",
+    ):
+        if label in source:
+            return label
+    return ""
+
+
+def _coworking_location_profile(scene_text: Any) -> dict[str, str]:
+    """Pick the place name and depth-layer descriptions for a coworking scene.
+
+    The lower-cased scene text is checked for cafe markers first, then for
+    after-hours office markers; anything else gets the generic lounge profile.
+    The returned dict always has the keys ``place``, ``foreground``,
+    ``midground`` and ``background``.
+    """
+    lowered = str(scene_text or "").lower()
+    cafe_markers = ("business cafe", "work cafe", "cafe")
+    office_markers = ("corporate office", "office after hours", "copier")
+    if any(marker in lowered for marker in cafe_markers):
+        place = "business cafe coworking counter"
+        foreground = "counter edge, small plant, laptop corner, and polished phone-check surface"
+        midground = "bar stools, warm desk lamps, coffee counter, and laptop users' empty work spots"
+        background = "plants, mirror strip, menu wall, and repeated cafe work tables"
+    elif any(marker in lowered for marker in office_markers):
+        place = "empty after-hours office"
+        foreground = "copier alcove edge, office chair backs, and the nearest desk corner"
+        midground = "repeating desks, glass partition seams, blinds, and muted monitor glow"
+        background = "rows of empty workstations, city-light windows, and quiet office depth"
+    else:
+        place = "coworking lounge"
+        foreground = "nearest desk edge, laptop corner, chair back, and polished tabletop line"
+        midground = "warm work desks, laptop tables, glass partition seams, and open walking aisle"
+        background = "tall windows, repeated desk rows, plants, and soft shared-office depth"
+    return {
+        "place": place,
+        "foreground": foreground,
+        "midground": midground,
+        "background": background,
+    }
+
+
+def _coworking_direction_detail(
+    direction: str,
+    profile: dict[str, str],
+    pov_labels: list[str] | None = None,
+) -> str:
+    """Describe how the cast and location layers line up for a view direction.
+
+    ``direction`` is normalised (stripped, lower-cased) and classified as a
+    side, rear-quarter, back, or front-quarter view; anything else is treated
+    as a frontal view. When ``pov_labels`` is non-empty the wording is written
+    from the viewer's position ("the visible partner ..."); otherwise it
+    describes the cast as seen by an external camera. ``profile`` must supply
+    the ``foreground``, ``midground`` and ``background`` descriptions.
+    """
+    view = str(direction or "").strip().lower()
+    fg = profile["foreground"]
+    mid = profile["midground"]
+    bg = profile["background"]
+    rear_quarter = "back-right" in view or "back-left" in view
+    straight_back = view == "back view"
+    front_quarter = "front-right" in view or "front-left" in view
+    if pov_labels:
+        # Right is tested before left, matching the original branch order.
+        for side in ("right", "left"):
+            if f"{side} side" in view:
+                return f"the visible partner is in {side}-side profile across the lower foreground: {fg}; behind them, {mid} runs horizontally toward {bg}"
+        if rear_quarter:
+            return f"the viewer sees the visible partner from a rear-quarter angle, turning back over one shoulder; {fg} sits at the lower edge while {mid} leads into {bg}"
+        if straight_back:
+            return f"the viewer looks past the visible partner's back toward {mid}, then into {bg}, with foreground body cues low in frame"
+        if front_quarter:
+            return f"the visible partner is close in a front-quarter view over the lower foreground: {fg}; {mid} recede diagonally into {bg}"
+        return f"the visible partner faces the viewer over the lower foreground: {fg}; {mid} stacks clearly in front of {bg}"
+    if "right side" in view or "left side" in view:
+        return f"the cast is held in clean side profile along the foreground anchor: {fg}; {mid} creates horizontal perspective lines, with {bg} still visible"
+    if rear_quarter:
+        return f"the cast is viewed from a rear-quarter angle, partly turning back toward the camera; {fg} stays low in frame while {mid} leads into {bg}"
+    if straight_back:
+        return f"the cast is seen from behind with {fg} at the camera side, facing into {mid} and {bg}"
+    if front_quarter:
+        return f"the cast is placed beside the foreground anchor: {fg}; {mid} recede diagonally into {bg}"
+    return f"the cast faces the camera beside the foreground anchor: {fg}; {mid} is layered between the cast and {bg}"
+
+
+def _coworking_distance_detail(distance: str, profile: dict[str, str]) -> str:
+    """Return the cropping instruction that matches a shot-distance label.
+
+    Wide / full-body labels keep the whole location readable, labels that
+    contain "close" keep a single location anchor, and everything else
+    (including an empty label) falls back to a medium crop.
+    """
+    framing = str(distance or "").strip().lower()
+    shows_full_body = any(token in framing for token in ("wide", "full-body", "full body"))
+    if shows_full_body:
+        return f"Keep full bodies plus floor aisle, table rows, and enough {profile['background']} to read the whole {profile['place']}."
+    if "close" not in framing:
+        return f"Use a medium crop: bodies stay dominant, but the foreground anchor ({profile['foreground']}) and one midground layer ({profile['midground']}) remain visible."
+    return f"Crop close, but keep one concrete location anchor visible: {profile['foreground']} or a slice of {profile['midground']}."
+
+
+def _coworking_elevation_detail(elevation: str, profile: dict[str, str]) -> str:
+    """Return the viewpoint instruction that matches an elevation label.
+
+    Recognises "low-angle", "elevated" and "high-angle" (checked in that
+    order, case-insensitively); any other label, including an empty one, is
+    treated as eye level.
+    """
+    height = str(elevation or "").strip().lower()
+    if "low-angle" in height:
+        foreground = profile["foreground"]
+        return f"Low viewpoint: let {foreground} loom at the lower edge while windows and partitions rise behind the bodies."
+    if "elevated" in height:
+        return "Elevated viewpoint: show tabletop surfaces, laptop rectangles, chair positions, and the walking aisle around the bodies."
+    if "high-angle" in height:
+        return "High viewpoint: look down over the grid of desks, chairs, floor aisle, and body placement so the room layout is explicit."
+    place = profile["place"]
+    return f"Eye-level viewpoint: keep tabletop lines and glass seams straight enough to make the {place} believable."
+
+
+def _coworking_camera_scene_directive(
+    scene_text: Any,
+    parsed: dict[str, Any],
+    pov_labels: list[str] | None = None,
+) -> str:
+    """Compose the camera-aware location sentence for a coworking scene.
+
+    Returns an empty string when the scene is not a coworking location or
+    when the parsed camera config carries no direction, elevation, distance
+    or custom prompt. Orbit labels that are missing from ``parsed`` are
+    recovered from the custom camera prompt text when possible. With
+    ``pov_labels`` the sentence is phrased from the POV participant's
+    position; otherwise it opens with the place and the camera geometry.
+    """
+    if not _is_coworking_scene(scene_text):
+        return ""
+    custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip()
+    # Explicit orbit labels win; the custom prompt text is only a fallback.
+    direction = str(parsed.get("orbit_direction") or "").strip() or _camera_direction_from_text(custom_prompt)
+    elevation = str(parsed.get("orbit_elevation_label") or "").strip() or _camera_elevation_from_text(custom_prompt)
+    distance = str(parsed.get("orbit_distance_label") or "").strip() or _camera_distance_from_text(custom_prompt)
+    if not (direction or elevation or distance or custom_prompt):
+        return ""
+    profile = _coworking_location_profile(scene_text)
+    place = profile["place"]
+    direction_detail = _coworking_direction_detail(direction, profile, pov_labels)
+    distance_detail = _coworking_distance_detail(distance, profile)
+    elevation_detail = _coworking_elevation_detail(elevation, profile)
+    framing_sentences = f"{distance_detail} {elevation_detail}"
+    if pov_labels:
+        opening = f"From the POV participant's position inside the {place}, {direction_detail}. "
+        closing = " Use the multiangle camera only as spatial geometry for what the viewer can see."
+        return opening + framing_sentences + closing
+    geometry = _camera_geometry_phrase(parsed)
+    lead_in = f"In the {place} from a {geometry}" if geometry else f"In the {place}"
+    return f"{lead_in}, {direction_detail}. {framing_sentences}"
+
+
+def _camera_scene_directive_for_context(
+    scene_text: Any,
+    composition: Any,
+    camera_config: str | dict[str, Any] | None,
+    pov_labels: list[str] | None = None,
+) -> tuple[str, dict[str, Any]]:
+    """Parse the camera config and build the scene directive for this context.
+
+    Returns ``(directive, parsed_config)``. The directive is empty when the
+    parsed config has ``camera_detail == "off"`` or
+    ``camera_mode == "disabled"``. ``composition`` is accepted but not read
+    by this function.
+    """
+    parsed = _parse_camera_config(camera_config)
+    camera_suppressed = parsed["camera_detail"] == "off" or parsed["camera_mode"] == "disabled"
+    if camera_suppressed:
+        return "", parsed
+    scene_directive = _coworking_camera_scene_directive(scene_text, parsed, pov_labels)
+    return scene_directive, parsed
+
+
 def _apply_camera_config(row: dict[str, Any], camera_config: str | dict[str, Any] | None) -> dict[str, Any]:
+    """Attach camera metadata to ``row`` and fold the directives into its prompt.
+
+    Mutates and returns ``row``. Always writes ``camera_config``,
+    ``camera_scene_directive`` and ``camera_directive``; the plain camera
+    directive is blanked when POV labels are present. When either directive
+    is non-empty, the combined text is inserted into ``row["prompt"]``, and a
+    camera caption is appended to ``row["caption"]`` only for non-POV rows.
+    """
    directive, parsed = _camera_directive(camera_config)
+    # men_count is only trusted when its string form is all digits; else 0.
+    pov_labels = _pov_character_labels(
+        _character_slot_label_map(_parse_character_cast(row.get("character_cast_slots"))),
+        int(row.get("men_count") or 0) if str(row.get("men_count") or "").isdigit() else 0,
+    )
+    # Fall back to labels already stored on the row, dropping blank entries.
+    if not pov_labels:
+        pov_labels = [str(label) for label in _list_from(row.get("pov_character_labels")) if str(label).strip()]
+    # NOTE(review): `parsed` is already a parsed config and is parsed again
+    # inside the helper — presumably _parse_camera_config is idempotent; confirm.
+    scene_directive, parsed = _camera_scene_directive_for_context(
+        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
+        row.get("composition") or row.get("source_composition"),
+        parsed,
+        pov_labels,
+    )
    row["camera_config"] = parsed
-    row["camera_directive"] = directive
-    if not directive:
+    row["camera_scene_directive"] = scene_directive
+    row["camera_directive"] = "" if pov_labels else directive
+    combined_directive = " ".join(part for part in (scene_directive, row["camera_directive"]) if part)
+    if not combined_directive:
        return row
-    row["prompt"] = _insert_positive_directive(row["prompt"], directive)
+    row["prompt"] = _insert_positive_directive(row["prompt"], combined_directive)
    camera_caption = _camera_caption_text(parsed)
-    if camera_caption:
+    if camera_caption and not pov_labels:
        row["caption"] = f"{row.get('caption', '').rstrip()}, {camera_caption}"
    return row

@@ -7981,10 +8189,39 @@ def build_insta_of_pair(
    hard_camera_config = _insta_camera_config_with_detail(hard_camera_config, options["camera_detail"])
    soft_camera_directive, soft_camera_config = _camera_directive(soft_camera_config)
    hard_camera_directive, hard_camera_config = _camera_directive(hard_camera_config)
-    soft_camera_sentence = f"Camera control: {soft_camera_directive} " if soft_camera_directive else ""
-    hard_camera_sentence = f"Camera control: {hard_camera_directive} " if hard_camera_directive else ""
    hard_scene = soft_row["scene_text"] if options["continuity"] == "same_creator_same_room" else hard_row["scene_text"]
    hard_composition = hard_row["composition"]
+    soft_pov_camera_labels = (
+        pov_character_labels
+        if options["softcore_cast"] == "same_as_hardcore"
+        else []
+    )
+    soft_camera_scene_directive, soft_camera_config = _camera_scene_directive_for_context(
+        soft_row.get("scene_text"),
+        soft_row.get("composition"),
+        soft_camera_config,
+        soft_pov_camera_labels,
+    )
+    hard_camera_scene_directive, hard_camera_config = _camera_scene_directive_for_context(
+        hard_scene,
+        hard_composition,
+        hard_camera_config,
+        pov_character_labels,
+    )
+    if soft_pov_camera_labels:
+        soft_camera_directive = ""
+    if pov_character_labels:
+        hard_camera_directive = ""
+    soft_row["camera_config"] = soft_camera_config
+    soft_row["camera_directive"] = soft_camera_directive
+    soft_row["camera_scene_directive"] = soft_camera_scene_directive
+    hard_row["camera_config"] = hard_camera_config
+    hard_row["camera_directive"] = hard_camera_directive
+    hard_row["camera_scene_directive"] = hard_camera_scene_directive
+    soft_camera_scene_sentence = f"{soft_camera_scene_directive} " if soft_camera_scene_directive else ""
+    hard_camera_scene_sentence = f"{hard_camera_scene_directive} " if hard_camera_scene_directive else ""
+    soft_camera_sentence = f"Camera control: {soft_camera_directive} " if soft_camera_directive else ""
+    hard_camera_sentence = f"Camera control: {hard_camera_directive} " if hard_camera_directive else ""
    soft_cast = (
        "solo creator setup with Woman A alone"
        if options["softcore_cast"] == "solo"
@@ -8065,6 +8302,7 @@ def build_insta_of_pair(
        f"{soft_cast_presence}"
        f"{soft_cast_styling_sentence}"
        f"{soft_row['softcore_item_prompt_label']}: {soft_row['item']}. Pose: {soft_row['pose']}. Setting: {soft_row['scene_text']}. "
+        f"{soft_camera_scene_sentence}"
        f"{_labeled_expression_sentence('Facial expression', soft_row.get('expression'))}"
        f"Composition: {soft_row['composition']}. "
        f"{soft_camera_sentence}"
@@ -8080,6 +8318,7 @@ def build_insta_of_pair(
        f"{hard_clothing_sentence}"
        f"Role graph: {hard_row['role_graph']} Sexual scene: {hard_row['item']}. "
        f"Setting: {hard_scene}. "
+        f"{hard_camera_scene_sentence}"
        f"{_labeled_expression_sentence('Facial expressions', hard_row.get('expression'))}"
        f"Composition: {hard_composition}. "
        f"{hard_detail_directive}"
@@ -8104,6 +8343,7 @@ def build_insta_of_pair(
        soft_partner_outfit_text,
        soft_partner_styling["pose"],
        soft_row["scene_text"],
+        soft_camera_scene_directive,
        soft_row["composition"],
        _camera_caption_text(soft_camera_config) if soft_camera_directive else "",
    ]
@@ -8117,6 +8357,7 @@ def build_insta_of_pair(
        hard_row["role_graph"],
        hard_row["item"],
        hard_scene,
+        hard_camera_scene_directive,
        hard_composition,
        _camera_caption_text(hard_camera_config) if hard_camera_directive else "",
    ]
@@ -8150,5 +8391,7 @@ def build_insta_of_pair(
        "hardcore_camera_config": hard_camera_config,
        "softcore_camera_directive": soft_camera_directive,
        "hardcore_camera_directive": hard_camera_directive,
+        "softcore_camera_scene_directive": soft_camera_scene_directive,
+        "hardcore_camera_scene_directive": hard_camera_scene_directive,
    }
    return metadata