Add coworking camera-aware scene prompts

This commit is contained in:
2026-06-25 23:07:31 +02:00
parent 9434070877
commit ec5640fa22
4 changed files with 282 additions and 6 deletions
+249 -6
View File
@@ -3440,15 +3440,223 @@ def _camera_caption_text(parsed: dict[str, Any]) -> str:
return f"{camera_mode} camera framing"
def _is_coworking_scene(scene_text: Any) -> bool:
text = str(scene_text or "").lower()
return any(
term in text
for term in (
"coworking",
"cowork",
"office lounge",
"business cafe",
"work cafe",
"shared office",
"corporate office",
"office after hours",
"laptops",
"warm desks",
"repeating desks",
"glass partitions",
"copier alcove",
)
)
def _camera_geometry_phrase(parsed: dict[str, Any]) -> str:
direction = str(parsed.get("orbit_direction") or "").strip()
elevation = str(parsed.get("orbit_elevation_label") or "").strip()
distance = str(parsed.get("orbit_distance_label") or "").strip()
custom = str(parsed.get("custom_camera_prompt") or "").strip()
if not any((direction, elevation, distance)) and custom:
return custom
parts = [part for part in (direction, elevation, distance) if part and part != "auto"]
if parts:
return ", ".join(parts)
compact_parts = [
CAMERA_COMPACT_LABELS.get(str(parsed.get(key) or ""), str(parsed.get(key) or "").replace("_", " "))
for key in ("shot_size", "angle", "distance")
]
compact_parts = [part for part in compact_parts if part and part != "auto"]
return ", ".join(compact_parts)
def _camera_direction_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in (
"front-right quarter view",
"right side view",
"back-right quarter view",
"back view",
"back-left quarter view",
"left side view",
"front-left quarter view",
"front view",
):
if label in source:
return label
return ""
def _camera_elevation_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot"):
if label in source:
return label
return ""
def _camera_distance_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in ("wide shot", "full-body shot", "three-quarter body shot", "medium shot", "close-up", "extreme close-up"):
if label in source:
return label
return ""
def _coworking_location_profile(scene_text: Any) -> dict[str, str]:
text = str(scene_text or "").lower()
if "business cafe" in text or "work cafe" in text or "cafe" in text:
return {
"place": "business cafe coworking counter",
"foreground": "counter edge, small plant, laptop corner, and polished phone-check surface",
"midground": "bar stools, warm desk lamps, coffee counter, and laptop users' empty work spots",
"background": "plants, mirror strip, menu wall, and repeated cafe work tables",
}
if "corporate office" in text or "office after hours" in text or "copier" in text:
return {
"place": "empty after-hours office",
"foreground": "copier alcove edge, office chair backs, and the nearest desk corner",
"midground": "repeating desks, glass partition seams, blinds, and muted monitor glow",
"background": "rows of empty workstations, city-light windows, and quiet office depth",
}
return {
"place": "coworking lounge",
"foreground": "nearest desk edge, laptop corner, chair back, and polished tabletop line",
"midground": "warm work desks, laptop tables, glass partition seams, and open walking aisle",
"background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
}
def _coworking_direction_detail(
direction: str,
profile: dict[str, str],
pov_labels: list[str] | None = None,
) -> str:
direction = str(direction or "").strip().lower()
foreground = profile["foreground"]
midground = profile["midground"]
background = profile["background"]
if pov_labels:
if "right side" in direction:
return f"the visible partner is in right-side profile across the lower foreground: {foreground}; behind them, {midground} runs horizontally toward {background}"
if "left side" in direction:
return f"the visible partner is in left-side profile across the lower foreground: {foreground}; behind them, {midground} runs horizontally toward {background}"
if "back-right" in direction or "back-left" in direction:
return f"the viewer sees the visible partner from a rear-quarter angle, turning back over one shoulder; {foreground} sits at the lower edge while {midground} leads into {background}"
if direction == "back view":
return f"the viewer looks past the visible partner's back toward {midground}, then into {background}, with foreground body cues low in frame"
if "front-right" in direction or "front-left" in direction:
return f"the visible partner is close in a front-quarter view over the lower foreground: {foreground}; {midground} recede diagonally into {background}"
return f"the visible partner faces the viewer over the lower foreground: {foreground}; {midground} stacks clearly in front of {background}"
if "right side" in direction or "left side" in direction:
return f"the cast is held in clean side profile along the foreground anchor: {foreground}; {midground} creates horizontal perspective lines, with {background} still visible"
if "back-right" in direction or "back-left" in direction:
return f"the cast is viewed from a rear-quarter angle, partly turning back toward the camera; {foreground} stays low in frame while {midground} leads into {background}"
if direction == "back view":
return f"the cast is seen from behind with {foreground} at the camera side, facing into {midground} and {background}"
if "front-right" in direction or "front-left" in direction:
return f"the cast is placed beside the foreground anchor: {foreground}; {midground} recede diagonally into {background}"
return f"the cast faces the camera beside the foreground anchor: {foreground}; {midground} is layered between the cast and {background}"
def _coworking_distance_detail(distance: str, profile: dict[str, str]) -> str:
distance = str(distance or "").strip().lower()
if "wide" in distance or "full-body" in distance or "full body" in distance:
return f"Keep full bodies plus floor aisle, table rows, and enough {profile['background']} to read the whole {profile['place']}."
if "close" in distance:
return f"Crop close, but keep one concrete location anchor visible: {profile['foreground']} or a slice of {profile['midground']}."
return f"Use a medium crop: bodies stay dominant, but the foreground anchor ({profile['foreground']}) and one midground layer ({profile['midground']}) remain visible."
def _coworking_elevation_detail(elevation: str, profile: dict[str, str]) -> str:
elevation = str(elevation or "").strip().lower()
if "low-angle" in elevation:
return f"Low viewpoint: let {profile['foreground']} loom at the lower edge while windows and partitions rise behind the bodies."
if "elevated" in elevation:
return f"Elevated viewpoint: show tabletop surfaces, laptop rectangles, chair positions, and the walking aisle around the bodies."
if "high-angle" in elevation:
return f"High viewpoint: look down over the grid of desks, chairs, floor aisle, and body placement so the room layout is explicit."
return f"Eye-level viewpoint: keep tabletop lines and glass seams straight enough to make the {profile['place']} believable."
def _coworking_camera_scene_directive(
    scene_text: Any,
    parsed: dict[str, Any],
    pov_labels: list[str] | None = None,
) -> str:
    """Compose the location-aware camera sentence(s) for a coworking scene.

    Nothing is produced unless the scene is recognised by
    ``_is_coworking_scene`` and at least one of direction, elevation,
    distance or custom camera prompt is available. Missing orbit fields are
    filled from labels found in the custom camera prompt.

    Args:
        scene_text: Scene description used for detection and profile lookup.
        parsed: Camera configuration mapping.
        pov_labels: POV labels; when non-empty the POV wording is used and
            the geometry phrase is omitted.

    Returns:
        The directive text, or an empty string when it does not apply.
    """
    if not _is_coworking_scene(scene_text):
        return ""

    def _stripped(key: str) -> str:
        return str(parsed.get(key) or "").strip()

    custom_prompt = _stripped("custom_camera_prompt")
    # Explicit orbit fields take priority; the custom prompt is only mined
    # for a label when the corresponding field is empty.
    direction = _stripped("orbit_direction") or _camera_direction_from_text(custom_prompt)
    elevation = _stripped("orbit_elevation_label") or _camera_elevation_from_text(custom_prompt)
    distance = _stripped("orbit_distance_label") or _camera_distance_from_text(custom_prompt)
    if not (direction or elevation or distance or custom_prompt):
        return ""

    profile = _coworking_location_profile(scene_text)
    direction_detail = _coworking_direction_detail(direction, profile, pov_labels)
    distance_detail = _coworking_distance_detail(distance, profile)
    elevation_detail = _coworking_elevation_detail(elevation, profile)

    if not pov_labels:
        geometry = _camera_geometry_phrase(parsed)
        geometry_clause = f" from a {geometry}" if geometry else ""
        return (
            f"In the {profile['place']}{geometry_clause}, {direction_detail}. "
            f"{distance_detail} {elevation_detail}"
        )

    return (
        f"From the POV participant's position inside the {profile['place']}, {direction_detail}. "
        f"{distance_detail} {elevation_detail} Use the multiangle camera only as spatial geometry for what the viewer can see."
    )
def _camera_scene_directive_for_context(
    scene_text: Any,
    composition: Any,
    camera_config: str | dict[str, Any] | None,
    pov_labels: list[str] | None = None,
) -> tuple[str, dict[str, Any]]:
    """Parse the camera config and build the scene-aware camera directive.

    Args:
        scene_text: Scene description forwarded to the directive builder.
        composition: Accepted for call-site symmetry; not read in this body.
        camera_config: Raw camera configuration passed to
            ``_parse_camera_config``.
        pov_labels: POV labels forwarded to the directive builder.

    Returns:
        ``(directive, parsed_config)``. The directive is an empty string when
        the parsed config has ``camera_detail == "off"`` or
        ``camera_mode == "disabled"``.
    """
    parsed = _parse_camera_config(camera_config)
    camera_active = parsed["camera_detail"] != "off" and parsed["camera_mode"] != "disabled"
    if camera_active:
        scene_directive = _coworking_camera_scene_directive(scene_text, parsed, pov_labels)
    else:
        scene_directive = ""
    return scene_directive, parsed
def _apply_camera_config(row: dict[str, Any], camera_config: str | dict[str, Any] | None) -> dict[str, Any]:
    """Attach camera configuration, directives and caption text to a prompt row.

    The row is mutated in place and also returned. The following keys are
    written: ``camera_config``, ``camera_scene_directive``,
    ``camera_directive`` and, when there is anything to add, ``prompt`` and
    ``caption``.

    When POV labels are present the plain camera directive is blanked and
    the camera caption is not appended; only the scene-aware directive is
    inserted into the prompt.

    Args:
        row: Prompt row; must contain ``prompt`` when a directive is inserted.
        camera_config: Raw camera configuration.

    Returns:
        The same ``row`` object.
    """
    directive, parsed = _camera_directive(camera_config)

    # Only an all-digit men_count is trusted; anything else counts as zero.
    raw_men_count = row.get("men_count") or 0
    men_count = int(raw_men_count) if str(raw_men_count).isdigit() else 0
    pov_labels = _pov_character_labels(
        _character_slot_label_map(_parse_character_cast(row.get("character_cast_slots"))),
        men_count,
    )
    if not pov_labels:
        # Fall back to labels already stored on the row.
        pov_labels = [str(label) for label in _list_from(row.get("pov_character_labels")) if str(label).strip()]

    scene_directive, parsed = _camera_scene_directive_for_context(
        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
        row.get("composition") or row.get("source_composition"),
        parsed,
        pov_labels,
    )

    row["camera_config"] = parsed
    # Both directive keys are always written, so a row never carries a stale
    # value from an earlier pass.
    row["camera_scene_directive"] = scene_directive
    row["camera_directive"] = "" if pov_labels else directive

    combined_directive = " ".join(part for part in (scene_directive, row["camera_directive"]) if part)
    if not combined_directive:
        return row

    # Insert exactly once: the combined text already contains the plain
    # directive whenever it is kept.
    row["prompt"] = _insert_positive_directive(row["prompt"], combined_directive)

    camera_caption = _camera_caption_text(parsed)
    if camera_caption and not pov_labels:
        row["caption"] = f"{row.get('caption', '').rstrip()}, {camera_caption}"
    return row
@@ -7981,10 +8189,39 @@ def build_insta_of_pair(
hard_camera_config = _insta_camera_config_with_detail(hard_camera_config, options["camera_detail"])
soft_camera_directive, soft_camera_config = _camera_directive(soft_camera_config)
hard_camera_directive, hard_camera_config = _camera_directive(hard_camera_config)
soft_camera_sentence = f"Camera control: {soft_camera_directive} " if soft_camera_directive else ""
hard_camera_sentence = f"Camera control: {hard_camera_directive} " if hard_camera_directive else ""
hard_scene = soft_row["scene_text"] if options["continuity"] == "same_creator_same_room" else hard_row["scene_text"]
hard_composition = hard_row["composition"]
soft_pov_camera_labels = (
pov_character_labels
if options["softcore_cast"] == "same_as_hardcore"
else []
)
soft_camera_scene_directive, soft_camera_config = _camera_scene_directive_for_context(
soft_row.get("scene_text"),
soft_row.get("composition"),
soft_camera_config,
soft_pov_camera_labels,
)
hard_camera_scene_directive, hard_camera_config = _camera_scene_directive_for_context(
hard_scene,
hard_composition,
hard_camera_config,
pov_character_labels,
)
if soft_pov_camera_labels:
soft_camera_directive = ""
if pov_character_labels:
hard_camera_directive = ""
soft_row["camera_config"] = soft_camera_config
soft_row["camera_directive"] = soft_camera_directive
soft_row["camera_scene_directive"] = soft_camera_scene_directive
hard_row["camera_config"] = hard_camera_config
hard_row["camera_directive"] = hard_camera_directive
hard_row["camera_scene_directive"] = hard_camera_scene_directive
soft_camera_scene_sentence = f"{soft_camera_scene_directive} " if soft_camera_scene_directive else ""
hard_camera_scene_sentence = f"{hard_camera_scene_directive} " if hard_camera_scene_directive else ""
soft_camera_sentence = f"Camera control: {soft_camera_directive} " if soft_camera_directive else ""
hard_camera_sentence = f"Camera control: {hard_camera_directive} " if hard_camera_directive else ""
soft_cast = (
"solo creator setup with Woman A alone"
if options["softcore_cast"] == "solo"
@@ -8065,6 +8302,7 @@ def build_insta_of_pair(
f"{soft_cast_presence}"
f"{soft_cast_styling_sentence}"
f"{soft_row['softcore_item_prompt_label']}: {soft_row['item']}. Pose: {soft_row['pose']}. Setting: {soft_row['scene_text']}. "
f"{soft_camera_scene_sentence}"
f"{_labeled_expression_sentence('Facial expression', soft_row.get('expression'))}"
f"Composition: {soft_row['composition']}. "
f"{soft_camera_sentence}"
@@ -8080,6 +8318,7 @@ def build_insta_of_pair(
f"{hard_clothing_sentence}"
f"Role graph: {hard_row['role_graph']} Sexual scene: {hard_row['item']}. "
f"Setting: {hard_scene}. "
f"{hard_camera_scene_sentence}"
f"{_labeled_expression_sentence('Facial expressions', hard_row.get('expression'))}"
f"Composition: {hard_composition}. "
f"{hard_detail_directive}"
@@ -8104,6 +8343,7 @@ def build_insta_of_pair(
soft_partner_outfit_text,
soft_partner_styling["pose"],
soft_row["scene_text"],
soft_camera_scene_directive,
soft_row["composition"],
_camera_caption_text(soft_camera_config) if soft_camera_directive else "",
]
@@ -8117,6 +8357,7 @@ def build_insta_of_pair(
hard_row["role_graph"],
hard_row["item"],
hard_scene,
hard_camera_scene_directive,
hard_composition,
_camera_caption_text(hard_camera_config) if hard_camera_directive else "",
]
@@ -8150,5 +8391,7 @@ def build_insta_of_pair(
"hardcore_camera_config": hard_camera_config,
"softcore_camera_directive": soft_camera_directive,
"hardcore_camera_directive": hard_camera_directive,
"softcore_camera_scene_directive": soft_camera_scene_directive,
"hardcore_camera_scene_directive": hard_camera_scene_directive,
}
return metadata