Extract scene camera adapters

This commit is contained in:
2026-06-26 15:10:05 +02:00
parent 97c49fffed
commit b82cf3fbbf
4 changed files with 307 additions and 236 deletions
+9 -4
View File
@@ -86,9 +86,14 @@ Move or isolate later:
- role graph generation for hardcore interaction categories into a dedicated
module, for example `hardcore_role_graphs.py`;
- camera-scene adapters into `scene_camera_adapters.py`;
- category-library loading and inheritance helpers into `category_library.py`.
Already isolated:
- camera-scene prose and coworking composition adaptation live in
`scene_camera_adapters.py`; `prompt_builder.py` still owns camera config
parsing and row mutation.
### Pair / Adapter Layer
Owner today: `build_insta_of_pair`.
@@ -285,10 +290,11 @@ Near-term:
- Keep scene-camera adapters scoped by location family.
- Use the memory note in
`/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV.
- Keep `scene_camera_adapters.py` as the owner for location-aware camera prose;
add new location families there one at a time.
Medium-term:
- Move coworking adapter into a scene-camera adapter module.
- Build new adapters one location family at a time.
## Invariants To Preserve
@@ -307,6 +313,5 @@ Medium-term:
1. Expand `tools/prompt_smoke.py` with close foreplay and POV penetration
fixtures.
2. Split Krea action/POV/clothing helpers into separate modules.
3. Extract scene-camera adapters from `prompt_builder.py`.
4. Split `__init__.py` node classes by family after behavior is covered by smoke
3. Split `__init__.py` node classes by family after behavior is covered by smoke
checks.
+1 -1
View File
@@ -776,7 +776,7 @@ Use these traces to narrow a problem in one pass.
2. If scene is good and composition is bad, edit composition pools, not
location pools.
3. If a scene-camera adapter rewrote composition, inspect
`_coworking_composition_prompt` or the future adapter for that scene family.
`scene_camera_adapters.py`.
4. If the issue comes from `Location Theme`, edit `THEMATIC_LOCATION_PRESETS`.
### Trigger missing after formatting
+11 -231
View File
@@ -10,6 +10,7 @@ from typing import Any, Callable
try:
from . import generate_prompt_batches as g
from . import scene_camera_adapters
from .prompt_hygiene import (
sanitize_caption_text,
sanitize_negative_text,
@@ -17,6 +18,7 @@ try:
)
except ImportError: # Allows local smoke tests with `python -c`.
import generate_prompt_batches as g
import scene_camera_adapters
from prompt_hygiene import (
sanitize_caption_text,
sanitize_negative_text,
@@ -3674,235 +3676,8 @@ def _camera_caption_text(parsed: dict[str, Any]) -> str:
return f"{camera_mode} camera framing"
def _is_coworking_scene(scene_text: Any) -> bool:
text = str(scene_text or "").lower()
return any(
term in text
for term in (
"coworking",
"cowork",
"office lounge",
"business cafe",
"work cafe",
"shared office",
"corporate office",
"office after hours",
"laptops",
"warm desks",
"repeating desks",
"glass partitions",
"copier alcove",
)
)
def _camera_geometry_phrase(parsed: dict[str, Any]) -> str:
direction = str(parsed.get("orbit_direction") or "").strip()
elevation = str(parsed.get("orbit_elevation_label") or "").strip()
distance = str(parsed.get("orbit_distance_label") or "").strip()
custom = str(parsed.get("custom_camera_prompt") or "").strip()
if not any((direction, elevation, distance)) and custom:
return custom
parts = [part for part in (direction, elevation, distance) if part and part != "auto"]
if parts:
return ", ".join(parts)
compact_parts = [
CAMERA_COMPACT_LABELS.get(str(parsed.get(key) or ""), str(parsed.get(key) or "").replace("_", " "))
for key in ("shot_size", "angle", "distance")
]
compact_parts = [part for part in compact_parts if part and part != "auto"]
return ", ".join(compact_parts)
def _camera_direction_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in (
"front-right quarter view",
"right side view",
"back-right quarter view",
"back view",
"back-left quarter view",
"left side view",
"front-left quarter view",
"front view",
):
if label in source:
return label
return ""
def _camera_elevation_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot"):
if label in source:
return label
return ""
def _camera_distance_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in ("wide shot", "full-body shot", "three-quarter body shot", "medium shot", "close-up", "extreme close-up"):
if label in source:
return label
return ""
def _coworking_location_profile(scene_text: Any) -> dict[str, str]:
text = str(scene_text or "").lower()
if "business cafe" in text or "work cafe" in text or "cafe" in text:
return {
"layout_label": "Business cafe camera layout",
"place": "business cafe coworking counter",
"foreground": "counter edge, laptop corner, and small plant",
"midground": "bar stools, warm desk lamps, and coffee-counter work spots",
"background": "plants, mirror strip, menu wall, and repeated cafe work tables",
}
if "corporate office" in text or "office after hours" in text or "copier" in text:
return {
"layout_label": "Office camera layout",
"place": "empty after-hours office",
"foreground": "copier alcove edge, chair backs, and nearest desk corner",
"midground": "repeating desks, glass partition seams, and muted monitor glow",
"background": "rows of empty workstations, city-light windows, and quiet office depth",
}
return {
"layout_label": "Coworking camera layout",
"place": "coworking lounge",
"foreground": "near desk edge, laptop corner, and chair back",
"midground": "warm work desks, laptop tables, and glass partition seams",
"background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
}
def _coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]:
if pov_labels:
return "the visible partner", "them"
if subject_kind == "woman":
return "the woman", "her"
if subject_kind == "man":
return "the man", "him"
if subject_kind == "couple":
return "the couple", "them"
return "the subjects", "them"
def _coworking_direction_detail(
direction: str,
profile: dict[str, str],
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
) -> str:
direction = str(direction or "").strip().lower()
foreground = profile["foreground"]
midground = profile["midground"]
background = profile["background"]
subject, pronoun = _coworking_subject_terms(subject_kind, pov_labels)
if pov_labels:
if "right side" in direction:
return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
if "left side" in direction:
return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
if "back-right" in direction or "back-left" in direction:
return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground"
if direction == "back view":
return f"the viewer looks past {subject}'s back toward {midground}, then into {background}; only POV body cues sit low in frame"
if "front-right" in direction or "front-left" in direction:
return f"{subject} fills the first-person front-quarter view; {midground} recede diagonally behind {pronoun} toward {background}"
return f"{subject} faces the viewer in first-person view; {midground} and {background} stay behind {pronoun}, not between viewer and body"
if "right side" in direction or "left side" in direction:
return f"{subject} is held in side profile along the {foreground}; {midground} run laterally behind {pronoun}, with {background} still readable"
if "back-right" in direction or "back-left" in direction:
return f"{subject} is viewed from a rear-quarter angle, partly turning back toward camera; the {foreground} stays low in frame while {midground} lead into {background}"
if direction == "back view":
return f"{subject} is seen from behind with the {foreground} at camera side, facing into {midground} and {background}"
if "front-right" in direction or "front-left" in direction:
return f"{subject} is placed beside the {foreground}; {midground} recede diagonally behind {pronoun} toward {background}"
return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}"
def _coworking_distance_detail(distance: str, profile: dict[str, str], subject_kind: str, pov_labels: list[str] | None = None) -> str:
distance = str(distance or "").strip().lower()
subject, _pronoun = _coworking_subject_terms(subject_kind, pov_labels)
if pov_labels:
if "wide" in distance or "full-body" in distance or "full body" in distance:
return f"wide POV keeps {subject} readable with coworking context behind them"
if "close" in distance:
return f"close POV keeps {subject} dominant with coworking context only at the sides or background"
return f"medium POV keeps {subject} dominant with room context behind them"
if "wide" in distance or "full-body" in distance or "full body" in distance:
return "wide crop keeps floor aisle, table rows, and window depth readable"
if "close" in distance:
return "close crop keeps one desk or counter anchor visible"
return f"medium crop keeps {subject} dominant"
def _coworking_elevation_detail(elevation: str, profile: dict[str, str], subject_kind: str, pov_labels: list[str] | None = None) -> str:
elevation = str(elevation or "").strip().lower()
subject, pronoun = _coworking_subject_terms(subject_kind, pov_labels)
if pov_labels:
if "low-angle" in elevation:
return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}"
if "elevated" in elevation:
return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges"
if "high-angle" in elevation:
return f"high angle looks down from the viewer's position with desks and aisle only in the background"
return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}"
if "low-angle" in elevation:
return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}"
if "elevated" in elevation:
return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}"
if "high-angle" in elevation:
return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}"
return f"eye-level angle keeps tabletop lines and glass seams straight"
def _coworking_camera_scene_directive(
scene_text: Any,
parsed: dict[str, Any],
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
) -> str:
if not _is_coworking_scene(scene_text):
return ""
direction = str(parsed.get("orbit_direction") or "").strip()
elevation = str(parsed.get("orbit_elevation_label") or "").strip()
distance = str(parsed.get("orbit_distance_label") or "").strip()
custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip()
direction = direction or _camera_direction_from_text(custom_prompt)
elevation = elevation or _camera_elevation_from_text(custom_prompt)
distance = distance or _camera_distance_from_text(custom_prompt)
if not any((direction, elevation, distance, custom_prompt)):
return ""
profile = _coworking_location_profile(scene_text)
direction_detail = _coworking_direction_detail(direction, profile, pov_labels, subject_kind)
distance_detail = _coworking_distance_detail(distance, profile, subject_kind, pov_labels)
elevation_detail = _coworking_elevation_detail(elevation, profile, subject_kind, pov_labels)
if pov_labels:
return (
f"{profile['layout_label']} from POV: {direction_detail}. "
f"{distance_detail}; {elevation_detail}; use the multiangle camera only as first-person spatial geometry."
)
geometry = _camera_geometry_phrase(parsed)
geometry_clause = f" ({geometry})" if geometry else ""
return (
f"{profile['layout_label']}{geometry_clause}: {direction_detail}; "
f"{distance_detail}; {elevation_detail}."
)
def _coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
text = str(composition or "").strip()
if not text or not _is_coworking_scene(scene_text):
return text
lower = text.lower()
if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")):
return text
subject, _pronoun = _coworking_subject_terms(subject_kind)
if subject_kind == "woman":
return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her"
if subject_kind == "man":
return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him"
return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them"
return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
def _apply_coworking_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
@@ -3936,9 +3711,14 @@ def _camera_scene_directive_for_context(
subject_kind: str = "subjects",
) -> tuple[str, dict[str, Any]]:
parsed = _parse_camera_config(camera_config)
if parsed["camera_detail"] == "off" or parsed["camera_mode"] == "disabled":
return "", parsed
return _coworking_camera_scene_directive(scene_text, parsed, pov_labels, subject_kind), parsed
directive = scene_camera_adapters.camera_scene_directive_for_context(
scene_text,
parsed,
pov_labels,
subject_kind,
CAMERA_COMPACT_LABELS,
)
return directive, parsed
def _row_camera_subject_kind(row: dict[str, Any]) -> str:
+286
View File
@@ -0,0 +1,286 @@
from __future__ import annotations
from typing import Any, Mapping
CAMERA_DIRECTIONS = (
"front-right quarter view",
"right side view",
"back-right quarter view",
"back view",
"back-left quarter view",
"left side view",
"front-left quarter view",
"front view",
)
CAMERA_ELEVATIONS = ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot")
CAMERA_DISTANCES = (
"wide shot",
"full-body shot",
"three-quarter body shot",
"medium shot",
"close-up",
"extreme close-up",
)
def is_coworking_scene(scene_text: Any) -> bool:
text = str(scene_text or "").lower()
return any(
term in text
for term in (
"coworking",
"cowork",
"office lounge",
"business cafe",
"work cafe",
"shared office",
"corporate office",
"office after hours",
"laptops",
"warm desks",
"repeating desks",
"glass partitions",
"copier alcove",
)
)
def _compact_label(value: Any, compact_labels: Mapping[str, str] | None = None) -> str:
text = str(value or "")
if compact_labels and text in compact_labels:
return compact_labels[text]
return text.replace("_", " ")
def camera_geometry_phrase(parsed: dict[str, Any], compact_labels: Mapping[str, str] | None = None) -> str:
direction = str(parsed.get("orbit_direction") or "").strip()
elevation = str(parsed.get("orbit_elevation_label") or "").strip()
distance = str(parsed.get("orbit_distance_label") or "").strip()
custom = str(parsed.get("custom_camera_prompt") or "").strip()
if not any((direction, elevation, distance)) and custom:
return custom
parts = [part for part in (direction, elevation, distance) if part and part != "auto"]
if parts:
return ", ".join(parts)
compact_parts = [
_compact_label(parsed.get(key), compact_labels)
for key in ("shot_size", "angle", "distance")
]
compact_parts = [part for part in compact_parts if part and part != "auto"]
return ", ".join(compact_parts)
def camera_direction_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in CAMERA_DIRECTIONS:
if label in source:
return label
return ""
def camera_elevation_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in CAMERA_ELEVATIONS:
if label in source:
return label
return ""
def camera_distance_from_text(text: Any) -> str:
source = str(text or "").lower()
for label in CAMERA_DISTANCES:
if label in source:
return label
return ""
def coworking_location_profile(scene_text: Any) -> dict[str, str]:
text = str(scene_text or "").lower()
if "business cafe" in text or "work cafe" in text or "cafe" in text:
return {
"layout_label": "Business cafe camera layout",
"place": "business cafe coworking counter",
"foreground": "counter edge, laptop corner, and small plant",
"midground": "bar stools, warm desk lamps, and coffee-counter work spots",
"background": "plants, mirror strip, menu wall, and repeated cafe work tables",
}
if "corporate office" in text or "office after hours" in text or "copier" in text:
return {
"layout_label": "Office camera layout",
"place": "empty after-hours office",
"foreground": "copier alcove edge, chair backs, and nearest desk corner",
"midground": "repeating desks, glass partition seams, and muted monitor glow",
"background": "rows of empty workstations, city-light windows, and quiet office depth",
}
return {
"layout_label": "Coworking camera layout",
"place": "coworking lounge",
"foreground": "near desk edge, laptop corner, and chair back",
"midground": "warm work desks, laptop tables, and glass partition seams",
"background": "tall windows, repeated desk rows, plants, and soft shared-office depth",
}
def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]:
if pov_labels:
return "the visible partner", "them"
if subject_kind == "woman":
return "the woman", "her"
if subject_kind == "man":
return "the man", "him"
if subject_kind == "couple":
return "the couple", "them"
return "the subjects", "them"
def coworking_direction_detail(
direction: str,
profile: dict[str, str],
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
) -> str:
direction = str(direction or "").strip().lower()
foreground = profile["foreground"]
midground = profile["midground"]
background = profile["background"]
subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
if pov_labels:
if "right side" in direction:
return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
if "left side" in direction:
return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges"
if "back-right" in direction or "back-left" in direction:
return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground"
if direction == "back view":
return f"the viewer looks past {subject}'s back toward {midground}, then into {background}; only POV body cues sit low in frame"
if "front-right" in direction or "front-left" in direction:
return f"{subject} fills the first-person front-quarter view; {midground} recede diagonally behind {pronoun} toward {background}"
return f"{subject} faces the viewer in first-person view; {midground} and {background} stay behind {pronoun}, not between viewer and body"
if "right side" in direction or "left side" in direction:
return f"{subject} is held in side profile along the {foreground}; {midground} run laterally behind {pronoun}, with {background} still readable"
if "back-right" in direction or "back-left" in direction:
return f"{subject} is viewed from a rear-quarter angle, partly turning back toward camera; the {foreground} stays low in frame while {midground} lead into {background}"
if direction == "back view":
return f"{subject} is seen from behind with the {foreground} at camera side, facing into {midground} and {background}"
if "front-right" in direction or "front-left" in direction:
return f"{subject} is placed beside the {foreground}; {midground} recede diagonally behind {pronoun} toward {background}"
return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}"
def coworking_distance_detail(
distance: str,
profile: dict[str, str],
subject_kind: str,
pov_labels: list[str] | None = None,
) -> str:
distance = str(distance or "").strip().lower()
subject, _pronoun = coworking_subject_terms(subject_kind, pov_labels)
if pov_labels:
if "wide" in distance or "full-body" in distance or "full body" in distance:
return f"wide POV keeps {subject} readable with coworking context behind them"
if "close" in distance:
return f"close POV keeps {subject} dominant with coworking context only at the sides or background"
return f"medium POV keeps {subject} dominant with room context behind them"
if "wide" in distance or "full-body" in distance or "full body" in distance:
return "wide crop keeps floor aisle, table rows, and window depth readable"
if "close" in distance:
return "close crop keeps one desk or counter anchor visible"
return f"medium crop keeps {subject} dominant"
def coworking_elevation_detail(
elevation: str,
profile: dict[str, str],
subject_kind: str,
pov_labels: list[str] | None = None,
) -> str:
elevation = str(elevation or "").strip().lower()
subject, pronoun = coworking_subject_terms(subject_kind, pov_labels)
if pov_labels:
if "low-angle" in elevation:
return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}"
if "elevated" in elevation:
return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges"
if "high-angle" in elevation:
return f"high angle looks down from the viewer's position with desks and aisle only in the background"
return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}"
if "low-angle" in elevation:
return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}"
if "elevated" in elevation:
return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}"
if "high-angle" in elevation:
return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}"
return f"eye-level angle keeps tabletop lines and glass seams straight"
def coworking_camera_scene_directive(
scene_text: Any,
parsed: dict[str, Any],
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
compact_labels: Mapping[str, str] | None = None,
) -> str:
if not is_coworking_scene(scene_text):
return ""
direction = str(parsed.get("orbit_direction") or "").strip()
elevation = str(parsed.get("orbit_elevation_label") or "").strip()
distance = str(parsed.get("orbit_distance_label") or "").strip()
custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip()
direction = direction or camera_direction_from_text(custom_prompt)
elevation = elevation or camera_elevation_from_text(custom_prompt)
distance = distance or camera_distance_from_text(custom_prompt)
if not any((direction, elevation, distance, custom_prompt)):
return ""
profile = coworking_location_profile(scene_text)
direction_detail = coworking_direction_detail(direction, profile, pov_labels, subject_kind)
distance_detail = coworking_distance_detail(distance, profile, subject_kind, pov_labels)
elevation_detail = coworking_elevation_detail(elevation, profile, subject_kind, pov_labels)
if pov_labels:
return (
f"{profile['layout_label']} from POV: {direction_detail}. "
f"{distance_detail}; {elevation_detail}; use the multiangle camera only as first-person spatial geometry."
)
geometry = camera_geometry_phrase(parsed, compact_labels)
geometry_clause = f" ({geometry})" if geometry else ""
return (
f"{profile['layout_label']}{geometry_clause}: {direction_detail}; "
f"{distance_detail}; {elevation_detail}."
)
def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
text = str(composition or "").strip()
if not text or not is_coworking_scene(scene_text):
return text
lower = text.lower()
if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")):
return text
subject, _pronoun = coworking_subject_terms(subject_kind)
if subject_kind == "woman":
return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her"
if subject_kind == "man":
return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him"
return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them"
def camera_scene_directive_for_context(
scene_text: Any,
parsed_camera_config: dict[str, Any],
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
compact_labels: Mapping[str, str] | None = None,
) -> str:
if (
parsed_camera_config.get("camera_detail") == "off"
or parsed_camera_config.get("camera_mode") == "disabled"
):
return ""
return coworking_camera_scene_directive(
scene_text,
parsed_camera_config,
pov_labels,
subject_kind,
compact_labels,
)