diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index a19854f..d5c815b 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -86,9 +86,14 @@ Move or isolate later: - role graph generation for hardcore interaction categories into a dedicated module, for example `hardcore_role_graphs.py`; -- camera-scene adapters into `scene_camera_adapters.py`; - category-library loading and inheritance helpers into `category_library.py`. +Already isolated: + +- camera-scene prose and coworking composition adaptation live in + `scene_camera_adapters.py`; `prompt_builder.py` still owns camera config + parsing and row mutation. + ### Pair / Adapter Layer Owner today: `build_insta_of_pair`. @@ -285,10 +290,11 @@ Near-term: - Keep scene-camera adapters scoped by location family. - Use the memory note in `/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV. +- Keep `scene_camera_adapters.py` as the owner for location-aware camera prose; + add new location families there one at a time. Medium-term: -- Move coworking adapter into a scene-camera adapter module. - Build new adapters one location family at a time. ## Invariants To Preserve @@ -307,6 +313,5 @@ Medium-term: 1. Expand `tools/prompt_smoke.py` with close foreplay and POV penetration fixtures. 2. Split Krea action/POV/clothing helpers into separate modules. -3. Extract scene-camera adapters from `prompt_builder.py`. -4. Split `__init__.py` node classes by family after behavior is covered by smoke +3. Split `__init__.py` node classes by family after behavior is covered by smoke checks. diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 6055bdf..54f7174 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -776,7 +776,7 @@ Use these traces to narrow a problem in one pass. 2. If scene is good and composition is bad, edit composition pools, not location pools. 3. If a scene-camera adapter rewrote composition, inspect - `_coworking_composition_prompt` or the future adapter for that scene family. + `scene_camera_adapters.py`. 4. If the issue comes from `Location Theme`, edit `THEMATIC_LOCATION_PRESETS`. ### Trigger missing after formatting diff --git a/prompt_builder.py b/prompt_builder.py index 87747e0..f6be95e 100644 --- a/prompt_builder.py +++ b/prompt_builder.py @@ -10,6 +10,7 @@ from typing import Any, Callable try: from . import generate_prompt_batches as g + from . import scene_camera_adapters from .prompt_hygiene import ( sanitize_caption_text, sanitize_negative_text, @@ -17,6 +18,7 @@ try: ) except ImportError: # Allows local smoke tests with `python -c`. import generate_prompt_batches as g + import scene_camera_adapters from prompt_hygiene import ( sanitize_caption_text, sanitize_negative_text, @@ -3674,235 +3676,8 @@ def _camera_caption_text(parsed: dict[str, Any]) -> str: return f"{camera_mode} camera framing" -def _is_coworking_scene(scene_text: Any) -> bool: - text = str(scene_text or "").lower() - return any( - term in text - for term in ( - "coworking", - "cowork", - "office lounge", - "business cafe", - "work cafe", - "shared office", - "corporate office", - "office after hours", - "laptops", - "warm desks", - "repeating desks", - "glass partitions", - "copier alcove", - ) - ) - - -def _camera_geometry_phrase(parsed: dict[str, Any]) -> str: - direction = str(parsed.get("orbit_direction") or "").strip() - elevation = str(parsed.get("orbit_elevation_label") or "").strip() - distance = str(parsed.get("orbit_distance_label") or "").strip() - custom = str(parsed.get("custom_camera_prompt") or "").strip() - if not any((direction, elevation, distance)) and custom: - return custom - parts = [part for part in (direction, elevation, distance) if part and part != "auto"] - if parts: - return ", ".join(parts) - compact_parts = [ - CAMERA_COMPACT_LABELS.get(str(parsed.get(key) or ""), str(parsed.get(key) or "").replace("_", " ")) - for key in ("shot_size", "angle", "distance") - ] - compact_parts = [part for part in compact_parts if part and part != "auto"] - return ", ".join(compact_parts) - - -def _camera_direction_from_text(text: Any) -> str: - source = str(text or "").lower() - for label in ( - "front-right quarter view", - "right side view", - "back-right quarter view", - "back view", - "back-left quarter view", - "left side view", - "front-left quarter view", - "front view", - ): - if label in source: - return label - return "" - - -def _camera_elevation_from_text(text: Any) -> str: - source = str(text or "").lower() - for label in ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot"): - if label in source: - return label - return "" - - -def _camera_distance_from_text(text: Any) -> str: - source = str(text or "").lower() - for label in ("wide shot", "full-body shot", "three-quarter body shot", "medium shot", "close-up", "extreme close-up"): - if label in source: - return label - return "" - - -def _coworking_location_profile(scene_text: Any) -> dict[str, str]: - text = str(scene_text or "").lower() - if "business cafe" in text or "work cafe" in text or "cafe" in text: - return { - "layout_label": "Business cafe camera layout", - "place": "business cafe coworking counter", - "foreground": "counter edge, laptop corner, and small plant", - "midground": "bar stools, warm desk lamps, and coffee-counter work spots", - "background": "plants, mirror strip, menu wall, and repeated cafe work tables", - } - if "corporate office" in text or "office after hours" in text or "copier" in text: - return { - "layout_label": "Office camera layout", - "place": "empty after-hours office", - "foreground": "copier alcove edge, chair backs, and nearest desk corner", - "midground": "repeating desks, glass partition seams, and muted monitor glow", - "background": "rows of empty workstations, city-light windows, and quiet office depth", - } - return { - "layout_label": "Coworking camera layout", - "place": "coworking lounge", - "foreground": "near desk edge, laptop corner, and chair back", - "midground": "warm work desks, laptop tables, and glass partition seams", - "background": "tall windows, repeated desk rows, plants, and soft shared-office depth", - } - - -def _coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]: - if pov_labels: - return "the visible partner", "them" - if subject_kind == "woman": - return "the woman", "her" - if subject_kind == "man": - return "the man", "him" - if subject_kind == "couple": - return "the couple", "them" - return "the subjects", "them" - - -def _coworking_direction_detail( - direction: str, - profile: dict[str, str], - pov_labels: list[str] | None = None, - subject_kind: str = "subjects", -) -> str: - direction = str(direction or "").strip().lower() - foreground = profile["foreground"] - midground = profile["midground"] - background = profile["background"] - subject, pronoun = _coworking_subject_terms(subject_kind, pov_labels) - if pov_labels: - if "right side" in direction: - return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges" - if "left side" in direction: - return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges" - if "back-right" in direction or "back-left" in direction: - return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground" - if direction == "back view": - return f"the viewer looks past {subject}'s back toward {midground}, then into {background}; only POV body cues sit low in frame" - if "front-right" in direction or "front-left" in direction: - return f"{subject} fills the first-person front-quarter view; {midground} recede diagonally behind {pronoun} toward {background}" - return f"{subject} faces the viewer in first-person view; {midground} and {background} stay behind {pronoun}, not between viewer and body" - if "right side" in direction or "left side" in direction: - return f"{subject} is held in side profile along the {foreground}; {midground} run laterally behind {pronoun}, with {background} still readable" - if "back-right" in direction or "back-left" in direction: - return f"{subject} is viewed from a rear-quarter angle, partly turning back toward camera; the {foreground} stays low in frame while {midground} lead into {background}" - if direction == "back view": - return f"{subject} is seen from behind with the {foreground} at camera side, facing into {midground} and {background}" - if "front-right" in direction or "front-left" in direction: - return f"{subject} is placed beside the {foreground}; {midground} recede diagonally behind {pronoun} toward {background}" - return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}" - - -def _coworking_distance_detail(distance: str, profile: dict[str, str], subject_kind: str, pov_labels: list[str] | None = None) -> str: - distance = str(distance or "").strip().lower() - subject, _pronoun = _coworking_subject_terms(subject_kind, pov_labels) - if pov_labels: - if "wide" in distance or "full-body" in distance or "full body" in distance: - return f"wide POV keeps {subject} readable with coworking context behind them" - if "close" in distance: - return f"close POV keeps {subject} dominant with coworking context only at the sides or background" - return f"medium POV keeps {subject} dominant with room context behind them" - if "wide" in distance or "full-body" in distance or "full body" in distance: - return "wide crop keeps floor aisle, table rows, and window depth readable" - if "close" in distance: - return "close crop keeps one desk or counter anchor visible" - return f"medium crop keeps {subject} dominant" - - -def _coworking_elevation_detail(elevation: str, profile: dict[str, str], subject_kind: str, pov_labels: list[str] | None = None) -> str: - elevation = str(elevation or "").strip().lower() - subject, pronoun = _coworking_subject_terms(subject_kind, pov_labels) - if pov_labels: - if "low-angle" in elevation: - return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}" - if "elevated" in elevation: - return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges" - if "high-angle" in elevation: - return f"high angle looks down from the viewer's position with desks and aisle only in the background" - return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}" - if "low-angle" in elevation: - return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}" - if "elevated" in elevation: - return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}" - if "high-angle" in elevation: - return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}" - return f"eye-level angle keeps tabletop lines and glass seams straight" - - -def _coworking_camera_scene_directive( - scene_text: Any, - parsed: dict[str, Any], - pov_labels: list[str] | None = None, - subject_kind: str = "subjects", -) -> str: - if not _is_coworking_scene(scene_text): - return "" - direction = str(parsed.get("orbit_direction") or "").strip() - elevation = str(parsed.get("orbit_elevation_label") or "").strip() - distance = str(parsed.get("orbit_distance_label") or "").strip() - custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip() - direction = direction or _camera_direction_from_text(custom_prompt) - elevation = elevation or _camera_elevation_from_text(custom_prompt) - distance = distance or _camera_distance_from_text(custom_prompt) - if not any((direction, elevation, distance, custom_prompt)): - return "" - profile = _coworking_location_profile(scene_text) - direction_detail = _coworking_direction_detail(direction, profile, pov_labels, subject_kind) - distance_detail = _coworking_distance_detail(distance, profile, subject_kind, pov_labels) - elevation_detail = _coworking_elevation_detail(elevation, profile, subject_kind, pov_labels) - if pov_labels: - return ( - f"{profile['layout_label']} from POV: {direction_detail}. " - f"{distance_detail}; {elevation_detail}; use the multiangle camera only as first-person spatial geometry." - ) - geometry = _camera_geometry_phrase(parsed) - geometry_clause = f" ({geometry})" if geometry else "" - return ( - f"{profile['layout_label']}{geometry_clause}: {direction_detail}; " - f"{distance_detail}; {elevation_detail}." - ) - - def _coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str: - text = str(composition or "").strip() - if not text or not _is_coworking_scene(scene_text): - return text - lower = text.lower() - if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")): - return text - subject, _pronoun = _coworking_subject_terms(subject_kind) - if subject_kind == "woman": - return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her" - if subject_kind == "man": - return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him" - return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them" + return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind) def _apply_coworking_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]: @@ -3936,9 +3711,14 @@ def _camera_scene_directive_for_context( subject_kind: str = "subjects", ) -> tuple[str, dict[str, Any]]: parsed = _parse_camera_config(camera_config) - if parsed["camera_detail"] == "off" or parsed["camera_mode"] == "disabled": - return "", parsed - return _coworking_camera_scene_directive(scene_text, parsed, pov_labels, subject_kind), parsed + directive = scene_camera_adapters.camera_scene_directive_for_context( + scene_text, + parsed, + pov_labels, + subject_kind, + CAMERA_COMPACT_LABELS, + ) + return directive, parsed def _row_camera_subject_kind(row: dict[str, Any]) -> str: diff --git a/scene_camera_adapters.py b/scene_camera_adapters.py new file mode 100644 index 0000000..22f368b --- /dev/null +++ b/scene_camera_adapters.py @@ -0,0 +1,286 @@ +from __future__ import annotations + +from typing import Any, Mapping + + +CAMERA_DIRECTIONS = ( + "front-right quarter view", + "right side view", + "back-right quarter view", + "back view", + "back-left quarter view", + "left side view", + "front-left quarter view", + "front view", +) + +CAMERA_ELEVATIONS = ("low-angle shot", "eye-level shot", "elevated shot", "high-angle shot") +CAMERA_DISTANCES = ( + "wide shot", + "full-body shot", + "three-quarter body shot", + "medium shot", + "close-up", + "extreme close-up", +) + + +def is_coworking_scene(scene_text: Any) -> bool: + text = str(scene_text or "").lower() + return any( + term in text + for term in ( + "coworking", + "cowork", + "office lounge", + "business cafe", + "work cafe", + "shared office", + "corporate office", + "office after hours", + "laptops", + "warm desks", + "repeating desks", + "glass partitions", + "copier alcove", + ) + ) + + +def _compact_label(value: Any, compact_labels: Mapping[str, str] | None = None) -> str: + text = str(value or "") + if compact_labels and text in compact_labels: + return compact_labels[text] + return text.replace("_", " ") + + +def camera_geometry_phrase(parsed: dict[str, Any], compact_labels: Mapping[str, str] | None = None) -> str: + direction = str(parsed.get("orbit_direction") or "").strip() + elevation = str(parsed.get("orbit_elevation_label") or "").strip() + distance = str(parsed.get("orbit_distance_label") or "").strip() + custom = str(parsed.get("custom_camera_prompt") or "").strip() + if not any((direction, elevation, distance)) and custom: + return custom + parts = [part for part in (direction, elevation, distance) if part and part != "auto"] + if parts: + return ", ".join(parts) + compact_parts = [ + _compact_label(parsed.get(key), compact_labels) + for key in ("shot_size", "angle", "distance") + ] + compact_parts = [part for part in compact_parts if part and part != "auto"] + return ", ".join(compact_parts) + + +def camera_direction_from_text(text: Any) -> str: + source = str(text or "").lower() + for label in CAMERA_DIRECTIONS: + if label in source: + return label + return "" + + +def camera_elevation_from_text(text: Any) -> str: + source = str(text or "").lower() + for label in CAMERA_ELEVATIONS: + if label in source: + return label + return "" + + +def camera_distance_from_text(text: Any) -> str: + source = str(text or "").lower() + for label in CAMERA_DISTANCES: + if label in source: + return label + return "" + + +def coworking_location_profile(scene_text: Any) -> dict[str, str]: + text = str(scene_text or "").lower() + if "business cafe" in text or "work cafe" in text or "cafe" in text: + return { + "layout_label": "Business cafe camera layout", + "place": "business cafe coworking counter", + "foreground": "counter edge, laptop corner, and small plant", + "midground": "bar stools, warm desk lamps, and coffee-counter work spots", + "background": "plants, mirror strip, menu wall, and repeated cafe work tables", + } + if "corporate office" in text or "office after hours" in text or "copier" in text: + return { + "layout_label": "Office camera layout", + "place": "empty after-hours office", + "foreground": "copier alcove edge, chair backs, and nearest desk corner", + "midground": "repeating desks, glass partition seams, and muted monitor glow", + "background": "rows of empty workstations, city-light windows, and quiet office depth", + } + return { + "layout_label": "Coworking camera layout", + "place": "coworking lounge", + "foreground": "near desk edge, laptop corner, and chair back", + "midground": "warm work desks, laptop tables, and glass partition seams", + "background": "tall windows, repeated desk rows, plants, and soft shared-office depth", + } + + +def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]: + if pov_labels: + return "the visible partner", "them" + if subject_kind == "woman": + return "the woman", "her" + if subject_kind == "man": + return "the man", "him" + if subject_kind == "couple": + return "the couple", "them" + return "the subjects", "them" + + +def coworking_direction_detail( + direction: str, + profile: dict[str, str], + pov_labels: list[str] | None = None, + subject_kind: str = "subjects", +) -> str: + direction = str(direction or "").strip().lower() + foreground = profile["foreground"] + midground = profile["midground"] + background = profile["background"] + subject, pronoun = coworking_subject_terms(subject_kind, pov_labels) + if pov_labels: + if "right side" in direction: + return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges" + if "left side" in direction: + return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges" + if "back-right" in direction or "back-left" in direction: + return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground" + if direction == "back view": + return f"the viewer looks past {subject}'s back toward {midground}, then into {background}; only POV body cues sit low in frame" + if "front-right" in direction or "front-left" in direction: + return f"{subject} fills the first-person front-quarter view; {midground} recede diagonally behind {pronoun} toward {background}" + return f"{subject} faces the viewer in first-person view; {midground} and {background} stay behind {pronoun}, not between viewer and body" + if "right side" in direction or "left side" in direction: + return f"{subject} is held in side profile along the {foreground}; {midground} run laterally behind {pronoun}, with {background} still readable" + if "back-right" in direction or "back-left" in direction: + return f"{subject} is viewed from a rear-quarter angle, partly turning back toward camera; the {foreground} stays low in frame while {midground} lead into {background}" + if direction == "back view": + return f"{subject} is seen from behind with the {foreground} at camera side, facing into {midground} and {background}" + if "front-right" in direction or "front-left" in direction: + return f"{subject} is placed beside the {foreground}; {midground} recede diagonally behind {pronoun} toward {background}" + return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}" + + +def coworking_distance_detail( + distance: str, + profile: dict[str, str], + subject_kind: str, + pov_labels: list[str] | None = None, +) -> str: + distance = str(distance or "").strip().lower() + subject, _pronoun = coworking_subject_terms(subject_kind, pov_labels) + if pov_labels: + if "wide" in distance or "full-body" in distance or "full body" in distance: + return f"wide POV keeps {subject} readable with coworking context behind them" + if "close" in distance: + return f"close POV keeps {subject} dominant with coworking context only at the sides or background" + return f"medium POV keeps {subject} dominant with room context behind them" + if "wide" in distance or "full-body" in distance or "full body" in distance: + return "wide crop keeps floor aisle, table rows, and window depth readable" + if "close" in distance: + return "close crop keeps one desk or counter anchor visible" + return f"medium crop keeps {subject} dominant" + + +def coworking_elevation_detail( + elevation: str, + profile: dict[str, str], + subject_kind: str, + pov_labels: list[str] | None = None, +) -> str: + elevation = str(elevation or "").strip().lower() + subject, pronoun = coworking_subject_terms(subject_kind, pov_labels) + if pov_labels: + if "low-angle" in elevation: + return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}" + if "elevated" in elevation: + return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges" + if "high-angle" in elevation: + return f"high angle looks down from the viewer's position with desks and aisle only in the background" + return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}" + if "low-angle" in elevation: + return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}" + if "elevated" in elevation: + return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}" + if "high-angle" in elevation: + return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}" + return f"eye-level angle keeps tabletop lines and glass seams straight" + + +def coworking_camera_scene_directive( + scene_text: Any, + parsed: dict[str, Any], + pov_labels: list[str] | None = None, + subject_kind: str = "subjects", + compact_labels: Mapping[str, str] | None = None, +) -> str: + if not is_coworking_scene(scene_text): + return "" + direction = str(parsed.get("orbit_direction") or "").strip() + elevation = str(parsed.get("orbit_elevation_label") or "").strip() + distance = str(parsed.get("orbit_distance_label") or "").strip() + custom_prompt = str(parsed.get("custom_camera_prompt") or "").strip() + direction = direction or camera_direction_from_text(custom_prompt) + elevation = elevation or camera_elevation_from_text(custom_prompt) + distance = distance or camera_distance_from_text(custom_prompt) + if not any((direction, elevation, distance, custom_prompt)): + return "" + profile = coworking_location_profile(scene_text) + direction_detail = coworking_direction_detail(direction, profile, pov_labels, subject_kind) + distance_detail = coworking_distance_detail(distance, profile, subject_kind, pov_labels) + elevation_detail = coworking_elevation_detail(elevation, profile, subject_kind, pov_labels) + if pov_labels: + return ( + f"{profile['layout_label']} from POV: {direction_detail}. " + f"{distance_detail}; {elevation_detail}; use the multiangle camera only as first-person spatial geometry." + ) + geometry = camera_geometry_phrase(parsed, compact_labels) + geometry_clause = f" ({geometry})" if geometry else "" + return ( + f"{profile['layout_label']}{geometry_clause}: {direction_detail}; " + f"{distance_detail}; {elevation_detail}." + ) + + +def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str: + text = str(composition or "").strip() + if not text or not is_coworking_scene(scene_text): + return text + lower = text.lower() + if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")): + return text + subject, _pronoun = coworking_subject_terms(subject_kind) + if subject_kind == "woman": + return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her" + if subject_kind == "man": + return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him" + return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them" + + +def camera_scene_directive_for_context( + scene_text: Any, + parsed_camera_config: dict[str, Any], + pov_labels: list[str] | None = None, + subject_kind: str = "subjects", + compact_labels: Mapping[str, str] | None = None, +) -> str: + if ( + parsed_camera_config.get("camera_detail") == "off" + or parsed_camera_config.get("camera_mode") == "disabled" + ): + return "" + return coworking_camera_scene_directive( + scene_text, + parsed_camera_config, + pov_labels, + subject_kind, + compact_labels, + )