diff --git a/README.md b/README.md index 475552d..e922d7e 100644 --- a/README.md +++ b/README.md @@ -413,13 +413,13 @@ The translator accepts the Qwen labels such as `front-right quarter view`, as the native camera nodes. `suppress_phone_visibility` is enabled by default so generic Qwen camera views do not add `phone hidden` or other phone wording. -For coworking-style locations, the prompt builder also uses the translated -camera geometry to add a location-aware framing sentence. It currently targets -`coworking lounge`, `business cafe`, and empty office scenes: front/side/back -views, zoom, and elevation change which desks, windows, laptop tables, glass -partitions, counters, or office rows are kept visible. In male-POV setups this -becomes a first-person spatial description and the external camera sentence is -suppressed. +For camera-aware locations, the prompt builder also uses the translated camera +geometry to add a location-aware framing sentence. It currently has scene +profiles for coworking/business-office spaces and classical library/book-stack +spaces: front/side/back views, zoom, and elevation change which desks, windows, +partitions, bookshelves, reading tables, lamps, or aisles are kept visible. In +male-POV setups this becomes a first-person spatial description and the +external camera sentence is suppressed. `SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 316337f..6e1a9fe 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -648,12 +648,15 @@ Camera handling: Current camera-aware scene adapter: -- Coworking/business-cafe/office scenes are detected by `_is_coworking_scene`. -- Location profile comes from `_coworking_location_profile`. -- Direction, distance, and elevation details come from `_coworking_direction_detail`, - `_coworking_distance_detail`, and `_coworking_elevation_detail`. -- Composition cleanup for coworking outfit-check wording happens in - `_coworking_composition_prompt`. +- Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`. +- Coworking/business-cafe/office scenes and classical library/book-stack scenes + are detected by `scene_camera_profile`. +- Direction, distance, and elevation details come from profile-aware helpers + such as `scene_direction_detail`, `scene_distance_detail`, and + `scene_elevation_detail`. +- Composition cleanup for mismatched outfit-check, mirror, bag, or shoes + wording happens in `contextual_composition_prompt`; compatibility wrappers + keep the old coworking function names available. Important POV rule: diff --git a/scene_camera_adapters.py b/scene_camera_adapters.py index 22f368b..5242744 100644 --- a/scene_camera_adapters.py +++ b/scene_camera_adapters.py @@ -25,26 +25,120 @@ CAMERA_DISTANCES = ( ) -def is_coworking_scene(scene_text: Any) -> bool: - text = str(scene_text or "").lower() - return any( - term in text - for term in ( +SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = ( + { + "key": "business_cafe", + "family": "coworking", + "terms": ("business cafe", "work cafe", "cafe"), + "layout_label": "Business cafe camera layout", + "place": "business cafe coworking counter", + "foreground": "counter edge, laptop corner, and small plant", + "midground": "bar stools, warm desk lamps, and coffee-counter work spots", + "background": "plants, mirror strip, menu wall, and repeated cafe work tables", + "detail_label": "cafe details", + "composition": { + "woman": "business-cafe selfie frame with the woman near a counter edge and warm work-table depth behind her", + "man": "business-cafe portrait frame with the man near a counter edge and warm work-table depth behind him", + "default": "business-cafe frame with the subjects near a counter edge and warm work-table depth behind them", + }, + }, + { + "key": "office_after_hours", + "family": "coworking", + "terms": ("corporate office", "office after hours", "copier", "office lounge"), + "layout_label": "Office camera layout", + "place": "empty after-hours office", + "foreground": "copier alcove edge, chair backs, and nearest desk corner", + "midground": "repeating desks, glass partition seams, and muted monitor glow", + "background": "rows of empty workstations, city-light windows, and quiet office depth", + "detail_label": "office details", + "composition": { + "woman": "after-hours office frame with the woman near a desk edge and glass-partition depth behind her", + "man": "after-hours office frame with the man near a desk edge and glass-partition depth behind him", + "default": "after-hours office frame with the subjects near a desk edge and glass-partition depth behind them", + }, + }, + { + "key": "coworking_lounge", + "family": "coworking", + "terms": ( "coworking", "cowork", - "office lounge", - "business cafe", - "work cafe", "shared office", - "corporate office", - "office after hours", "laptops", "warm desks", "repeating desks", "glass partitions", - "copier alcove", - ) - ) + ), + "layout_label": "Coworking camera layout", + "place": "coworking lounge", + "foreground": "near desk edge, laptop corner, and chair back", + "midground": "warm work desks, laptop tables, and glass partition seams", + "background": "tall windows, repeated desk rows, plants, and soft shared-office depth", + "detail_label": "coworking details", + "composition": { + "woman": "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her", + "man": "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him", + "default": "coworking lounge frame with the subjects near a desk edge and tall-window depth behind them", + }, + }, + { + "key": "classical_library", + "family": "library", + "terms": ( + "classical library", + "library stacks", + "large library", + "grand library", + "reading room", + "bookshelves", + "book shelves", + "book stacks", + "rare-books", + "rare books", + "rolling ladders", + ), + "layout_label": "Library camera layout", + "place": "classical library", + "foreground": "near bookshelf edge, reading-table corner, and brass lamp", + "midground": "towering bookshelves, rolling ladders, carved columns, and marble floor lines", + "background": "arched windows, repeated book aisles, warm brass lamps, and deep quiet library depth", + "detail_label": "library details", + "composition": { + "woman": "classical library frame with the woman near a bookshelf edge and long shelf depth behind her", + "man": "classical library frame with the man near a bookshelf edge and long shelf depth behind him", + "default": "classical library frame with the subjects near a bookshelf edge and long shelf depth behind them", + }, + }, +) + +MISMATCHED_COMPOSITION_TERMS = ( + "outfit-check", + "outfit check", + "mirror view", + "mirror pose", + "bag", + "shoes", + "footwear", +) + + +def scene_camera_profile(scene_text: Any) -> dict[str, Any]: + text = str(scene_text or "").lower() + if not text: + return {} + for profile in SCENE_CAMERA_PROFILES: + if any(term in text for term in profile["terms"]): + return dict(profile) + return {} + + +def is_coworking_scene(scene_text: Any) -> bool: + return scene_camera_profile(scene_text).get("family") == "coworking" + + +def is_scene_camera_aware(scene_text: Any) -> bool: + return bool(scene_camera_profile(scene_text)) def _compact_label(value: Any, compact_labels: Mapping[str, str] | None = None) -> str: @@ -97,30 +191,10 @@ def camera_distance_from_text(text: Any) -> str: def coworking_location_profile(scene_text: Any) -> dict[str, str]: - text = str(scene_text or "").lower() - if "business cafe" in text or "work cafe" in text or "cafe" in text: - return { - "layout_label": "Business cafe camera layout", - "place": "business cafe coworking counter", - "foreground": "counter edge, laptop corner, and small plant", - "midground": "bar stools, warm desk lamps, and coffee-counter work spots", - "background": "plants, mirror strip, menu wall, and repeated cafe work tables", - } - if "corporate office" in text or "office after hours" in text or "copier" in text: - return { - "layout_label": "Office camera layout", - "place": "empty after-hours office", - "foreground": "copier alcove edge, chair backs, and nearest desk corner", - "midground": "repeating desks, glass partition seams, and muted monitor glow", - "background": "rows of empty workstations, city-light windows, and quiet office depth", - } - return { - "layout_label": "Coworking camera layout", - "place": "coworking lounge", - "foreground": "near desk edge, laptop corner, and chair back", - "midground": "warm work desks, laptop tables, and glass partition seams", - "background": "tall windows, repeated desk rows, plants, and soft shared-office depth", - } + profile = scene_camera_profile(scene_text) + if profile.get("family") == "coworking": + return profile + return scene_camera_profile("coworking lounge") def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = None) -> tuple[str, str]: @@ -135,7 +209,7 @@ def coworking_subject_terms(subject_kind: str, pov_labels: list[str] | None = No return "the subjects", "them" -def coworking_direction_detail( +def scene_direction_detail( direction: str, profile: dict[str, str], pov_labels: list[str] | None = None, @@ -145,12 +219,13 @@ def coworking_direction_detail( foreground = profile["foreground"] midground = profile["midground"] background = profile["background"] + detail_label = profile.get("detail_label") or "location details" subject, pronoun = coworking_subject_terms(subject_kind, pov_labels) if pov_labels: if "right side" in direction: - return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges" + return f"{subject} is in right-side profile; {midground} run behind {pronoun} toward {background}, with {detail_label} kept at the frame edges" if "left side" in direction: - return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with coworking details kept at the frame edges" + return f"{subject} is in left-side profile; {midground} run behind {pronoun} toward {background}, with {detail_label} kept at the frame edges" if "back-right" in direction or "back-left" in direction: return f"{subject} stays close in one continuous diagonal first-person body angle; {midground} lead toward {background} behind {pronoun} at the edges, not in the lower foreground" if direction == "back view": @@ -169,7 +244,16 @@ def coworking_direction_detail( return f"{subject} faces camera beside the {foreground}; {midground} sit between {pronoun} and {background}" -def coworking_distance_detail( +def coworking_direction_detail( + direction: str, + profile: dict[str, str], + pov_labels: list[str] | None = None, + subject_kind: str = "subjects", +) -> str: + return scene_direction_detail(direction, profile, pov_labels, subject_kind) + + +def scene_distance_detail( distance: str, profile: dict[str, str], subject_kind: str, @@ -179,18 +263,27 @@ def coworking_distance_detail( subject, _pronoun = coworking_subject_terms(subject_kind, pov_labels) if pov_labels: if "wide" in distance or "full-body" in distance or "full body" in distance: - return f"wide POV keeps {subject} readable with coworking context behind them" + return f"wide POV keeps {subject} readable with {profile['place']} context behind them" if "close" in distance: - return f"close POV keeps {subject} dominant with coworking context only at the sides or background" + return f"close POV keeps {subject} dominant with {profile['place']} context only at the sides or background" return f"medium POV keeps {subject} dominant with room context behind them" if "wide" in distance or "full-body" in distance or "full body" in distance: - return "wide crop keeps floor aisle, table rows, and window depth readable" + return f"wide crop keeps the {profile['foreground']}, {profile['midground']}, and {profile['background']} readable" if "close" in distance: - return "close crop keeps one desk or counter anchor visible" + return f"close crop keeps one anchor from the {profile['foreground']} visible" return f"medium crop keeps {subject} dominant" -def coworking_elevation_detail( +def coworking_distance_detail( + distance: str, + profile: dict[str, str], + subject_kind: str, + pov_labels: list[str] | None = None, +) -> str: + return scene_distance_detail(distance, profile, subject_kind, pov_labels) + + +def scene_elevation_detail( elevation: str, profile: dict[str, str], subject_kind: str, @@ -200,29 +293,39 @@ def coworking_elevation_detail( subject, pronoun = coworking_subject_terms(subject_kind, pov_labels) if pov_labels: if "low-angle" in elevation: - return f"low angle keeps POV body cues low while windows and partition lines rise behind {pronoun}" + return f"low angle keeps POV body cues low while the {profile['background']} rises behind {pronoun}" if "elevated" in elevation: - return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with tabletop and glass lines only behind or at the side edges" + return f"elevated POV keeps the viewer's eye line slightly higher than {subject}, with {profile['foreground']} only behind or at the side edges" if "high-angle" in elevation: - return f"high angle looks down from the viewer's position with desks and aisle only in the background" - return f"eye-level angle keeps tabletop lines and glass seams behind {pronoun}" + return f"high angle looks down from the viewer's position with {profile['midground']} only in the background" + return f"eye-level angle keeps {profile['midground']} behind {pronoun}" if "low-angle" in elevation: - return f"low angle keeps the foreground desk edge low while windows and partitions rise behind {pronoun}" + return f"low angle keeps the {profile['foreground']} low while {profile['background']} rises behind {pronoun}" if "elevated" in elevation: - return f"elevated angle shows tabletop surfaces, laptop shapes, chairs, and walking aisle around {pronoun}" + return f"elevated angle shows the {profile['foreground']} and {profile['midground']} around {pronoun}" if "high-angle" in elevation: - return f"high angle shows the desk grid, chairs, floor aisle, and placement of {pronoun}" - return f"eye-level angle keeps tabletop lines and glass seams straight" + return f"high angle shows the {profile['place']} layout and placement of {pronoun}" + return f"eye-level angle keeps {profile['midground']} visually stable" -def coworking_camera_scene_directive( +def coworking_elevation_detail( + elevation: str, + profile: dict[str, str], + subject_kind: str, + pov_labels: list[str] | None = None, +) -> str: + return scene_elevation_detail(elevation, profile, subject_kind, pov_labels) + + +def scene_camera_directive( scene_text: Any, parsed: dict[str, Any], pov_labels: list[str] | None = None, subject_kind: str = "subjects", compact_labels: Mapping[str, str] | None = None, ) -> str: - if not is_coworking_scene(scene_text): + profile = scene_camera_profile(scene_text) + if not profile: return "" direction = str(parsed.get("orbit_direction") or "").strip() elevation = str(parsed.get("orbit_elevation_label") or "").strip() @@ -233,10 +336,9 @@ def coworking_camera_scene_directive( distance = distance or camera_distance_from_text(custom_prompt) if not any((direction, elevation, distance, custom_prompt)): return "" - profile = coworking_location_profile(scene_text) - direction_detail = coworking_direction_detail(direction, profile, pov_labels, subject_kind) - distance_detail = coworking_distance_detail(distance, profile, subject_kind, pov_labels) - elevation_detail = coworking_elevation_detail(elevation, profile, subject_kind, pov_labels) + direction_detail = scene_direction_detail(direction, profile, pov_labels, subject_kind) + distance_detail = scene_distance_detail(distance, profile, subject_kind, pov_labels) + elevation_detail = scene_elevation_detail(elevation, profile, subject_kind, pov_labels) if pov_labels: return ( f"{profile['layout_label']} from POV: {direction_detail}. " @@ -250,19 +352,56 @@ def coworking_camera_scene_directive( ) -def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str: +def coworking_camera_scene_directive( + scene_text: Any, + parsed: dict[str, Any], + pov_labels: list[str] | None = None, + subject_kind: str = "subjects", + compact_labels: Mapping[str, str] | None = None, +) -> str: + if not is_coworking_scene(scene_text): + return "" + return scene_camera_directive(scene_text, parsed, pov_labels, subject_kind, compact_labels) + + +def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str: + composition = profile.get("composition") if isinstance(profile.get("composition"), dict) else {} + if subject_kind == "woman" and composition.get("woman"): + return str(composition["woman"]) + if subject_kind == "man" and composition.get("man"): + return str(composition["man"]) + text = str(composition.get("default") or f"{profile['place']} frame with the subjects clearly placed in the room") + if subject_kind == "couple": + text = text.replace("the subjects", "the couple") + if "composition" not in text.lower(): + text = f"{text} composition" + return text + + +def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str: text = str(composition or "").strip() - if not text or not is_coworking_scene(scene_text): + if not text: + return text + profile = scene_camera_profile(scene_text) + if not profile: return text lower = text.lower() - if not any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")): + profile_lower = " ".join( + str(profile.get(key, "")).lower() + for key in ("place", "foreground", "midground", "background") + ) + already_matches = any(term and term in lower for term in profile_lower.replace(",", " ").split()) + mismatched = any(term in lower for term in MISMATCHED_COMPOSITION_TERMS) + office_generic = any(term in lower for term in ("office-lobby", "office lobby", "walking composition", "outfit-check")) + if not mismatched and not office_generic and already_matches: return text - subject, _pronoun = coworking_subject_terms(subject_kind) - if subject_kind == "woman": - return "coworking lounge selfie frame with the woman near a desk edge and tall-window depth behind her" - if subject_kind == "man": - return "coworking lounge portrait frame with the man near a desk edge and tall-window depth behind him" - return f"coworking lounge frame with {subject} near a desk edge and tall-window depth behind them" + if not mismatched and not office_generic and profile.get("family") != "coworking": + return text + return profile_composition_text(profile, subject_kind) + + +def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str: + return contextual_composition_prompt(scene_text, composition, subject_kind) def camera_scene_directive_for_context( @@ -277,7 +416,7 @@ def camera_scene_directive_for_context( or parsed_camera_config.get("camera_mode") == "disabled" ): return "" - return coworking_camera_scene_directive( + return scene_camera_directive( scene_text, parsed_camera_config, pov_labels, diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 445c388..0dd3f68 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -548,6 +548,28 @@ def smoke_row_camera_policy() -> None: "coworking lounge frame with the couple near a desk edge" in updated.get("composition", ""), "row camera policy did not adapt coworking composition for couple rows", ) + library_row = { + "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.", + "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration", + "scene_text": "grand classical library hall with towering dark-wood bookshelves, carved columns, rolling ladders, marble floor, and warm brass lamps", + "composition": "polished mirror view with bag and shoes visible", + "subject_type": "woman", + "women_count": 1, + "men_count": 0, + } + updated_library = row_camera.apply_camera_config( + library_row, + _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0), + compact_labels=pb.CAMERA_COMPACT_LABELS, + ) + library_scene = _expect_text("row_camera_policy.library_scene", updated_library.get("camera_scene_directive"), 40) + library_composition = _expect_text("row_camera_policy.library_composition", updated_library.get("composition"), 20) + _expect("Library camera layout" in library_scene, "row camera policy missed library layout") + _expect("front-left quarter view" in library_scene, "row camera library layout missed orbit direction") + _expect("bookshelf" in library_scene.lower() or "bookshelves" in library_scene.lower(), "row camera library layout missed shelf anchors") + _expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording") + _expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording") + _expect("library" in library_composition.lower(), "row camera library composition did not become location-aware") def smoke_config_route_location_theme() -> None: @@ -588,8 +610,12 @@ def smoke_config_route_location_theme() -> None: scene = _expect_text("config_route_location_theme.scene_text", row.get("scene_text"), 20) composition = _expect_text("config_route_location_theme.composition", row.get("composition"), 10) camera = _expect_text("config_route_location_theme.camera_directive", row.get("camera_directive"), 20) + scene_directive = _expect_text("config_route_location_theme.camera_scene_directive", row.get("camera_scene_directive"), 40) _expect("library" in scene.lower() or "bookshelves" in scene.lower(), "location theme did not drive scene") _expect("books" in composition.lower() or "shelf" in composition.lower() or "library" in composition.lower(), "location theme did not drive composition") + _expect("Library camera layout" in scene_directive, "location theme did not drive library camera-scene adapter") + _expect("front-left quarter view" in scene_directive, "library camera-scene adapter missed orbit direction") + _expect("bag" not in composition.lower() and "shoes" not in composition.lower(), "location theme composition leaked outfit-check props") _expect("315-degree front-left quarter view" in camera, "config route did not preserve orbit camera directive") seed_config = row.get("seed_config") if isinstance(row.get("seed_config"), dict) else {} _expect(seed_config.get("pose_seed") == 3302, "seed lock did not reroll pose axis") @@ -608,6 +634,7 @@ def smoke_config_route_location_theme() -> None: krea = krea_formatter.format_krea2_prompt("", metadata_json=_json(row), target="single") prompt = krea.get("krea_prompt") or "" _expect("library" in prompt.lower() or "bookshelves" in prompt.lower(), "Krea config route lost theme scene") + _expect("Library camera layout" in prompt, "Krea config route lost library camera-scene directive") _expect("315-degree front-left quarter view" in prompt, "Krea config route lost camera directive") _expect_formatter_outputs(row, "config_route_location_theme", target="single")