diff --git a/README.md b/README.md index 1c8dcab..694803d 100644 --- a/README.md +++ b/README.md @@ -424,6 +424,8 @@ Rows keep the selected `scene_entry`, `location_theme`, `scene_theme`, `composition_entry`, `composition_theme`, and `scene_camera_profile_key` in `metadata_json` so location/camera behavior can be debugged without guessing from prompt text alone. +When camera-aware profile routing runs, explicit `scene_camera_profile_key` and +theme metadata are used before fallback text matching. `SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index d6f1ca9..d4f7468 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -653,6 +653,9 @@ Camera handling: Current camera-aware scene adapter: - Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`. +- Profile resolution is metadata-first: explicit `scene_camera_profile_key`, + selected `scene_entry` profile keys, and theme metadata are preferred before + text matching. - Coworking/business-cafe/office scenes and classical library/book-stack scenes are detected by `scene_camera_profile`. - Location themes preserve `theme` on configs and selected scene entries, and diff --git a/row_camera.py b/row_camera.py index c3f0763..1512bdf 100644 --- a/row_camera.py +++ b/row_camera.py @@ -47,10 +47,29 @@ def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind) +def row_scene_text(row: dict[str, Any]) -> Any: + return row.get("scene_text") or row.get("source_scene_text") or row.get("scene") + + +def row_scene_theme(row: dict[str, Any]) -> str: + return str(row.get("scene_theme") or row.get("location_theme") or "") + + +def row_scene_profile_key(row: dict[str, Any]) -> str: + return str(row.get("scene_camera_profile_key") or "") + + def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]: - scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene") + scene_text = row_scene_text(row) old_composition = str(row.get("composition") or "").strip() - new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind) + new_composition = scene_camera_adapters.contextual_composition_prompt( + scene_text, + old_composition, + subject_kind, + scene_entry=row.get("scene_entry"), + theme=row_scene_theme(row), + profile_key=row_scene_profile_key(row), + ) if not old_composition or new_composition == old_composition: return row row["source_composition"] = row.get("source_composition") or old_composition @@ -70,8 +89,19 @@ def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict return row -def scene_camera_profile_metadata(scene_text: Any) -> dict[str, str]: - profile = scene_camera_adapters.scene_camera_profile(scene_text) +def scene_camera_profile_metadata( + scene_text: Any = "", + *, + scene_entry: Any = None, + theme: Any = "", + profile_key: Any = "", +) -> dict[str, str]: + profile = scene_camera_adapters.scene_camera_profile( + scene_text, + scene_entry=scene_entry, + theme=theme, + profile_key=profile_key, + ) if not profile: return {} return { @@ -89,6 +119,10 @@ def camera_scene_directive_for_context( pov_labels: list[str] | None = None, subject_kind: str = "subjects", compact_labels: Mapping[str, str] | None = None, + *, + scene_entry: Any = None, + theme: Any = "", + profile_key: Any = "", ) -> tuple[str, dict[str, Any]]: parsed = camera_policy.parse_camera_config(camera_config) directive = scene_camera_adapters.camera_scene_directive_for_context( @@ -97,6 +131,9 @@ def camera_scene_directive_for_context( pov_labels, subject_kind, compact_labels, + scene_entry=scene_entry, + theme=theme, + profile_key=profile_key, ) return directive, parsed @@ -141,17 +178,25 @@ def apply_camera_config( pov_labels = row_pov_labels(row, pov_label_resolver) subject_kind = row_camera_subject_kind(row) row = apply_contextual_composition(row, subject_kind) - profile_metadata = scene_camera_profile_metadata(row.get("scene_text") or row.get("source_scene_text") or row.get("scene")) + profile_metadata = scene_camera_profile_metadata( + row_scene_text(row), + scene_entry=row.get("scene_entry"), + theme=row_scene_theme(row), + profile_key=row_scene_profile_key(row), + ) if profile_metadata: row["scene_camera_profile"] = profile_metadata row["scene_camera_profile_key"] = profile_metadata.get("key", "") scene_directive, parsed = camera_scene_directive_for_context( - row.get("scene_text") or row.get("source_scene_text") or row.get("scene"), + row_scene_text(row), row.get("composition") or row.get("source_composition"), parsed, pov_labels, subject_kind, compact_labels, + scene_entry=row.get("scene_entry"), + theme=row_scene_theme(row), + profile_key=row_scene_profile_key(row), ) row["camera_config"] = parsed row["camera_scene_directive"] = scene_directive diff --git a/scene_camera_adapters.py b/scene_camera_adapters.py index 5242744..15ffc78 100644 --- a/scene_camera_adapters.py +++ b/scene_camera_adapters.py @@ -112,6 +112,12 @@ SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = ( }, ) +SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES} + +THEME_PROFILE_KEYS = { + "classical_library": "classical_library", +} + MISMATCHED_COMPOSITION_TERMS = ( "outfit-check", "outfit check", @@ -123,8 +129,63 @@ MISMATCHED_COMPOSITION_TERMS = ( ) -def scene_camera_profile(scene_text: Any) -> dict[str, Any]: - text = str(scene_text or "").lower() +def _profile_by_key(value: Any) -> dict[str, Any]: + key = str(value or "").strip() + if not key: + return {} + if key in SCENE_CAMERA_PROFILE_KEYS: + return dict(SCENE_CAMERA_PROFILE_KEYS[key]) + mapped_key = THEME_PROFILE_KEYS.get(key) + if mapped_key and mapped_key in SCENE_CAMERA_PROFILE_KEYS: + return dict(SCENE_CAMERA_PROFILE_KEYS[mapped_key]) + return {} + + +def _scene_entry_text(scene_entry: Any) -> str: + if not isinstance(scene_entry, dict): + return "" + return str( + scene_entry.get("prompt") + or scene_entry.get("description") + or scene_entry.get("text") + or scene_entry.get("name") + or "" + ).strip() + + +def _scene_entry_profile_key(scene_entry: Any) -> str: + if not isinstance(scene_entry, dict): + return "" + return str( + scene_entry.get("scene_camera_profile_key") + or scene_entry.get("camera_profile_key") + or scene_entry.get("camera_profile") + or scene_entry.get("profile") + or "" + ).strip() + + +def scene_camera_profile( + scene_text: Any = "", + *, + scene_entry: Any = None, + theme: Any = "", + profile_key: Any = "", +) -> dict[str, Any]: + explicit_profile = _profile_by_key(profile_key) + if explicit_profile: + return explicit_profile + entry_profile = _profile_by_key(_scene_entry_profile_key(scene_entry)) + if entry_profile: + return entry_profile + theme_profile = _profile_by_key(theme) + if theme_profile: + return theme_profile + if isinstance(scene_entry, dict): + entry_theme_profile = _profile_by_key(scene_entry.get("theme")) + if entry_theme_profile: + return entry_theme_profile + text = " ".join(part for part in (str(scene_text or ""), _scene_entry_text(scene_entry)) if part).lower() if not text: return {} for profile in SCENE_CAMERA_PROFILES: @@ -323,8 +384,12 @@ def scene_camera_directive( pov_labels: list[str] | None = None, subject_kind: str = "subjects", compact_labels: Mapping[str, str] | None = None, + *, + scene_entry: Any = None, + theme: Any = "", + profile_key: Any = "", ) -> str: - profile = scene_camera_profile(scene_text) + profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key) if not profile: return "" direction = str(parsed.get("orbit_direction") or "").strip() @@ -378,11 +443,19 @@ def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str: return text -def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str: +def contextual_composition_prompt( + scene_text: Any, + composition: Any, + subject_kind: str = "subjects", + *, + scene_entry: Any = None, + theme: Any = "", + profile_key: Any = "", +) -> str: text = str(composition or "").strip() if not text: return text - profile = scene_camera_profile(scene_text) + profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key) if not profile: return text lower = text.lower() @@ -410,6 +483,10 @@ def camera_scene_directive_for_context( pov_labels: list[str] | None = None, subject_kind: str = "subjects", compact_labels: Mapping[str, str] | None = None, + *, + scene_entry: Any = None, + theme: Any = "", + profile_key: Any = "", ) -> str: if ( parsed_camera_config.get("camera_detail") == "off" @@ -422,4 +499,7 @@ def camera_scene_directive_for_context( pov_labels, subject_kind, compact_labels, + scene_entry=scene_entry, + theme=theme, + profile_key=profile_key, ) diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index cf16c69..311aa52 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -570,6 +570,59 @@ def smoke_row_camera_policy() -> None: _expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording") _expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording") _expect("library" in library_composition.lower(), "row camera library composition did not become location-aware") + metadata_profile_row = { + "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.", + "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration", + "scene_text": "private themed room with neutral walls and warm lamps", + "scene_entry": { + "slug": "library_by_metadata", + "prompt": "private themed room with neutral walls and warm lamps", + "theme": "classical_library", + }, + "scene_theme": "classical_library", + "composition": "polished mirror view with bag and shoes visible", + "subject_type": "woman", + "women_count": 1, + "men_count": 0, + } + updated_metadata_profile = row_camera.apply_camera_config( + metadata_profile_row, + _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0), + compact_labels=pb.CAMERA_COMPACT_LABELS, + ) + metadata_scene = _expect_text( + "row_camera_policy.metadata_scene", + updated_metadata_profile.get("camera_scene_directive"), + 40, + ) + _expect("Library camera layout" in metadata_scene, "row camera should prefer scene theme metadata over generic scene text") + _expect( + updated_metadata_profile.get("scene_camera_profile_key") == "classical_library", + "row camera should expose metadata-selected profile key", + ) + _expect( + "library" in str(updated_metadata_profile.get("composition", "")).lower(), + "row camera metadata-selected profile did not clean composition", + ) + explicit_profile_row = { + "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.", + "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration", + "scene_text": "coworking lounge with tall windows, warm desks, and glass partitions", + "scene_camera_profile_key": "classical_library", + "composition": "polished mirror view with bag and shoes visible", + "subject_type": "woman", + "women_count": 1, + "men_count": 0, + } + updated_explicit_profile = row_camera.apply_camera_config( + explicit_profile_row, + _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0), + compact_labels=pb.CAMERA_COMPACT_LABELS, + ) + _expect( + "Library camera layout" in str(updated_explicit_profile.get("camera_scene_directive", "")), + "explicit scene_camera_profile_key should override text-matched scene profile", + ) def smoke_config_route_location_theme() -> None: