Use metadata for scene camera profiles

2026-06-27 13:25:36 +02:00
parent 75a71a2df6
commit f811c02641
5 changed files with 194 additions and 11 deletions
@@ -424,6 +424,8 @@ Rows keep the selected `scene_entry`, `location_theme`, `scene_theme`,
 `composition_entry`, `composition_theme`, and `scene_camera_profile_key` in
 `metadata_json` so location/camera behavior can be debugged without guessing
 from prompt text alone.
+When camera-aware profile routing runs, the explicit `scene_camera_profile_key`,
+the `scene_entry` profile key, and theme metadata are tried before text matching.

 `SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into
 comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so
@@ -653,6 +653,9 @@ Camera handling:
 Current camera-aware scene adapter:

 - Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`.
+- Profile resolution is metadata-first: explicit `scene_camera_profile_key`,
+  selected `scene_entry` profile keys, and theme metadata are preferred before
+  text matching.
 - Coworking/business-cafe/office scenes and classical library/book-stack scenes
  are detected by `scene_camera_profile`.
 - Location themes preserve `theme` on configs and selected scene entries, and
@@ -47,10 +47,29 @@ def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind
    return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)


+def row_scene_text(row: dict[str, Any]) -> Any:  # Scene text lookup used by apply_contextual_composition and apply_camera_config.
+    return row.get("scene_text") or row.get("source_scene_text") or row.get("scene")  # first truthy key wins; otherwise the raw `scene` value (None if missing)
+
+
+def row_scene_theme(row: dict[str, Any]) -> str:  # Theme hint passed to profile resolution; always returned as a str.
+    return str(row.get("scene_theme") or row.get("location_theme") or "")  # scene_theme wins over location_theme; "" when neither is truthy
+
+
+def row_scene_profile_key(row: dict[str, Any]) -> str:  # Explicit profile key stored on the row, returned as a str.
+    return str(row.get("scene_camera_profile_key") or "")  # "" when the key is missing or falsy
+
+
 def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
-    scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
+    scene_text = row_scene_text(row)
    old_composition = str(row.get("composition") or "").strip()
-    new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind)
+    new_composition = scene_camera_adapters.contextual_composition_prompt(
+        scene_text,
+        old_composition,
+        subject_kind,
+        scene_entry=row.get("scene_entry"),
+        theme=row_scene_theme(row),
+        profile_key=row_scene_profile_key(row),
+    )
    if not old_composition or new_composition == old_composition:
        return row
    row["source_composition"] = row.get("source_composition") or old_composition
@@ -70,8 +89,19 @@ def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict
    return row


-def scene_camera_profile_metadata(scene_text: Any) -> dict[str, str]:
-    profile = scene_camera_adapters.scene_camera_profile(scene_text)
+def scene_camera_profile_metadata(
+    scene_text: Any = "",
+    *,
+    scene_entry: Any = None,
+    theme: Any = "",
+    profile_key: Any = "",
+) -> dict[str, str]:
+    profile = scene_camera_adapters.scene_camera_profile(
+        scene_text,
+        scene_entry=scene_entry,
+        theme=theme,
+        profile_key=profile_key,
+    )
    if not profile:
        return {}
    return {
@@ -89,6 +119,10 @@ def camera_scene_directive_for_context(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
+    *,
+    scene_entry: Any = None,
+    theme: Any = "",
+    profile_key: Any = "",
 ) -> tuple[str, dict[str, Any]]:
    parsed = camera_policy.parse_camera_config(camera_config)
    directive = scene_camera_adapters.camera_scene_directive_for_context(
@@ -97,6 +131,9 @@ def camera_scene_directive_for_context(
        pov_labels,
        subject_kind,
        compact_labels,
+        scene_entry=scene_entry,
+        theme=theme,
+        profile_key=profile_key,
    )
    return directive, parsed

@@ -141,17 +178,25 @@ def apply_camera_config(
    pov_labels = row_pov_labels(row, pov_label_resolver)
    subject_kind = row_camera_subject_kind(row)
    row = apply_contextual_composition(row, subject_kind)
-    profile_metadata = scene_camera_profile_metadata(row.get("scene_text") or row.get("source_scene_text") or row.get("scene"))
+    profile_metadata = scene_camera_profile_metadata(
+        row_scene_text(row),
+        scene_entry=row.get("scene_entry"),
+        theme=row_scene_theme(row),
+        profile_key=row_scene_profile_key(row),
+    )
    if profile_metadata:
        row["scene_camera_profile"] = profile_metadata
        row["scene_camera_profile_key"] = profile_metadata.get("key", "")
    scene_directive, parsed = camera_scene_directive_for_context(
-        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
+        row_scene_text(row),
        row.get("composition") or row.get("source_composition"),
        parsed,
        pov_labels,
        subject_kind,
        compact_labels,
+        scene_entry=row.get("scene_entry"),
+        theme=row_scene_theme(row),
+        profile_key=row_scene_profile_key(row),
    )
    row["camera_config"] = parsed
    row["camera_scene_directive"] = scene_directive
@@ -112,6 +112,12 @@ SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
    },
 )

+SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES}  # profile key -> shallow copy of that profile
+
+THEME_PROFILE_KEYS = {  # theme name -> profile key; consulted by _profile_by_key when the value is not itself a profile key
+    "classical_library": "classical_library",
+}
+
 MISMATCHED_COMPOSITION_TERMS = (
    "outfit-check",
    "outfit check",
@@ -123,8 +129,63 @@ MISMATCHED_COMPOSITION_TERMS = (
 )


-def scene_camera_profile(scene_text: Any) -> dict[str, Any]:
-    text = str(scene_text or "").lower()
+def _profile_by_key(value: Any) -> dict[str, Any]:  # Resolve a profile key or a mapped theme name to a profile copy; {} when nothing matches.
+    key = str(value or "").strip()  # None and other falsy values collapse to ""
+    if not key:
+        return {}
+    if key in SCENE_CAMERA_PROFILE_KEYS:  # exact, case-sensitive match on a profile key
+        return dict(SCENE_CAMERA_PROFILE_KEYS[key])  # shallow copy; nested values are still shared with the registry
+    mapped_key = THEME_PROFILE_KEYS.get(key)  # otherwise try the value as a theme name
+    if mapped_key and mapped_key in SCENE_CAMERA_PROFILE_KEYS:  # ignore theme mappings that point at an unknown profile
+        return dict(SCENE_CAMERA_PROFILE_KEYS[mapped_key])
+    return {}
+
+
+def _scene_entry_text(scene_entry: Any) -> str:  # First truthy descriptive field of a scene entry dict, stripped; "" otherwise.
+    if not isinstance(scene_entry, dict):  # non-dict entries (including None) contribute no text
+        return ""
+    return str(
+        scene_entry.get("prompt")  # lookup order: prompt, description, text, name
+        or scene_entry.get("description")
+        or scene_entry.get("text")
+        or scene_entry.get("name")
+        or ""
+    ).strip()
+
+
+def _scene_entry_profile_key(scene_entry: Any) -> str:  # First truthy profile-key field of a scene entry dict, stripped; "" otherwise.
+    if not isinstance(scene_entry, dict):  # non-dict entries (including None) carry no profile key
+        return ""
+    return str(
+        scene_entry.get("scene_camera_profile_key")  # lookup order: scene_camera_profile_key, camera_profile_key, camera_profile, profile
+        or scene_entry.get("camera_profile_key")
+        or scene_entry.get("camera_profile")
+        or scene_entry.get("profile")
+        or ""  # the result is not validated here; scene_camera_profile passes it to _profile_by_key
+    ).strip()
+
+
+def scene_camera_profile(
+    scene_text: Any = "",
+    *,
+    scene_entry: Any = None,
+    theme: Any = "",
+    profile_key: Any = "",
+) -> dict[str, Any]:
+    explicit_profile = _profile_by_key(profile_key)
+    if explicit_profile:
+        return explicit_profile
+    entry_profile = _profile_by_key(_scene_entry_profile_key(scene_entry))
+    if entry_profile:
+        return entry_profile
+    theme_profile = _profile_by_key(theme)
+    if theme_profile:
+        return theme_profile
+    if isinstance(scene_entry, dict):
+        entry_theme_profile = _profile_by_key(scene_entry.get("theme"))
+        if entry_theme_profile:
+            return entry_theme_profile
+    text = " ".join(part for part in (str(scene_text or ""), _scene_entry_text(scene_entry)) if part).lower()
    if not text:
        return {}
    for profile in SCENE_CAMERA_PROFILES:
@@ -323,8 +384,12 @@ def scene_camera_directive(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
+    *,
+    scene_entry: Any = None,
+    theme: Any = "",
+    profile_key: Any = "",
 ) -> str:
-    profile = scene_camera_profile(scene_text)
+    profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
    if not profile:
        return ""
    direction = str(parsed.get("orbit_direction") or "").strip()
@@ -378,11 +443,19 @@ def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
    return text


-def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+def contextual_composition_prompt(
+    scene_text: Any,
+    composition: Any,
+    subject_kind: str = "subjects",
+    *,
+    scene_entry: Any = None,
+    theme: Any = "",
+    profile_key: Any = "",
+) -> str:
    text = str(composition or "").strip()
    if not text:
        return text
-    profile = scene_camera_profile(scene_text)
+    profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
    if not profile:
        return text
    lower = text.lower()
@@ -410,6 +483,10 @@ def camera_scene_directive_for_context(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
+    *,
+    scene_entry: Any = None,
+    theme: Any = "",
+    profile_key: Any = "",
 ) -> str:
    if (
        parsed_camera_config.get("camera_detail") == "off"
@@ -422,4 +499,7 @@ def camera_scene_directive_for_context(
        pov_labels,
        subject_kind,
        compact_labels,
+        scene_entry=scene_entry,
+        theme=theme,
+        profile_key=profile_key,
    )
@@ -570,6 +570,59 @@ def smoke_row_camera_policy() -> None:
    _expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording")
    _expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording")
    _expect("library" in library_composition.lower(), "row camera library composition did not become location-aware")
+    metadata_profile_row = {
+        "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
+        "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
+        "scene_text": "private themed room with neutral walls and warm lamps",
+        "scene_entry": {
+            "slug": "library_by_metadata",
+            "prompt": "private themed room with neutral walls and warm lamps",
+            "theme": "classical_library",
+        },
+        "scene_theme": "classical_library",
+        "composition": "polished mirror view with bag and shoes visible",
+        "subject_type": "woman",
+        "women_count": 1,
+        "men_count": 0,
+    }
+    updated_metadata_profile = row_camera.apply_camera_config(
+        metadata_profile_row,
+        _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
+        compact_labels=pb.CAMERA_COMPACT_LABELS,
+    )
+    metadata_scene = _expect_text(
+        "row_camera_policy.metadata_scene",
+        updated_metadata_profile.get("camera_scene_directive"),
+        40,
+    )
+    _expect("Library camera layout" in metadata_scene, "row camera should prefer scene theme metadata over generic scene text")
+    _expect(
+        updated_metadata_profile.get("scene_camera_profile_key") == "classical_library",
+        "row camera should expose metadata-selected profile key",
+    )
+    _expect(
+        "library" in str(updated_metadata_profile.get("composition", "")).lower(),
+        "row camera metadata-selected profile did not clean composition",
+    )
+    explicit_profile_row = {
+        "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
+        "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
+        "scene_text": "coworking lounge with tall windows, warm desks, and glass partitions",
+        "scene_camera_profile_key": "classical_library",
+        "composition": "polished mirror view with bag and shoes visible",
+        "subject_type": "woman",
+        "women_count": 1,
+        "men_count": 0,
+    }
+    updated_explicit_profile = row_camera.apply_camera_config(
+        explicit_profile_row,
+        _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
+        compact_labels=pb.CAMERA_COMPACT_LABELS,
+    )
+    _expect(
+        "Library camera layout" in str(updated_explicit_profile.get("camera_scene_directive", "")),
+        "explicit scene_camera_profile_key should override text-matched scene profile",
+    )


 def smoke_config_route_location_theme() -> None: