Use metadata for scene camera profiles

2026-06-27 13:25:36 +02:00
parent 75a71a2df6
commit f811c02641
5 changed files with 194 additions and 11 deletions
@@ -424,6 +424,8 @@ Rows keep the selected `scene_entry`, `location_theme`, `scene_theme`,
 `composition_entry`, `composition_theme`, and `scene_camera_profile_key` in
 `metadata_json` so location/camera behavior can be debugged without guessing
 from prompt text alone.
 When camera-aware profile routing runs, an explicit `scene_camera_profile_key`,
 profile keys on the selected `scene_entry`, and theme metadata are consulted,
 in that order, before falling back to text matching.
 `SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into
 comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so
@@ -653,6 +653,9 @@ Camera handling:
 Current camera-aware scene adapter:
 - Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`.
 - Profile resolution is metadata-first: an explicit `scene_camera_profile_key`,
  then the selected `scene_entry` profile keys, then theme metadata are tried;
  text matching is only the fallback.
 - Coworking/business-cafe/office scenes and classical library/book-stack scenes
  are detected by `scene_camera_profile`.
 - Location themes preserve `theme` on configs and selected scene entries, and
@@ -47,10 +47,29 @@ def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind
    return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
 def row_scene_text(row: dict[str, Any]) -> Any:
    """Return the row's scene description, trying the known keys in order.

    ``scene_text`` is checked first, then ``source_scene_text``; the first
    truthy value wins.  If neither is truthy, the raw ``scene`` lookup is
    returned unchanged (``None`` when that key is absent).
    """
    for field in ("scene_text", "source_scene_text"):
        candidate = row.get(field)
        if candidate:
            return candidate
    return row.get("scene")
 def row_scene_theme(row: dict[str, Any]) -> str:
    """Return the row's theme as a string.

    ``scene_theme`` takes precedence over ``location_theme``.  An empty
    string is returned when neither key holds a truthy value.
    """
    theme = row.get("scene_theme")
    if not theme:
        theme = row.get("location_theme")
    return str(theme) if theme else ""
 def row_scene_profile_key(row: dict[str, Any]) -> str:
    """Return the row's explicit ``scene_camera_profile_key`` as a string.

    A missing or falsy value yields an empty string.
    """
    explicit_key = row.get("scene_camera_profile_key")
    if not explicit_key:
        return ""
    return str(explicit_key)
 def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
-    scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
+    scene_text = row_scene_text(row)
    old_composition = str(row.get("composition") or "").strip()
-    new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind)
+    new_composition = scene_camera_adapters.contextual_composition_prompt(
        scene_text,
        old_composition,
        subject_kind,
        scene_entry=row.get("scene_entry"),
        theme=row_scene_theme(row),
        profile_key=row_scene_profile_key(row),
    )
    if not old_composition or new_composition == old_composition:
        return row
    row["source_composition"] = row.get("source_composition") or old_composition
@@ -70,8 +89,19 @@ def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict
    return row
-def scene_camera_profile_metadata(scene_text: Any) -> dict[str, str]:
+def scene_camera_profile_metadata(
-    profile = scene_camera_adapters.scene_camera_profile(scene_text)
+    scene_text: Any = "",
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
 ) -> dict[str, str]:
    profile = scene_camera_adapters.scene_camera_profile(
        scene_text,
        scene_entry=scene_entry,
        theme=theme,
        profile_key=profile_key,
    )
    if not profile:
        return {}
    return {
@@ -89,6 +119,10 @@ def camera_scene_directive_for_context(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
 ) -> tuple[str, dict[str, Any]]:
    parsed = camera_policy.parse_camera_config(camera_config)
    directive = scene_camera_adapters.camera_scene_directive_for_context(
@@ -97,6 +131,9 @@ def camera_scene_directive_for_context(
        pov_labels,
        subject_kind,
        compact_labels,
        scene_entry=scene_entry,
        theme=theme,
        profile_key=profile_key,
    )
    return directive, parsed
@@ -141,17 +178,25 @@ def apply_camera_config(
    pov_labels = row_pov_labels(row, pov_label_resolver)
    subject_kind = row_camera_subject_kind(row)
    row = apply_contextual_composition(row, subject_kind)
-    profile_metadata = scene_camera_profile_metadata(row.get("scene_text") or row.get("source_scene_text") or row.get("scene"))
+    profile_metadata = scene_camera_profile_metadata(
        row_scene_text(row),
        scene_entry=row.get("scene_entry"),
        theme=row_scene_theme(row),
        profile_key=row_scene_profile_key(row),
    )
    if profile_metadata:
        row["scene_camera_profile"] = profile_metadata
        row["scene_camera_profile_key"] = profile_metadata.get("key", "")
    scene_directive, parsed = camera_scene_directive_for_context(
-        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
+        row_scene_text(row),
        row.get("composition") or row.get("source_composition"),
        parsed,
        pov_labels,
        subject_kind,
        compact_labels,
        scene_entry=row.get("scene_entry"),
        theme=row_scene_theme(row),
        profile_key=row_scene_profile_key(row),
    )
    row["camera_config"] = parsed
    row["camera_scene_directive"] = scene_directive
@@ -112,6 +112,12 @@ SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
    },
 )
 # Lookup table from profile key (as a string) to a shallow copy of the
 # matching entry in SCENE_CAMERA_PROFILES.
 SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES}
 # Maps a theme name to the profile key it resolves to. _profile_by_key
 # consults this only when the value is not itself a known profile key.
 THEME_PROFILE_KEYS = {
    "classical_library": "classical_library",
 }
 MISMATCHED_COMPOSITION_TERMS = (
    "outfit-check",
    "outfit check",
@@ -123,8 +129,63 @@ MISMATCHED_COMPOSITION_TERMS = (
 )
-def scene_camera_profile(scene_text: Any) -> dict[str, Any]:
+def _profile_by_key(value: Any) -> dict[str, Any]:
-    text = str(scene_text or "").lower()
+    key = str(value or "").strip()
    if not key:
        return {}
    if key in SCENE_CAMERA_PROFILE_KEYS:
        return dict(SCENE_CAMERA_PROFILE_KEYS[key])
    mapped_key = THEME_PROFILE_KEYS.get(key)
    if mapped_key and mapped_key in SCENE_CAMERA_PROFILE_KEYS:
        return dict(SCENE_CAMERA_PROFILE_KEYS[mapped_key])
    return {}
 def _scene_entry_text(scene_entry: Any) -> str:
    if not isinstance(scene_entry, dict):
        return ""
    return str(
        scene_entry.get("prompt")
        or scene_entry.get("description")
        or scene_entry.get("text")
        or scene_entry.get("name")
        or ""
    ).strip()
 def _scene_entry_profile_key(scene_entry: Any) -> str:
    if not isinstance(scene_entry, dict):
        return ""
    return str(
        scene_entry.get("scene_camera_profile_key")
        or scene_entry.get("camera_profile_key")
        or scene_entry.get("camera_profile")
        or scene_entry.get("profile")
        or ""
    ).strip()
 def scene_camera_profile(
    scene_text: Any = "",
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
 ) -> dict[str, Any]:
    explicit_profile = _profile_by_key(profile_key)
    if explicit_profile:
        return explicit_profile
    entry_profile = _profile_by_key(_scene_entry_profile_key(scene_entry))
    if entry_profile:
        return entry_profile
    theme_profile = _profile_by_key(theme)
    if theme_profile:
        return theme_profile
    if isinstance(scene_entry, dict):
        entry_theme_profile = _profile_by_key(scene_entry.get("theme"))
        if entry_theme_profile:
            return entry_theme_profile
    text = " ".join(part for part in (str(scene_text or ""), _scene_entry_text(scene_entry)) if part).lower()
    if not text:
        return {}
    for profile in SCENE_CAMERA_PROFILES:
@@ -323,8 +384,12 @@ def scene_camera_directive(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
 ) -> str:
-    profile = scene_camera_profile(scene_text)
+    profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
    if not profile:
        return ""
    direction = str(parsed.get("orbit_direction") or "").strip()
@@ -378,11 +443,19 @@ def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
    return text
-def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+def contextual_composition_prompt(
    scene_text: Any,
    composition: Any,
    subject_kind: str = "subjects",
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
 ) -> str:
    text = str(composition or "").strip()
    if not text:
        return text
-    profile = scene_camera_profile(scene_text)
+    profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
    if not profile:
        return text
    lower = text.lower()
@@ -410,6 +483,10 @@ def camera_scene_directive_for_context(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
    *,
    scene_entry: Any = None,
    theme: Any = "",
    profile_key: Any = "",
 ) -> str:
    if (
        parsed_camera_config.get("camera_detail") == "off"
@@ -422,4 +499,7 @@ def camera_scene_directive_for_context(
        pov_labels,
        subject_kind,
        compact_labels,
        scene_entry=scene_entry,
        theme=theme,
        profile_key=profile_key,
    )
@@ -570,6 +570,59 @@ def smoke_row_camera_policy() -> None:
    _expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording")
    _expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording")
    _expect("library" in library_composition.lower(), "row camera library composition did not become location-aware")
    metadata_profile_row = {
        "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
        "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
        "scene_text": "private themed room with neutral walls and warm lamps",
        "scene_entry": {
            "slug": "library_by_metadata",
            "prompt": "private themed room with neutral walls and warm lamps",
            "theme": "classical_library",
        },
        "scene_theme": "classical_library",
        "composition": "polished mirror view with bag and shoes visible",
        "subject_type": "woman",
        "women_count": 1,
        "men_count": 0,
    }
    updated_metadata_profile = row_camera.apply_camera_config(
        metadata_profile_row,
        _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
        compact_labels=pb.CAMERA_COMPACT_LABELS,
    )
    metadata_scene = _expect_text(
        "row_camera_policy.metadata_scene",
        updated_metadata_profile.get("camera_scene_directive"),
        40,
    )
    _expect("Library camera layout" in metadata_scene, "row camera should prefer scene theme metadata over generic scene text")
    _expect(
        updated_metadata_profile.get("scene_camera_profile_key") == "classical_library",
        "row camera should expose metadata-selected profile key",
    )
    _expect(
        "library" in str(updated_metadata_profile.get("composition", "")).lower(),
        "row camera metadata-selected profile did not clean composition",
    )
    explicit_profile_row = {
        "prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
        "caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
        "scene_text": "coworking lounge with tall windows, warm desks, and glass partitions",
        "scene_camera_profile_key": "classical_library",
        "composition": "polished mirror view with bag and shoes visible",
        "subject_type": "woman",
        "women_count": 1,
        "men_count": 0,
    }
    updated_explicit_profile = row_camera.apply_camera_config(
        explicit_profile_row,
        _orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
        compact_labels=pb.CAMERA_COMPACT_LABELS,
    )
    _expect(
        "Library camera layout" in str(updated_explicit_profile.get("camera_scene_directive", "")),
        "explicit scene_camera_profile_key should override text-matched scene profile",
    )
 def smoke_config_route_location_theme() -> None: