Use metadata for scene camera profiles

This commit is contained in:
2026-06-27 13:25:36 +02:00
parent 75a71a2df6
commit f811c02641
5 changed files with 194 additions and 11 deletions
+2
View File
@@ -424,6 +424,8 @@ Rows keep the selected `scene_entry`, `location_theme`, `scene_theme`,
`composition_entry`, `composition_theme`, and `scene_camera_profile_key` in
`metadata_json` so location/camera behavior can be debugged without guessing
from prompt text alone.
When camera-aware profile routing runs, an explicit `scene_camera_profile_key`,
then any profile key carried by the selected `scene_entry`, then theme metadata
are tried in that order before falling back to text matching.
`SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into
comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so
+3
View File
@@ -653,6 +653,9 @@ Camera handling:
Current camera-aware scene adapter:
- Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`.
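- Profile resolution is metadata-first and ordered: explicit
`scene_camera_profile_key`, then selected `scene_entry` profile keys, then theme
metadata (the row theme, then the scene entry's own `theme`); text matching is
only used when none of these resolve to a known profile.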
- Coworking/business-cafe/office scenes and classical library/book-stack scenes
are detected by `scene_camera_profile`.
- Location themes preserve `theme` on configs and selected scene entries, and
+51 -6
View File
@@ -47,10 +47,29 @@ def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind
return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
def row_scene_text(row: dict[str, Any]) -> Any:
    """Return the scene text stored on a row.

    The fields ``scene_text`` and ``source_scene_text`` are checked in that
    order and the first truthy value is returned. When neither is truthy the
    raw ``scene`` value is returned as-is (which may be ``None`` or another
    falsy value).
    """
    for field in ("scene_text", "source_scene_text"):
        candidate = row.get(field)
        if candidate:
            return candidate
    # Last resort: hand back whatever "scene" holds, even if it is falsy.
    return row.get("scene")
def row_scene_theme(row: dict[str, Any]) -> str:
    """Return the row's theme as a string.

    ``scene_theme`` takes precedence; ``location_theme`` is used when it is
    missing or falsy. An empty string is returned when neither is set.
    """
    theme = row.get("scene_theme")
    if not theme:
        theme = row.get("location_theme")
    return str(theme) if theme else ""
def row_scene_profile_key(row: dict[str, Any]) -> str:
    """Return the row's ``scene_camera_profile_key`` as a string.

    A missing or falsy value yields an empty string.
    """
    explicit_key = row.get("scene_camera_profile_key")
    return str(explicit_key) if explicit_key else ""
def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
scene_text = row_scene_text(row)
old_composition = str(row.get("composition") or "").strip()
new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind)
new_composition = scene_camera_adapters.contextual_composition_prompt(
scene_text,
old_composition,
subject_kind,
scene_entry=row.get("scene_entry"),
theme=row_scene_theme(row),
profile_key=row_scene_profile_key(row),
)
if not old_composition or new_composition == old_composition:
return row
row["source_composition"] = row.get("source_composition") or old_composition
@@ -70,8 +89,19 @@ def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict
return row
def scene_camera_profile_metadata(scene_text: Any) -> dict[str, str]:
profile = scene_camera_adapters.scene_camera_profile(scene_text)
def scene_camera_profile_metadata(
scene_text: Any = "",
*,
scene_entry: Any = None,
theme: Any = "",
profile_key: Any = "",
) -> dict[str, str]:
profile = scene_camera_adapters.scene_camera_profile(
scene_text,
scene_entry=scene_entry,
theme=theme,
profile_key=profile_key,
)
if not profile:
return {}
return {
@@ -89,6 +119,10 @@ def camera_scene_directive_for_context(
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
compact_labels: Mapping[str, str] | None = None,
*,
scene_entry: Any = None,
theme: Any = "",
profile_key: Any = "",
) -> tuple[str, dict[str, Any]]:
parsed = camera_policy.parse_camera_config(camera_config)
directive = scene_camera_adapters.camera_scene_directive_for_context(
@@ -97,6 +131,9 @@ def camera_scene_directive_for_context(
pov_labels,
subject_kind,
compact_labels,
scene_entry=scene_entry,
theme=theme,
profile_key=profile_key,
)
return directive, parsed
@@ -141,17 +178,25 @@ def apply_camera_config(
pov_labels = row_pov_labels(row, pov_label_resolver)
subject_kind = row_camera_subject_kind(row)
row = apply_contextual_composition(row, subject_kind)
profile_metadata = scene_camera_profile_metadata(row.get("scene_text") or row.get("source_scene_text") or row.get("scene"))
profile_metadata = scene_camera_profile_metadata(
row_scene_text(row),
scene_entry=row.get("scene_entry"),
theme=row_scene_theme(row),
profile_key=row_scene_profile_key(row),
)
if profile_metadata:
row["scene_camera_profile"] = profile_metadata
row["scene_camera_profile_key"] = profile_metadata.get("key", "")
scene_directive, parsed = camera_scene_directive_for_context(
row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
row_scene_text(row),
row.get("composition") or row.get("source_composition"),
parsed,
pov_labels,
subject_kind,
compact_labels,
scene_entry=row.get("scene_entry"),
theme=row_scene_theme(row),
profile_key=row_scene_profile_key(row),
)
row["camera_config"] = parsed
row["camera_scene_directive"] = scene_directive
+85 -5
View File
@@ -112,6 +112,12 @@ SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
},
)
# Index of SCENE_CAMERA_PROFILES by each profile's "key" (coerced to str).
# Every value is a shallow copy of the profile dict, so replacing top-level
# entries on a looked-up value does not alter the original tuple entry.
SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES}
# Maps a theme name to the key of the camera profile it selects. Consulted by
# _profile_by_key when the given value is not itself a profile key.
THEME_PROFILE_KEYS = {
"classical_library": "classical_library",
}
MISMATCHED_COMPOSITION_TERMS = (
"outfit-check",
"outfit check",
@@ -123,8 +129,63 @@ MISMATCHED_COMPOSITION_TERMS = (
)
def scene_camera_profile(scene_text: Any) -> dict[str, Any]:
text = str(scene_text or "").lower()
def _profile_by_key(value: Any) -> dict[str, Any]:
    """Resolve a profile key or theme name to a copy of its camera profile.

    ``value`` is stringified and stripped. It is first tried as a direct key
    of ``SCENE_CAMERA_PROFILE_KEYS``; failing that, it is translated through
    ``THEME_PROFILE_KEYS`` and the mapped key is tried. Returns a fresh
    shallow copy of the matching profile, or ``{}`` when nothing matches or
    ``value`` is empty.
    """
    lookup = str(value or "").strip()
    if not lookup:
        return {}
    # A direct profile key wins over the theme-name translation.
    for candidate in (lookup, THEME_PROFILE_KEYS.get(lookup)):
        if candidate and candidate in SCENE_CAMERA_PROFILE_KEYS:
            return dict(SCENE_CAMERA_PROFILE_KEYS[candidate])
    return {}
def _scene_entry_text(scene_entry: Any) -> str:
if not isinstance(scene_entry, dict):
return ""
return str(
scene_entry.get("prompt")
or scene_entry.get("description")
or scene_entry.get("text")
or scene_entry.get("name")
or ""
).strip()
def _scene_entry_profile_key(scene_entry: Any) -> str:
if not isinstance(scene_entry, dict):
return ""
return str(
scene_entry.get("scene_camera_profile_key")
or scene_entry.get("camera_profile_key")
or scene_entry.get("camera_profile")
or scene_entry.get("profile")
or ""
).strip()
def scene_camera_profile(
scene_text: Any = "",
*,
scene_entry: Any = None,
theme: Any = "",
profile_key: Any = "",
) -> dict[str, Any]:
explicit_profile = _profile_by_key(profile_key)
if explicit_profile:
return explicit_profile
entry_profile = _profile_by_key(_scene_entry_profile_key(scene_entry))
if entry_profile:
return entry_profile
theme_profile = _profile_by_key(theme)
if theme_profile:
return theme_profile
if isinstance(scene_entry, dict):
entry_theme_profile = _profile_by_key(scene_entry.get("theme"))
if entry_theme_profile:
return entry_theme_profile
text = " ".join(part for part in (str(scene_text or ""), _scene_entry_text(scene_entry)) if part).lower()
if not text:
return {}
for profile in SCENE_CAMERA_PROFILES:
@@ -323,8 +384,12 @@ def scene_camera_directive(
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
compact_labels: Mapping[str, str] | None = None,
*,
scene_entry: Any = None,
theme: Any = "",
profile_key: Any = "",
) -> str:
profile = scene_camera_profile(scene_text)
profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
if not profile:
return ""
direction = str(parsed.get("orbit_direction") or "").strip()
@@ -378,11 +443,19 @@ def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
return text
def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
def contextual_composition_prompt(
scene_text: Any,
composition: Any,
subject_kind: str = "subjects",
*,
scene_entry: Any = None,
theme: Any = "",
profile_key: Any = "",
) -> str:
text = str(composition or "").strip()
if not text:
return text
profile = scene_camera_profile(scene_text)
profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
if not profile:
return text
lower = text.lower()
@@ -410,6 +483,10 @@ def camera_scene_directive_for_context(
pov_labels: list[str] | None = None,
subject_kind: str = "subjects",
compact_labels: Mapping[str, str] | None = None,
*,
scene_entry: Any = None,
theme: Any = "",
profile_key: Any = "",
) -> str:
if (
parsed_camera_config.get("camera_detail") == "off"
@@ -422,4 +499,7 @@ def camera_scene_directive_for_context(
pov_labels,
subject_kind,
compact_labels,
scene_entry=scene_entry,
theme=theme,
profile_key=profile_key,
)
+53
View File
@@ -570,6 +570,59 @@ def smoke_row_camera_policy() -> None:
_expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording")
_expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording")
_expect("library" in library_composition.lower(), "row camera library composition did not become location-aware")
metadata_profile_row = {
"prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
"caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
"scene_text": "private themed room with neutral walls and warm lamps",
"scene_entry": {
"slug": "library_by_metadata",
"prompt": "private themed room with neutral walls and warm lamps",
"theme": "classical_library",
},
"scene_theme": "classical_library",
"composition": "polished mirror view with bag and shoes visible",
"subject_type": "woman",
"women_count": 1,
"men_count": 0,
}
updated_metadata_profile = row_camera.apply_camera_config(
metadata_profile_row,
_orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
compact_labels=pb.CAMERA_COMPACT_LABELS,
)
metadata_scene = _expect_text(
"row_camera_policy.metadata_scene",
updated_metadata_profile.get("camera_scene_directive"),
40,
)
_expect("Library camera layout" in metadata_scene, "row camera should prefer scene theme metadata over generic scene text")
_expect(
updated_metadata_profile.get("scene_camera_profile_key") == "classical_library",
"row camera should expose metadata-selected profile key",
)
_expect(
"library" in str(updated_metadata_profile.get("composition", "")).lower(),
"row camera metadata-selected profile did not clean composition",
)
explicit_profile_row = {
"prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
"caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
"scene_text": "coworking lounge with tall windows, warm desks, and glass partitions",
"scene_camera_profile_key": "classical_library",
"composition": "polished mirror view with bag and shoes visible",
"subject_type": "woman",
"women_count": 1,
"men_count": 0,
}
updated_explicit_profile = row_camera.apply_camera_config(
explicit_profile_row,
_orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
compact_labels=pb.CAMERA_COMPACT_LABELS,
)
_expect(
"Library camera layout" in str(updated_explicit_profile.get("camera_scene_directive", "")),
"explicit scene_camera_profile_key should override text-matched scene profile",
)
def smoke_config_route_location_theme() -> None: