Use metadata for scene camera profiles
This commit is contained in:
@@ -424,6 +424,8 @@ Rows keep the selected `scene_entry`, `location_theme`, `scene_theme`,
|
||||
`composition_entry`, `composition_theme`, and `scene_camera_profile_key` in
|
||||
`metadata_json` so location/camera behavior can be debugged without guessing
|
||||
from prompt text alone.
|
||||
When camera-aware profile routing runs, explicit `scene_camera_profile_key` and
|
||||
theme metadata are used before fallback text matching.
|
||||
|
||||
`SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into
|
||||
comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so
|
||||
|
||||
@@ -653,6 +653,9 @@ Camera handling:
|
||||
Current camera-aware scene adapter:
|
||||
|
||||
- Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`.
|
||||
- Profile resolution is metadata-first: explicit `scene_camera_profile_key`,
|
||||
selected `scene_entry` profile keys, and theme metadata are preferred before
|
||||
text matching.
|
||||
- Coworking/business-cafe/office scenes and classical library/book-stack scenes
|
||||
are detected by `scene_camera_profile`.
|
||||
- Location themes preserve `theme` on configs and selected scene entries, and
|
||||
|
||||
+51
-6
@@ -47,10 +47,29 @@ def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind
|
||||
return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
|
||||
|
||||
|
||||
def row_scene_text(row: dict[str, Any]) -> Any:
    """Return the scene text stored on *row*.

    The first truthy value among the ``scene_text``, ``source_scene_text`` and
    ``scene`` keys is returned unchanged (no string coercion). When none of
    them is truthy, the result of the final ``row.get("scene")`` lookup is
    returned as-is, which is ``None`` for a missing key.
    """
    return row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
|
||||
|
||||
|
||||
def row_scene_theme(row: dict[str, Any]) -> str:
    """Return the theme name recorded on *row* as a string.

    ``scene_theme`` takes precedence over ``location_theme``; an empty string
    is returned when neither key holds a truthy value.
    """
    return str(row.get("scene_theme") or row.get("location_theme") or "")
|
||||
|
||||
|
||||
def row_scene_profile_key(row: dict[str, Any]) -> str:
    """Return the ``scene_camera_profile_key`` stored on *row* as a string.

    A missing or falsy value yields an empty string.
    """
    return str(row.get("scene_camera_profile_key") or "")
|
||||
|
||||
|
||||
def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
|
||||
scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
|
||||
scene_text = row_scene_text(row)
|
||||
old_composition = str(row.get("composition") or "").strip()
|
||||
new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind)
|
||||
new_composition = scene_camera_adapters.contextual_composition_prompt(
|
||||
scene_text,
|
||||
old_composition,
|
||||
subject_kind,
|
||||
scene_entry=row.get("scene_entry"),
|
||||
theme=row_scene_theme(row),
|
||||
profile_key=row_scene_profile_key(row),
|
||||
)
|
||||
if not old_composition or new_composition == old_composition:
|
||||
return row
|
||||
row["source_composition"] = row.get("source_composition") or old_composition
|
||||
@@ -70,8 +89,19 @@ def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict
|
||||
return row
|
||||
|
||||
|
||||
def scene_camera_profile_metadata(scene_text: Any) -> dict[str, str]:
|
||||
profile = scene_camera_adapters.scene_camera_profile(scene_text)
|
||||
def scene_camera_profile_metadata(
|
||||
scene_text: Any = "",
|
||||
*,
|
||||
scene_entry: Any = None,
|
||||
theme: Any = "",
|
||||
profile_key: Any = "",
|
||||
) -> dict[str, str]:
|
||||
profile = scene_camera_adapters.scene_camera_profile(
|
||||
scene_text,
|
||||
scene_entry=scene_entry,
|
||||
theme=theme,
|
||||
profile_key=profile_key,
|
||||
)
|
||||
if not profile:
|
||||
return {}
|
||||
return {
|
||||
@@ -89,6 +119,10 @@ def camera_scene_directive_for_context(
|
||||
pov_labels: list[str] | None = None,
|
||||
subject_kind: str = "subjects",
|
||||
compact_labels: Mapping[str, str] | None = None,
|
||||
*,
|
||||
scene_entry: Any = None,
|
||||
theme: Any = "",
|
||||
profile_key: Any = "",
|
||||
) -> tuple[str, dict[str, Any]]:
|
||||
parsed = camera_policy.parse_camera_config(camera_config)
|
||||
directive = scene_camera_adapters.camera_scene_directive_for_context(
|
||||
@@ -97,6 +131,9 @@ def camera_scene_directive_for_context(
|
||||
pov_labels,
|
||||
subject_kind,
|
||||
compact_labels,
|
||||
scene_entry=scene_entry,
|
||||
theme=theme,
|
||||
profile_key=profile_key,
|
||||
)
|
||||
return directive, parsed
|
||||
|
||||
@@ -141,17 +178,25 @@ def apply_camera_config(
|
||||
pov_labels = row_pov_labels(row, pov_label_resolver)
|
||||
subject_kind = row_camera_subject_kind(row)
|
||||
row = apply_contextual_composition(row, subject_kind)
|
||||
profile_metadata = scene_camera_profile_metadata(row.get("scene_text") or row.get("source_scene_text") or row.get("scene"))
|
||||
profile_metadata = scene_camera_profile_metadata(
|
||||
row_scene_text(row),
|
||||
scene_entry=row.get("scene_entry"),
|
||||
theme=row_scene_theme(row),
|
||||
profile_key=row_scene_profile_key(row),
|
||||
)
|
||||
if profile_metadata:
|
||||
row["scene_camera_profile"] = profile_metadata
|
||||
row["scene_camera_profile_key"] = profile_metadata.get("key", "")
|
||||
scene_directive, parsed = camera_scene_directive_for_context(
|
||||
row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
|
||||
row_scene_text(row),
|
||||
row.get("composition") or row.get("source_composition"),
|
||||
parsed,
|
||||
pov_labels,
|
||||
subject_kind,
|
||||
compact_labels,
|
||||
scene_entry=row.get("scene_entry"),
|
||||
theme=row_scene_theme(row),
|
||||
profile_key=row_scene_profile_key(row),
|
||||
)
|
||||
row["camera_config"] = parsed
|
||||
row["camera_scene_directive"] = scene_directive
|
||||
|
||||
@@ -112,6 +112,12 @@ SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
|
||||
},
|
||||
)
|
||||
|
||||
# Index of SCENE_CAMERA_PROFILES by stringified "key". Each value is a shallow
# copy of the profile dict, so replacing a top-level entry here does not touch
# the tuple's originals.
SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES}

# Theme name -> camera profile key. Consulted when a value is not itself a key
# of SCENE_CAMERA_PROFILE_KEYS.
THEME_PROFILE_KEYS = {
    "classical_library": "classical_library",
}
|
||||
|
||||
MISMATCHED_COMPOSITION_TERMS = (
|
||||
"outfit-check",
|
||||
"outfit check",
|
||||
@@ -123,8 +129,63 @@ MISMATCHED_COMPOSITION_TERMS = (
|
||||
)
|
||||
|
||||
|
||||
def scene_camera_profile(scene_text: Any) -> dict[str, Any]:
|
||||
text = str(scene_text or "").lower()
|
||||
def _profile_by_key(value: Any) -> dict[str, Any]:
    """Return a copy of the camera profile identified by *value*, or ``{}``.

    *value* is stringified and stripped. It is tried first as a direct key of
    ``SCENE_CAMERA_PROFILE_KEYS`` and then as a theme name translated through
    ``THEME_PROFILE_KEYS``. A falsy, blank, or unknown value yields an empty
    dict.
    """
    key = str(value or "").strip()
    if not key:
        return {}
    # A direct profile key wins over a theme alias of the same spelling.
    if key in SCENE_CAMERA_PROFILE_KEYS:
        # Hand back a fresh shallow copy rather than the shared table entry.
        return dict(SCENE_CAMERA_PROFILE_KEYS[key])
    mapped_key = THEME_PROFILE_KEYS.get(key)
    if mapped_key and mapped_key in SCENE_CAMERA_PROFILE_KEYS:
        return dict(SCENE_CAMERA_PROFILE_KEYS[mapped_key])
    return {}
|
||||
|
||||
|
||||
def _scene_entry_text(scene_entry: Any) -> str:
|
||||
if not isinstance(scene_entry, dict):
|
||||
return ""
|
||||
return str(
|
||||
scene_entry.get("prompt")
|
||||
or scene_entry.get("description")
|
||||
or scene_entry.get("text")
|
||||
or scene_entry.get("name")
|
||||
or ""
|
||||
).strip()
|
||||
|
||||
|
||||
def _scene_entry_profile_key(scene_entry: Any) -> str:
|
||||
if not isinstance(scene_entry, dict):
|
||||
return ""
|
||||
return str(
|
||||
scene_entry.get("scene_camera_profile_key")
|
||||
or scene_entry.get("camera_profile_key")
|
||||
or scene_entry.get("camera_profile")
|
||||
or scene_entry.get("profile")
|
||||
or ""
|
||||
).strip()
|
||||
|
||||
|
||||
def scene_camera_profile(
|
||||
scene_text: Any = "",
|
||||
*,
|
||||
scene_entry: Any = None,
|
||||
theme: Any = "",
|
||||
profile_key: Any = "",
|
||||
) -> dict[str, Any]:
|
||||
explicit_profile = _profile_by_key(profile_key)
|
||||
if explicit_profile:
|
||||
return explicit_profile
|
||||
entry_profile = _profile_by_key(_scene_entry_profile_key(scene_entry))
|
||||
if entry_profile:
|
||||
return entry_profile
|
||||
theme_profile = _profile_by_key(theme)
|
||||
if theme_profile:
|
||||
return theme_profile
|
||||
if isinstance(scene_entry, dict):
|
||||
entry_theme_profile = _profile_by_key(scene_entry.get("theme"))
|
||||
if entry_theme_profile:
|
||||
return entry_theme_profile
|
||||
text = " ".join(part for part in (str(scene_text or ""), _scene_entry_text(scene_entry)) if part).lower()
|
||||
if not text:
|
||||
return {}
|
||||
for profile in SCENE_CAMERA_PROFILES:
|
||||
@@ -323,8 +384,12 @@ def scene_camera_directive(
|
||||
pov_labels: list[str] | None = None,
|
||||
subject_kind: str = "subjects",
|
||||
compact_labels: Mapping[str, str] | None = None,
|
||||
*,
|
||||
scene_entry: Any = None,
|
||||
theme: Any = "",
|
||||
profile_key: Any = "",
|
||||
) -> str:
|
||||
profile = scene_camera_profile(scene_text)
|
||||
profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
|
||||
if not profile:
|
||||
return ""
|
||||
direction = str(parsed.get("orbit_direction") or "").strip()
|
||||
@@ -378,11 +443,19 @@ def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
|
||||
return text
|
||||
|
||||
|
||||
def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
|
||||
def contextual_composition_prompt(
|
||||
scene_text: Any,
|
||||
composition: Any,
|
||||
subject_kind: str = "subjects",
|
||||
*,
|
||||
scene_entry: Any = None,
|
||||
theme: Any = "",
|
||||
profile_key: Any = "",
|
||||
) -> str:
|
||||
text = str(composition or "").strip()
|
||||
if not text:
|
||||
return text
|
||||
profile = scene_camera_profile(scene_text)
|
||||
profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
|
||||
if not profile:
|
||||
return text
|
||||
lower = text.lower()
|
||||
@@ -410,6 +483,10 @@ def camera_scene_directive_for_context(
|
||||
pov_labels: list[str] | None = None,
|
||||
subject_kind: str = "subjects",
|
||||
compact_labels: Mapping[str, str] | None = None,
|
||||
*,
|
||||
scene_entry: Any = None,
|
||||
theme: Any = "",
|
||||
profile_key: Any = "",
|
||||
) -> str:
|
||||
if (
|
||||
parsed_camera_config.get("camera_detail") == "off"
|
||||
@@ -422,4 +499,7 @@ def camera_scene_directive_for_context(
|
||||
pov_labels,
|
||||
subject_kind,
|
||||
compact_labels,
|
||||
scene_entry=scene_entry,
|
||||
theme=theme,
|
||||
profile_key=profile_key,
|
||||
)
|
||||
|
||||
@@ -570,6 +570,59 @@ def smoke_row_camera_policy() -> None:
|
||||
_expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording")
|
||||
_expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording")
|
||||
_expect("library" in library_composition.lower(), "row camera library composition did not become location-aware")
|
||||
metadata_profile_row = {
|
||||
"prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
|
||||
"caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
|
||||
"scene_text": "private themed room with neutral walls and warm lamps",
|
||||
"scene_entry": {
|
||||
"slug": "library_by_metadata",
|
||||
"prompt": "private themed room with neutral walls and warm lamps",
|
||||
"theme": "classical_library",
|
||||
},
|
||||
"scene_theme": "classical_library",
|
||||
"composition": "polished mirror view with bag and shoes visible",
|
||||
"subject_type": "woman",
|
||||
"women_count": 1,
|
||||
"men_count": 0,
|
||||
}
|
||||
updated_metadata_profile = row_camera.apply_camera_config(
|
||||
metadata_profile_row,
|
||||
_orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
|
||||
compact_labels=pb.CAMERA_COMPACT_LABELS,
|
||||
)
|
||||
metadata_scene = _expect_text(
|
||||
"row_camera_policy.metadata_scene",
|
||||
updated_metadata_profile.get("camera_scene_directive"),
|
||||
40,
|
||||
)
|
||||
_expect("Library camera layout" in metadata_scene, "row camera should prefer scene theme metadata over generic scene text")
|
||||
_expect(
|
||||
updated_metadata_profile.get("scene_camera_profile_key") == "classical_library",
|
||||
"row camera should expose metadata-selected profile key",
|
||||
)
|
||||
_expect(
|
||||
"library" in str(updated_metadata_profile.get("composition", "")).lower(),
|
||||
"row camera metadata-selected profile did not clean composition",
|
||||
)
|
||||
explicit_profile_row = {
|
||||
"prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
|
||||
"caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
|
||||
"scene_text": "coworking lounge with tall windows, warm desks, and glass partitions",
|
||||
"scene_camera_profile_key": "classical_library",
|
||||
"composition": "polished mirror view with bag and shoes visible",
|
||||
"subject_type": "woman",
|
||||
"women_count": 1,
|
||||
"men_count": 0,
|
||||
}
|
||||
updated_explicit_profile = row_camera.apply_camera_config(
|
||||
explicit_profile_row,
|
||||
_orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
|
||||
compact_labels=pb.CAMERA_COMPACT_LABELS,
|
||||
)
|
||||
_expect(
|
||||
"Library camera layout" in str(updated_explicit_profile.get("camera_scene_directive", "")),
|
||||
"explicit scene_camera_profile_key should override text-matched scene profile",
|
||||
)
|
||||
|
||||
|
||||
def smoke_config_route_location_theme() -> None:
|
||||
|
||||
Reference in New Issue
Block a user