Use metadata for scene camera profiles
This commit is contained in:
@@ -424,6 +424,8 @@ Rows keep the selected `scene_entry`, `location_theme`, `scene_theme`,
|
|||||||
`composition_entry`, `composition_theme`, and `scene_camera_profile_key` in
|
`composition_entry`, `composition_theme`, and `scene_camera_profile_key` in
|
||||||
`metadata_json` so location/camera behavior can be debugged without guessing
|
`metadata_json` so location/camera behavior can be debugged without guessing
|
||||||
from prompt text alone.
|
from prompt text alone.
|
||||||
|
When camera-aware profile routing runs, explicit `scene_camera_profile_key` and
|
||||||
|
theme metadata are used before fallback text matching.
|
||||||
|
|
||||||
`SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into
|
`SxCP SDXL Formatter` rewrites prompt builder output or `metadata_json` into
|
||||||
comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so
|
comma-tag SDXL/Pony-style prompts. Connect `metadata_json` when possible so
|
||||||
|
|||||||
@@ -653,6 +653,9 @@ Camera handling:
|
|||||||
Current camera-aware scene adapter:
|
Current camera-aware scene adapter:
|
||||||
|
|
||||||
- Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`.
|
- Scene profiles live in `scene_camera_adapters.SCENE_CAMERA_PROFILES`.
|
||||||
|
- Profile resolution is metadata-first: explicit `scene_camera_profile_key`,
|
||||||
|
selected `scene_entry` profile keys, and theme metadata are preferred before
|
||||||
|
text matching.
|
||||||
- Coworking/business-cafe/office scenes and classical library/book-stack scenes
|
- Coworking/business-cafe/office scenes and classical library/book-stack scenes
|
||||||
are detected by `scene_camera_profile`.
|
are detected by `scene_camera_profile`.
|
||||||
- Location themes preserve `theme` on configs and selected scene entries, and
|
- Location themes preserve `theme` on configs and selected scene entries, and
|
||||||
|
|||||||
+51
-6
@@ -47,10 +47,29 @@ def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind
|
|||||||
return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
|
return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
|
||||||
|
|
||||||
|
|
||||||
|
def row_scene_text(row: dict[str, Any]) -> Any:
    """Return the scene text carried by a row.

    The keys ``scene_text``, ``source_scene_text`` and ``scene`` are checked
    in that order and the first truthy value is returned unchanged. When
    neither of the first two is truthy, whatever ``row.get("scene")`` yields
    is returned (``None`` if the key is absent).
    """
    return row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
|
||||||
|
|
||||||
|
|
||||||
|
def row_scene_theme(row: dict[str, Any]) -> str:
    """Return the theme recorded on a row as a string.

    ``scene_theme`` takes precedence over ``location_theme``; an empty string
    is returned when neither key holds a truthy value.
    """
    return str(row.get("scene_theme") or row.get("location_theme") or "")
|
||||||
|
|
||||||
|
|
||||||
|
def row_scene_profile_key(row: dict[str, Any]) -> str:
    """Return the row's ``scene_camera_profile_key`` as a string.

    A missing or falsy value is reported as an empty string.
    """
    return str(row.get("scene_camera_profile_key") or "")
|
||||||
|
|
||||||
|
|
||||||
def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
|
def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
|
||||||
scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
|
scene_text = row_scene_text(row)
|
||||||
old_composition = str(row.get("composition") or "").strip()
|
old_composition = str(row.get("composition") or "").strip()
|
||||||
new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind)
|
new_composition = scene_camera_adapters.contextual_composition_prompt(
|
||||||
|
scene_text,
|
||||||
|
old_composition,
|
||||||
|
subject_kind,
|
||||||
|
scene_entry=row.get("scene_entry"),
|
||||||
|
theme=row_scene_theme(row),
|
||||||
|
profile_key=row_scene_profile_key(row),
|
||||||
|
)
|
||||||
if not old_composition or new_composition == old_composition:
|
if not old_composition or new_composition == old_composition:
|
||||||
return row
|
return row
|
||||||
row["source_composition"] = row.get("source_composition") or old_composition
|
row["source_composition"] = row.get("source_composition") or old_composition
|
||||||
@@ -70,8 +89,19 @@ def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict
|
|||||||
return row
|
return row
|
||||||
|
|
||||||
|
|
||||||
def scene_camera_profile_metadata(scene_text: Any) -> dict[str, str]:
|
def scene_camera_profile_metadata(
|
||||||
profile = scene_camera_adapters.scene_camera_profile(scene_text)
|
scene_text: Any = "",
|
||||||
|
*,
|
||||||
|
scene_entry: Any = None,
|
||||||
|
theme: Any = "",
|
||||||
|
profile_key: Any = "",
|
||||||
|
) -> dict[str, str]:
|
||||||
|
profile = scene_camera_adapters.scene_camera_profile(
|
||||||
|
scene_text,
|
||||||
|
scene_entry=scene_entry,
|
||||||
|
theme=theme,
|
||||||
|
profile_key=profile_key,
|
||||||
|
)
|
||||||
if not profile:
|
if not profile:
|
||||||
return {}
|
return {}
|
||||||
return {
|
return {
|
||||||
@@ -89,6 +119,10 @@ def camera_scene_directive_for_context(
|
|||||||
pov_labels: list[str] | None = None,
|
pov_labels: list[str] | None = None,
|
||||||
subject_kind: str = "subjects",
|
subject_kind: str = "subjects",
|
||||||
compact_labels: Mapping[str, str] | None = None,
|
compact_labels: Mapping[str, str] | None = None,
|
||||||
|
*,
|
||||||
|
scene_entry: Any = None,
|
||||||
|
theme: Any = "",
|
||||||
|
profile_key: Any = "",
|
||||||
) -> tuple[str, dict[str, Any]]:
|
) -> tuple[str, dict[str, Any]]:
|
||||||
parsed = camera_policy.parse_camera_config(camera_config)
|
parsed = camera_policy.parse_camera_config(camera_config)
|
||||||
directive = scene_camera_adapters.camera_scene_directive_for_context(
|
directive = scene_camera_adapters.camera_scene_directive_for_context(
|
||||||
@@ -97,6 +131,9 @@ def camera_scene_directive_for_context(
|
|||||||
pov_labels,
|
pov_labels,
|
||||||
subject_kind,
|
subject_kind,
|
||||||
compact_labels,
|
compact_labels,
|
||||||
|
scene_entry=scene_entry,
|
||||||
|
theme=theme,
|
||||||
|
profile_key=profile_key,
|
||||||
)
|
)
|
||||||
return directive, parsed
|
return directive, parsed
|
||||||
|
|
||||||
@@ -141,17 +178,25 @@ def apply_camera_config(
|
|||||||
pov_labels = row_pov_labels(row, pov_label_resolver)
|
pov_labels = row_pov_labels(row, pov_label_resolver)
|
||||||
subject_kind = row_camera_subject_kind(row)
|
subject_kind = row_camera_subject_kind(row)
|
||||||
row = apply_contextual_composition(row, subject_kind)
|
row = apply_contextual_composition(row, subject_kind)
|
||||||
profile_metadata = scene_camera_profile_metadata(row.get("scene_text") or row.get("source_scene_text") or row.get("scene"))
|
profile_metadata = scene_camera_profile_metadata(
|
||||||
|
row_scene_text(row),
|
||||||
|
scene_entry=row.get("scene_entry"),
|
||||||
|
theme=row_scene_theme(row),
|
||||||
|
profile_key=row_scene_profile_key(row),
|
||||||
|
)
|
||||||
if profile_metadata:
|
if profile_metadata:
|
||||||
row["scene_camera_profile"] = profile_metadata
|
row["scene_camera_profile"] = profile_metadata
|
||||||
row["scene_camera_profile_key"] = profile_metadata.get("key", "")
|
row["scene_camera_profile_key"] = profile_metadata.get("key", "")
|
||||||
scene_directive, parsed = camera_scene_directive_for_context(
|
scene_directive, parsed = camera_scene_directive_for_context(
|
||||||
row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
|
row_scene_text(row),
|
||||||
row.get("composition") or row.get("source_composition"),
|
row.get("composition") or row.get("source_composition"),
|
||||||
parsed,
|
parsed,
|
||||||
pov_labels,
|
pov_labels,
|
||||||
subject_kind,
|
subject_kind,
|
||||||
compact_labels,
|
compact_labels,
|
||||||
|
scene_entry=row.get("scene_entry"),
|
||||||
|
theme=row_scene_theme(row),
|
||||||
|
profile_key=row_scene_profile_key(row),
|
||||||
)
|
)
|
||||||
row["camera_config"] = parsed
|
row["camera_config"] = parsed
|
||||||
row["camera_scene_directive"] = scene_directive
|
row["camera_scene_directive"] = scene_directive
|
||||||
|
|||||||
@@ -112,6 +112,12 @@ SCENE_CAMERA_PROFILES: tuple[dict[str, Any], ...] = (
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Index of SCENE_CAMERA_PROFILES by each profile's "key" (coerced to str).
# Every value is a shallow dict copy of the profile, built once at import time.
SCENE_CAMERA_PROFILE_KEYS = {str(profile["key"]): dict(profile) for profile in SCENE_CAMERA_PROFILES}
|
||||||
|
|
||||||
|
# Maps a theme name to the key of the camera profile it selects; consulted
# when a lookup value is not itself a known profile key.
THEME_PROFILE_KEYS = {
    "classical_library": "classical_library",
}
|
||||||
|
|
||||||
MISMATCHED_COMPOSITION_TERMS = (
|
MISMATCHED_COMPOSITION_TERMS = (
|
||||||
"outfit-check",
|
"outfit-check",
|
||||||
"outfit check",
|
"outfit check",
|
||||||
@@ -123,8 +129,63 @@ MISMATCHED_COMPOSITION_TERMS = (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def scene_camera_profile(scene_text: Any) -> dict[str, Any]:
|
def _profile_by_key(value: Any) -> dict[str, Any]:
|
||||||
text = str(scene_text or "").lower()
|
key = str(value or "").strip()
|
||||||
|
if not key:
|
||||||
|
return {}
|
||||||
|
if key in SCENE_CAMERA_PROFILE_KEYS:
|
||||||
|
return dict(SCENE_CAMERA_PROFILE_KEYS[key])
|
||||||
|
mapped_key = THEME_PROFILE_KEYS.get(key)
|
||||||
|
if mapped_key and mapped_key in SCENE_CAMERA_PROFILE_KEYS:
|
||||||
|
return dict(SCENE_CAMERA_PROFILE_KEYS[mapped_key])
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
|
def _scene_entry_text(scene_entry: Any) -> str:
    """Return the descriptive text of a scene entry, stripped of whitespace.

    The fields ``prompt``, ``description``, ``text`` and ``name`` are tried in
    that order and the first one that is non-blank after stripping wins.

    Args:
        scene_entry: The selected scene entry; anything that is not a ``dict``
            is treated as "no entry".

    Returns:
        The stripped text, or ``""`` when no field carries usable text.
    """
    if not isinstance(scene_entry, dict):
        return ""
    for field in ("prompt", "description", "text", "name"):
        # Strip before testing so a whitespace-only field cannot shadow a
        # later field that actually contains text.
        candidate = str(scene_entry.get(field) or "").strip()
        if candidate:
            return candidate
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _scene_entry_profile_key(scene_entry: Any) -> str:
    """Return the camera profile key declared on a scene entry.

    The aliases ``scene_camera_profile_key``, ``camera_profile_key``,
    ``camera_profile`` and ``profile`` are tried in that order and the first
    one that is non-blank after stripping wins.

    Args:
        scene_entry: The selected scene entry; anything that is not a ``dict``
            is treated as "no entry".

    Returns:
        The stripped key, or ``""`` when the entry declares none.
    """
    if not isinstance(scene_entry, dict):
        return ""
    aliases = (
        "scene_camera_profile_key",
        "camera_profile_key",
        "camera_profile",
        "profile",
    )
    for alias in aliases:
        # Strip before testing so a blank higher-priority alias does not hide
        # a populated lower-priority one.
        candidate = str(scene_entry.get(alias) or "").strip()
        if candidate:
            return candidate
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def scene_camera_profile(
|
||||||
|
scene_text: Any = "",
|
||||||
|
*,
|
||||||
|
scene_entry: Any = None,
|
||||||
|
theme: Any = "",
|
||||||
|
profile_key: Any = "",
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
explicit_profile = _profile_by_key(profile_key)
|
||||||
|
if explicit_profile:
|
||||||
|
return explicit_profile
|
||||||
|
entry_profile = _profile_by_key(_scene_entry_profile_key(scene_entry))
|
||||||
|
if entry_profile:
|
||||||
|
return entry_profile
|
||||||
|
theme_profile = _profile_by_key(theme)
|
||||||
|
if theme_profile:
|
||||||
|
return theme_profile
|
||||||
|
if isinstance(scene_entry, dict):
|
||||||
|
entry_theme_profile = _profile_by_key(scene_entry.get("theme"))
|
||||||
|
if entry_theme_profile:
|
||||||
|
return entry_theme_profile
|
||||||
|
text = " ".join(part for part in (str(scene_text or ""), _scene_entry_text(scene_entry)) if part).lower()
|
||||||
if not text:
|
if not text:
|
||||||
return {}
|
return {}
|
||||||
for profile in SCENE_CAMERA_PROFILES:
|
for profile in SCENE_CAMERA_PROFILES:
|
||||||
@@ -323,8 +384,12 @@ def scene_camera_directive(
|
|||||||
pov_labels: list[str] | None = None,
|
pov_labels: list[str] | None = None,
|
||||||
subject_kind: str = "subjects",
|
subject_kind: str = "subjects",
|
||||||
compact_labels: Mapping[str, str] | None = None,
|
compact_labels: Mapping[str, str] | None = None,
|
||||||
|
*,
|
||||||
|
scene_entry: Any = None,
|
||||||
|
theme: Any = "",
|
||||||
|
profile_key: Any = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
profile = scene_camera_profile(scene_text)
|
profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
|
||||||
if not profile:
|
if not profile:
|
||||||
return ""
|
return ""
|
||||||
direction = str(parsed.get("orbit_direction") or "").strip()
|
direction = str(parsed.get("orbit_direction") or "").strip()
|
||||||
@@ -378,11 +443,19 @@ def profile_composition_text(profile: dict[str, Any], subject_kind: str) -> str:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
def contextual_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
|
def contextual_composition_prompt(
|
||||||
|
scene_text: Any,
|
||||||
|
composition: Any,
|
||||||
|
subject_kind: str = "subjects",
|
||||||
|
*,
|
||||||
|
scene_entry: Any = None,
|
||||||
|
theme: Any = "",
|
||||||
|
profile_key: Any = "",
|
||||||
|
) -> str:
|
||||||
text = str(composition or "").strip()
|
text = str(composition or "").strip()
|
||||||
if not text:
|
if not text:
|
||||||
return text
|
return text
|
||||||
profile = scene_camera_profile(scene_text)
|
profile = scene_camera_profile(scene_text, scene_entry=scene_entry, theme=theme, profile_key=profile_key)
|
||||||
if not profile:
|
if not profile:
|
||||||
return text
|
return text
|
||||||
lower = text.lower()
|
lower = text.lower()
|
||||||
@@ -410,6 +483,10 @@ def camera_scene_directive_for_context(
|
|||||||
pov_labels: list[str] | None = None,
|
pov_labels: list[str] | None = None,
|
||||||
subject_kind: str = "subjects",
|
subject_kind: str = "subjects",
|
||||||
compact_labels: Mapping[str, str] | None = None,
|
compact_labels: Mapping[str, str] | None = None,
|
||||||
|
*,
|
||||||
|
scene_entry: Any = None,
|
||||||
|
theme: Any = "",
|
||||||
|
profile_key: Any = "",
|
||||||
) -> str:
|
) -> str:
|
||||||
if (
|
if (
|
||||||
parsed_camera_config.get("camera_detail") == "off"
|
parsed_camera_config.get("camera_detail") == "off"
|
||||||
@@ -422,4 +499,7 @@ def camera_scene_directive_for_context(
|
|||||||
pov_labels,
|
pov_labels,
|
||||||
subject_kind,
|
subject_kind,
|
||||||
compact_labels,
|
compact_labels,
|
||||||
|
scene_entry=scene_entry,
|
||||||
|
theme=theme,
|
||||||
|
profile_key=profile_key,
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -570,6 +570,59 @@ def smoke_row_camera_policy() -> None:
|
|||||||
_expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording")
|
_expect("bag" not in library_composition.lower(), "row camera library composition leaked bag wording")
|
||||||
_expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording")
|
_expect("shoes" not in library_composition.lower(), "row camera library composition leaked shoes wording")
|
||||||
_expect("library" in library_composition.lower(), "row camera library composition did not become location-aware")
|
_expect("library" in library_composition.lower(), "row camera library composition did not become location-aware")
|
||||||
|
metadata_profile_row = {
|
||||||
|
"prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
|
||||||
|
"caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
|
||||||
|
"scene_text": "private themed room with neutral walls and warm lamps",
|
||||||
|
"scene_entry": {
|
||||||
|
"slug": "library_by_metadata",
|
||||||
|
"prompt": "private themed room with neutral walls and warm lamps",
|
||||||
|
"theme": "classical_library",
|
||||||
|
},
|
||||||
|
"scene_theme": "classical_library",
|
||||||
|
"composition": "polished mirror view with bag and shoes visible",
|
||||||
|
"subject_type": "woman",
|
||||||
|
"women_count": 1,
|
||||||
|
"men_count": 0,
|
||||||
|
}
|
||||||
|
updated_metadata_profile = row_camera.apply_camera_config(
|
||||||
|
metadata_profile_row,
|
||||||
|
_orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
|
||||||
|
compact_labels=pb.CAMERA_COMPACT_LABELS,
|
||||||
|
)
|
||||||
|
metadata_scene = _expect_text(
|
||||||
|
"row_camera_policy.metadata_scene",
|
||||||
|
updated_metadata_profile.get("camera_scene_directive"),
|
||||||
|
40,
|
||||||
|
)
|
||||||
|
_expect("Library camera layout" in metadata_scene, "row camera should prefer scene theme metadata over generic scene text")
|
||||||
|
_expect(
|
||||||
|
updated_metadata_profile.get("scene_camera_profile_key") == "classical_library",
|
||||||
|
"row camera should expose metadata-selected profile key",
|
||||||
|
)
|
||||||
|
_expect(
|
||||||
|
"library" in str(updated_metadata_profile.get("composition", "")).lower(),
|
||||||
|
"row camera metadata-selected profile did not clean composition",
|
||||||
|
)
|
||||||
|
explicit_profile_row = {
|
||||||
|
"prompt": "A generated adult prompt. Composition: vertical polished mirror view with bag and shoes visible. Avoid: low quality.",
|
||||||
|
"caption": "sxcppnl7, generated adult prompt, polished mirror view with bag and shoes visible, illustration",
|
||||||
|
"scene_text": "coworking lounge with tall windows, warm desks, and glass partitions",
|
||||||
|
"scene_camera_profile_key": "classical_library",
|
||||||
|
"composition": "polished mirror view with bag and shoes visible",
|
||||||
|
"subject_type": "woman",
|
||||||
|
"women_count": 1,
|
||||||
|
"men_count": 0,
|
||||||
|
}
|
||||||
|
updated_explicit_profile = row_camera.apply_camera_config(
|
||||||
|
explicit_profile_row,
|
||||||
|
_orbit_camera(horizontal_angle=315, vertical_angle=0, zoom=5.0),
|
||||||
|
compact_labels=pb.CAMERA_COMPACT_LABELS,
|
||||||
|
)
|
||||||
|
_expect(
|
||||||
|
"Library camera layout" in str(updated_explicit_profile.get("camera_scene_directive", "")),
|
||||||
|
"explicit scene_camera_profile_key should override text-matched scene profile",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def smoke_config_route_location_theme() -> None:
|
def smoke_config_route_location_theme() -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user