Extract row camera policy

2026-06-27 02:54:35 +02:00
parent 132d457bf7
commit 1cc65e35b5
5 changed files with 210 additions and 86 deletions
@@ -116,8 +116,7 @@ Keep here:

 Move or isolate later:

- pair assembly and camera mutation helpers that still live in
-  `prompt_builder.py`.
+- pair assembly helpers that still live in `prompt_builder.py`.

 Already isolated:

@@ -179,8 +178,9 @@ Already isolated:
  side-lying, and front/back group layouts.
 - camera option schema, orbit/Qwen translation, config parsing, camera
  directive text, and camera caption text live in `camera_config.py`;
-  camera-scene prose and coworking composition adaptation live in
-  `scene_camera_adapters.py`; `prompt_builder.py` still owns row mutation.
+  camera-scene prose lives in `scene_camera_adapters.py`; row-level camera
+  insertion, contextual coworking composition mutation, subject-kind detection,
+  and POV suppression live in `row_camera.py`.
 - shared hardcore environment-anchor cleanup lives in
  `hardcore_text_cleanup.py` and normalizes malformed pool joins before metadata
  reaches formatter routes.
@@ -528,6 +528,8 @@ Near-term:
  `/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV.
 - Keep `scene_camera_adapters.py` as the owner for location-aware camera prose;
  add new location families there one at a time.
+- Keep `row_camera.py` as the owner for inserting camera/scene directives into
+  generated rows, including POV suppression of normal third-person camera text.

 Medium-term:

@@ -94,6 +94,7 @@ Core helper ownership:
 | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. |
 | `route_metadata.py` | Shared row-level route metadata readers for normalized action family, position family/keys, and formatter hints used by Krea2, SDXL, and caption routes. |
 | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. |
+| `row_camera.py` | Row-level camera insertion, contextual coworking composition mutation, subject-kind detection, POV label fallback, and POV suppression of normal camera directives. |
 | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
 | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. |
 | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. |
@@ -911,8 +912,8 @@ Use these traces to narrow a problem in one pass.
 1. Check `scene_text` and `composition` separately.
 2. If scene is good and composition is bad, edit composition pools, not
   location pools.
-3. If a scene-camera adapter rewrote composition, inspect
-   `scene_camera_adapters.py`.
+3. If a scene-camera adapter rewrote composition, inspect `row_camera.py` first
+   for row mutation and `scene_camera_adapters.py` for location-specific prose.
 4. If the issue comes from `Location Theme`, edit `location_config.py` / `THEMATIC_LOCATION_PRESETS`.

 ### Trigger missing after formatting
@@ -40,7 +40,7 @@ try:
    from . import pair_rows
    from . import pair_options
    from . import row_normalization as row_policy
-    from . import scene_camera_adapters
+    from . import row_camera as row_camera_policy
    from . import seed_config as seed_policy
    from .hardcore_text_cleanup import (
        sanitize_hardcore_axis_values as _sanitize_hardcore_axis_values,
@@ -81,7 +81,7 @@ except ImportError:  # Allows local smoke tests with `python -c`.
    import pair_rows
    import pair_options
    import row_normalization as row_policy
-    import scene_camera_adapters
+    import row_camera as row_camera_policy
    import seed_config as seed_policy
    from hardcore_text_cleanup import (
        sanitize_hardcore_axis_values as _sanitize_hardcore_axis_values,
@@ -1699,42 +1699,19 @@ def _camera_directive(camera_config: str | dict[str, Any] | None) -> tuple[str,


 def _insert_positive_directive(prompt: str, directive: str) -> str:
-    marker = " Avoid:"
-    if marker in prompt:
-        before, after = prompt.split(marker, 1)
-        return f"{before.rstrip()} {directive}{marker}{after}"
-    return f"{prompt.rstrip()} {directive}"
+    return row_camera_policy.insert_positive_directive(prompt, directive)


 def _camera_caption_text(parsed: dict[str, Any]) -> str:
-    return camera_policy.camera_caption_text(parsed)
+    return row_camera_policy.camera_caption_text(parsed)


 def _coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
-    return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
+    return row_camera_policy.coworking_composition_prompt(scene_text, composition, subject_kind)


 def _apply_coworking_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
-    scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
-    old_composition = str(row.get("composition") or "").strip()
-    new_composition = _coworking_composition_prompt(scene_text, old_composition, subject_kind)
-    if not old_composition or new_composition == old_composition:
-        return row
-    row["source_composition"] = row.get("source_composition") or old_composition
-    row["composition"] = new_composition
-    row["composition_prompt"] = _composition_prompt(new_composition)
-    prompt = str(row.get("prompt") or "")
-    replacements = (
-        (f"Composition: vertical {old_composition}.", f"Composition: {_composition_prompt(new_composition)}."),
-        (f"Composition: {old_composition}.", f"Composition: {_composition_prompt(new_composition)}."),
-        (f"Framed as {old_composition}.", f"Framed as {new_composition}."),
-    )
-    for old_fragment, new_fragment in replacements:
-        if old_fragment in prompt:
-            row["prompt"] = prompt.replace(old_fragment, new_fragment)
-            break
-    row["caption"] = str(row.get("caption") or "").replace(f", {old_composition},", f", {new_composition},")
-    return row
+    return row_camera_policy.apply_contextual_composition(row, subject_kind)


 def _camera_scene_directive_for_context(
@@ -1744,10 +1721,10 @@ def _camera_scene_directive_for_context(
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
 ) -> tuple[str, dict[str, Any]]:
-    parsed = _parse_camera_config(camera_config)
-    directive = scene_camera_adapters.camera_scene_directive_for_context(
+    directive, parsed = row_camera_policy.camera_scene_directive_for_context(
        scene_text,
-        parsed,
+        composition,
+        camera_config,
        pov_labels,
        subject_kind,
        CAMERA_COMPACT_LABELS,
@@ -1756,53 +1733,23 @@ def _camera_scene_directive_for_context(


 def _row_camera_subject_kind(row: dict[str, Any]) -> str:
-    subject_type = str(row.get("subject_type") or row.get("primary_subject") or "").lower()
-    if subject_type in ("woman", "adult woman") or subject_type == "single_any":
-        return "woman"
-    if subject_type in ("man", "adult man"):
-        return "man"
-    try:
-        women_count = int(row.get("women_count") or 0)
-        men_count = int(row.get("men_count") or 0)
-    except (TypeError, ValueError):
-        women_count = men_count = 0
-    if women_count == 1 and men_count == 0:
-        return "woman"
-    if women_count == 0 and men_count == 1:
-        return "man"
-    if women_count + men_count == 2:
-        return "couple"
-    return "subjects"
+    return row_camera_policy.row_camera_subject_kind(row)


-def _apply_camera_config(row: dict[str, Any], camera_config: str | dict[str, Any] | None) -> dict[str, Any]:
-    directive, parsed = _camera_directive(camera_config)
-    pov_labels = _pov_character_labels(
+def _camera_pov_labels_for_row(row: dict[str, Any]) -> list[str]:
+    return _pov_character_labels(
        _character_slot_label_map(_parse_character_cast(row.get("character_cast_slots"))),
        int(row.get("men_count") or 0) if str(row.get("men_count") or "").isdigit() else 0,
    )
-    if not pov_labels:
-        pov_labels = [str(label) for label in _list_from(row.get("pov_character_labels")) if str(label).strip()]
-    subject_kind = _row_camera_subject_kind(row)
-    row = _apply_coworking_composition(row, subject_kind)
-    scene_directive, parsed = _camera_scene_directive_for_context(
-        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
-        row.get("composition") or row.get("source_composition"),
-        parsed,
-        pov_labels,
-        subject_kind,
+
+
+def _apply_camera_config(row: dict[str, Any], camera_config: str | dict[str, Any] | None) -> dict[str, Any]:
+    return row_camera_policy.apply_camera_config(
+        row,
+        camera_config,
+        pov_label_resolver=_camera_pov_labels_for_row,
+        compact_labels=CAMERA_COMPACT_LABELS,
    )
-    row["camera_config"] = parsed
-    row["camera_scene_directive"] = scene_directive
-    row["camera_directive"] = "" if pov_labels else directive
-    combined_directive = " ".join(part for part in (scene_directive, row["camera_directive"]) if part)
-    if not combined_directive:
-        return row
-    row["prompt"] = _insert_positive_directive(row["prompt"], combined_directive)
-    camera_caption = _camera_caption_text(parsed)
-    if camera_caption and not pov_labels:
-        row["caption"] = f"{row.get('caption', '').rstrip()}, {camera_caption}"
-    return row


 def _row_seed(seed: int, row_number: int, salt: int = 0) -> int:
@@ -3168,13 +3115,7 @@ def _apply_character_profile_to_context(


 def _composition_prompt(composition: str) -> str:
-    composition = str(composition or "").strip()
-    if not composition:
-        return composition
-    lower = composition.lower()
-    if lower.startswith("vertical ") or " vertical " in lower or lower.endswith(" vertical"):
-        return composition
-    return f"vertical {composition}"
+    return row_camera_policy.composition_prompt(composition)


 def _appearance_for_subject(
@@ -0,0 +1,150 @@
+from __future__ import annotations
+
+from typing import Any, Callable, Mapping
+
+try:
+    from . import camera_config as camera_policy
+    from . import scene_camera_adapters
+except ImportError:  # Allows local smoke tests with top-level imports.
+    import camera_config as camera_policy
+    import scene_camera_adapters
+
+
+PovLabelResolver = Callable[[dict[str, Any]], list[str]]
+
+
+def _list_from(value: Any) -> list[Any]:
+    """Coerce ``value`` to a list: ``None`` gives ``[]``, a list is returned as-is, anything else is wrapped."""
+    if isinstance(value, list):
+        # Returned without copying, so the caller shares the same list object.
+        return value
+    return [] if value is None else [value]
+
+
+def composition_prompt(composition: Any) -> str:
+    """Return ``composition`` as prompt text that mentions a vertical frame.
+
+    The value is stringified and stripped; an empty result is returned
+    unchanged.  When ``vertical`` already appears as a standalone word the text
+    is kept as-is, otherwise ``"vertical "`` is prepended.
+    """
+    text = str(composition or "").strip()
+    if not text:
+        return text
+    # Compare whole whitespace-separated words (ignoring adjacent punctuation)
+    # so a bare "vertical" or "vertical, close-up" is not prefixed a second time.
+    words = (word.strip(".,;:!?()") for word in text.lower().split())
+    if "vertical" in words:
+        return text
+    return f"vertical {text}"
+
+
+def insert_positive_directive(prompt: str, directive: str) -> str:
+    """Place ``directive`` before the first ``" Avoid:"`` section, or at the end when there is none."""
+    # partition() leaves the separator and tail empty when the marker is absent,
+    # which collapses both cases into a single format expression.
+    positive, avoid_marker, negative = prompt.partition(" Avoid:")
+    return f"{positive.rstrip()} {directive}{avoid_marker}{negative}"
+
+
+def camera_caption_text(parsed: dict[str, Any]) -> str:
+    """Return ``camera_policy.camera_caption_text(parsed)``; a thin pass-through to the camera config module."""
+    return camera_policy.camera_caption_text(parsed)
+
+
+def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
+    """Delegate to ``scene_camera_adapters.coworking_composition_prompt`` with the same arguments."""
+    return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)
+
+
+def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
+    """Swap the row's composition for its scene-adapted variant, mutating ``row``.
+
+    The scene text is read from ``scene_text``, ``source_scene_text`` or
+    ``scene`` (first truthy value).  When the row has no composition, or the
+    adapter returns it unchanged, the row is returned untouched.  Otherwise
+    ``source_composition``, ``composition``, ``composition_prompt``, ``prompt``
+    and ``caption`` are updated and the same row object is returned.
+    """
+    scene = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
+    previous = str(row.get("composition") or "").strip()
+    adapted = coworking_composition_prompt(scene, previous, subject_kind)
+    if not previous or adapted == previous:
+        return row
+    adapted_prompt = composition_prompt(adapted)
+    # Keep an already recorded source composition rather than overwriting it.
+    row["source_composition"] = row.get("source_composition") or previous
+    row["composition"] = adapted
+    row["composition_prompt"] = adapted_prompt
+    prompt_text = str(row.get("prompt") or "")
+    candidates = (
+        (f"Composition: vertical {previous}.", f"Composition: {adapted_prompt}."),
+        (f"Composition: {previous}.", f"Composition: {adapted_prompt}."),
+        (f"Framed as {previous}.", f"Framed as {adapted}."),
+    )
+    # Only the first phrasing found in the prompt is rewritten.
+    matched = next((pair for pair in candidates if pair[0] in prompt_text), None)
+    if matched is not None:
+        row["prompt"] = prompt_text.replace(*matched)
+    row["caption"] = str(row.get("caption") or "").replace(f", {previous},", f", {adapted},")
+    return row
+
+
+def camera_scene_directive_for_context(
+    scene_text: Any,
+    composition: Any,
+    camera_config: str | dict[str, Any] | None,
+    pov_labels: list[str] | None = None,
+    subject_kind: str = "subjects",
+    compact_labels: Mapping[str, str] | None = None,
+) -> tuple[str, dict[str, Any]]:
+    """Parse ``camera_config`` and build the scene-aware camera directive.
+
+    Returns ``(directive, parsed)`` where ``parsed`` is the result of
+    ``camera_policy.parse_camera_config`` and ``directive`` comes from
+    ``scene_camera_adapters.camera_scene_directive_for_context``.
+    """
+    # NOTE(review): ``composition`` is accepted but not forwarded to the adapter
+    # below; presumably kept for call-site compatibility -- confirm.
+    parsed = camera_policy.parse_camera_config(camera_config)
+    directive = scene_camera_adapters.camera_scene_directive_for_context(
+        scene_text,
+        parsed,
+        pov_labels,
+        subject_kind,
+        compact_labels,
+    )
+    return directive, parsed
+
+
+def row_camera_subject_kind(row: dict[str, Any]) -> str:
+    """Classify a row as ``"woman"``, ``"man"``, ``"couple"`` or ``"subjects"``.
+
+    An explicit ``subject_type`` (or ``primary_subject``) wins; otherwise the
+    ``women_count`` / ``men_count`` values decide.  If either count cannot be
+    converted to ``int`` both are treated as zero.
+    """
+    declared = str(row.get("subject_type") or row.get("primary_subject") or "").lower()
+    if declared in {"woman", "adult woman", "single_any"}:
+        return "woman"
+    if declared in {"man", "adult man"}:
+        return "man"
+    try:
+        counts = (int(row.get("women_count") or 0), int(row.get("men_count") or 0))
+    except (TypeError, ValueError):
+        counts = (0, 0)
+    if counts == (1, 0):
+        return "woman"
+    if counts == (0, 1):
+        return "man"
+    return "couple" if sum(counts) == 2 else "subjects"
+
+
+def row_pov_labels(row: dict[str, Any], resolver: PovLabelResolver | None = None) -> list[str]:
+    """Return the row's POV labels as non-blank strings.
+
+    Labels produced by ``resolver`` take precedence; when there is no resolver
+    or it yields nothing usable, the row's ``pov_character_labels`` value is
+    used instead.
+    """
+
+    def _non_blank(raw: Any) -> list[str]:
+        return [str(item) for item in _list_from(raw) if str(item).strip()]
+
+    from_resolver = _non_blank(resolver(row)) if resolver is not None else []
+    return from_resolver or _non_blank(row.get("pov_character_labels"))
+
+
+def apply_camera_config(
+    row: dict[str, Any],
+    camera_config: str | dict[str, Any] | None,
+    *,
+    pov_label_resolver: PovLabelResolver | None = None,
+    compact_labels: Mapping[str, str] | None = None,
+) -> dict[str, Any]:
+    """Insert camera and scene-camera directives into a generated row.
+
+    The row is mutated and returned.  ``camera_config``,
+    ``camera_scene_directive`` and ``camera_directive`` are always written;
+    ``prompt`` is extended only when there is directive text to add, and
+    ``caption`` only when there is camera caption text.  When the row has POV
+    labels the normal camera directive is blanked and no camera caption is
+    appended.
+
+    Args:
+        row: Prompt row to update.
+        camera_config: Raw camera configuration handed to ``camera_policy``.
+        pov_label_resolver: Optional callable producing POV labels for the row;
+            the row's ``pov_character_labels`` is the fallback.
+        compact_labels: Label mapping forwarded to the scene-camera adapter.
+    """
+    directive, parsed = camera_policy.camera_directive(camera_config)
+    pov_labels = row_pov_labels(row, pov_label_resolver)
+    subject_kind = row_camera_subject_kind(row)
+    row = apply_contextual_composition(row, subject_kind)
+    # The already parsed config is handed on; the helper parses its argument again.
+    scene_directive, parsed = camera_scene_directive_for_context(
+        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
+        row.get("composition") or row.get("source_composition"),
+        parsed,
+        pov_labels,
+        subject_kind,
+        compact_labels,
+    )
+    row["camera_config"] = parsed
+    row["camera_scene_directive"] = scene_directive
+    row["camera_directive"] = "" if pov_labels else directive
+    combined_directive = " ".join(part for part in (scene_directive, row["camera_directive"]) if part)
+    if not combined_directive:
+        return row
+    row["prompt"] = insert_positive_directive(str(row.get("prompt") or ""), combined_directive)
+    caption = camera_caption_text(parsed)
+    if caption and not pov_labels:
+        # Coerce like the prompt above: a None or non-string caption would
+        # otherwise raise AttributeError on .rstrip().
+        existing_caption = str(row.get("caption") or "").rstrip()
+        row["caption"] = f"{existing_caption}, {caption}"
+    return row
@@ -44,6 +44,7 @@ import loop_nodes  # noqa: E402
 import prompt_builder as pb  # noqa: E402
 import row_normalization  # noqa: E402
 import route_metadata  # noqa: E402
+import row_camera  # noqa: E402
 import server_routes  # noqa: E402
 import sdxl_formatter  # noqa: E402
 import sdxl_presets  # noqa: E402
@@ -466,6 +467,34 @@ def smoke_camera_scene_single() -> None:
    _expect_formatter_outputs(row, "camera_scene_single", target="single")


+def smoke_row_camera_policy() -> None:
+    """Smoke-check ``row_camera.apply_camera_config`` on a POV coworking couple row."""
+    row = {
+        "prompt": "A generated adult prompt. Composition: vertical office-lobby walking composition. Avoid: low quality.",
+        "caption": "sxcppnl7, generated adult prompt, office-lobby walking composition, illustration",
+        "scene_text": "coworking lounge with tall windows, warm desks, and a polished outfit-check angle",
+        "composition": "office-lobby walking composition",
+        "subject_type": "configured_cast",
+        "women_count": 1,
+        "men_count": 1,
+        "pov_character_labels": ["Man A"],
+    }
+    # No pov_label_resolver is passed, so the POV labels come from the row's
+    # own "pov_character_labels" entry.
+    updated = row_camera.apply_camera_config(
+        row,
+        _orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=5.5),
+        compact_labels=pb.CAMERA_COMPACT_LABELS,
+    )
+    # POV rows: the normal camera directive and camera caption must be suppressed.
+    _expect(updated.get("camera_directive") == "", "POV row camera policy should suppress normal camera directive")
+    scene_directive = _expect_text("row_camera_policy.camera_scene_directive", updated.get("camera_scene_directive"), 40)
+    _expect("Coworking camera layout from POV" in scene_directive, "row camera policy missed POV coworking layout")
+    _expect("first-person spatial geometry" in scene_directive, "row camera policy lost POV geometry instruction")
+    _expect("Camera:" not in updated.get("prompt", ""), "row camera policy should not add normal Camera label")
+    _expect("45-degree front-right quarter view" not in updated.get("caption", ""), "POV row camera policy should not append camera caption")
+    # One woman plus one man should be classified as a couple for the coworking composition.
+    _expect(
+        "coworking lounge frame with the couple near a desk edge" in updated.get("composition", ""),
+        "row camera policy did not adapt coworking composition for couple rows",
+    )
+
+
 def smoke_config_route_location_theme() -> None:
    location_config, composition_config = _classical_library_theme_configs()
    row = pb.build_prompt_from_configs(
@@ -3380,6 +3409,7 @@ def smoke_node_profile_filter_registration() -> None:
 SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [
    ("builtin_single_woman", smoke_builtin_single),
    ("camera_scene_single", smoke_camera_scene_single),
+    ("row_camera_policy", smoke_row_camera_policy),
    ("config_route_location_theme", smoke_config_route_location_theme),
    ("location_config_policy", smoke_location_config_policy),
    ("category_cast_config_policy", smoke_category_cast_config_policy),