diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 706395e..4fd81f5 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -116,8 +116,7 @@ Keep here: Move or isolate later: -- pair assembly and camera mutation helpers that still live in - `prompt_builder.py`. +- pair assembly helpers that still live in `prompt_builder.py`. Already isolated: @@ -179,8 +178,9 @@ Already isolated: side-lying, and front/back group layouts. - camera option schema, orbit/Qwen translation, config parsing, camera directive text, and camera caption text live in `camera_config.py`; - camera-scene prose and coworking composition adaptation live in - `scene_camera_adapters.py`; `prompt_builder.py` still owns row mutation. + camera-scene prose lives in `scene_camera_adapters.py`; row-level camera + insertion, contextual coworking composition mutation, subject-kind detection, + and POV suppression live in `row_camera.py`. - shared hardcore environment-anchor cleanup lives in `hardcore_text_cleanup.py` and normalizes malformed pool joins before metadata reaches formatter routes. @@ -528,6 +528,8 @@ Near-term: `/home/ethanfel/.codex/memories/scene-camera-system.md` when editing POV. - Keep `scene_camera_adapters.py` as the owner for location-aware camera prose; add new location families there one at a time. +- Keep `row_camera.py` as the owner for inserting camera/scene directives into + generated rows, including POV suppression of normal third-person camera text. Medium-term: diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 542974b..363b2d7 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -94,6 +94,7 @@ Core helper ownership: | `hardcore_action_metadata.py` | Source action-family and position-family metadata used by Krea2, SDXL, and caption routes. 
| | `route_metadata.py` | Shared row-level route metadata readers for normalized action family, position family/keys, and formatter hints used by Krea2, SDXL, and caption routes. | | `scene_camera_adapters.py` | Location-aware camera/scene prose such as coworking lounge camera layout. | +| `row_camera.py` | Row-level camera insertion, contextual coworking composition mutation, subject-kind detection, POV label fallback, and POV suppression of normal camera directives. | | `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. | | `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup. | | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. | @@ -911,8 +912,8 @@ Use these traces to narrow a problem in one pass. 1. Check `scene_text` and `composition` separately. 2. If scene is good and composition is bad, edit composition pools, not location pools. -3. If a scene-camera adapter rewrote composition, inspect - `scene_camera_adapters.py`. +3. If a scene-camera adapter rewrote composition, inspect `row_camera.py` first + for row mutation and `scene_camera_adapters.py` for location-specific prose. 4. If the issue comes from `Location Theme`, edit `location_config.py` / `THEMATIC_LOCATION_PRESETS`. ### Trigger missing after formatting diff --git a/prompt_builder.py b/prompt_builder.py index eabcac4..8d213d2 100644 --- a/prompt_builder.py +++ b/prompt_builder.py @@ -40,7 +40,7 @@ try: from . import pair_rows from . import pair_options from . import row_normalization as row_policy - from . import scene_camera_adapters + from . import row_camera as row_camera_policy from . 
def _insert_positive_directive(prompt: str, directive: str) -> str:
    """Insert *directive* into *prompt* via ``row_camera_policy``.

    Kept as a private alias so existing callers inside this module keep
    working after the implementation moved to ``row_camera.py``.
    """
    merged = row_camera_policy.insert_positive_directive(prompt, directive)
    return merged


def _camera_caption_text(parsed: dict[str, Any]) -> str:
    """Return the caption text for a parsed camera config (delegates to ``row_camera_policy``)."""
    caption_text = row_camera_policy.camera_caption_text(parsed)
    return caption_text


def _coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
    """Return the coworking-adapted composition text (delegates to ``row_camera_policy``)."""
    adapted = row_camera_policy.coworking_composition_prompt(scene_text, composition, subject_kind)
    return adapted


def _apply_coworking_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
    """Apply the contextual composition rewrite to *row* (delegates to ``row_camera_policy``)."""
    updated_row = row_camera_policy.apply_contextual_composition(row, subject_kind)
    return updated_row


def _row_camera_subject_kind(row: dict[str, Any]) -> str:
    """Return the camera subject kind for *row* (delegates to ``row_camera_policy``)."""
    kind = row_camera_policy.row_camera_subject_kind(row)
    return kind


def _camera_pov_labels_for_row(row: dict[str, Any]) -> list[str]:
    """Resolve POV character labels for *row* from its cast slots and men count.

    ``men_count`` is only honoured when its string form is all digits;
    anything else is treated as 0.
    """
    slot_labels = _character_slot_label_map(_parse_character_cast(row.get("character_cast_slots")))
    raw_men_count = row.get("men_count")
    if str(raw_men_count or "").isdigit():
        men_count = int(raw_men_count or 0)
    else:
        men_count = 0
    return _pov_character_labels(slot_labels, men_count)


def _apply_camera_config(row: dict[str, Any], camera_config: str | dict[str, Any] | None) -> dict[str, Any]:
    """Apply *camera_config* to *row* through ``row_camera_policy``.

    This module supplies the POV label resolver and the compact label table;
    the row mutation itself lives in ``row_camera.py``.
    """
    policy_options = {
        "pov_label_resolver": _camera_pov_labels_for_row,
        "compact_labels": CAMERA_COMPACT_LABELS,
    }
    return row_camera_policy.apply_camera_config(row, camera_config, **policy_options)


def _composition_prompt(composition: str) -> str:
    """Return the prompt form of *composition* (delegates to ``row_camera_policy``)."""
    prompt_form = row_camera_policy.composition_prompt(composition)
    return prompt_form
# Signature of the optional callback that lets the caller resolve POV labels
# for a row before the row's own ``pov_character_labels`` field is consulted.
PovLabelResolver = Callable[[dict[str, Any]], list[str]]


def _list_from(value: Any) -> list[Any]:
    """Coerce *value* to a list.

    ``None`` becomes ``[]``, a list is returned as is, a tuple is expanded
    into a list, and any other value is wrapped in a one-element list.
    """
    if value is None:
        return []
    if isinstance(value, list):
        return value
    if isinstance(value, tuple):
        # A tuple of labels is a sequence of labels, not one label.
        return list(value)
    return [value]


def _clean_labels(values: Any) -> list[str]:
    """Stringify *values* and drop entries that are blank after stripping."""
    return [str(label) for label in _list_from(values) if str(label).strip()]


def _append_caption_part(caption: Any, addition: str) -> str:
    """Append *addition* to *caption* as a comma-separated part.

    A missing, ``None`` or blank caption yields just *addition*, so the
    result never starts with a stray ``", "``.
    """
    base = str(caption or "").rstrip()
    return f"{base}, {addition}" if base else addition


def composition_prompt(composition: Any) -> str:
    """Return *composition* prefixed with ``"vertical "`` unless it already says so.

    Empty or falsy input returns ``""``. The check for an existing
    "vertical" word is case-insensitive; the returned text keeps the
    original casing.
    """
    composition = str(composition or "").strip()
    if not composition:
        return composition
    lower = composition.lower()
    if lower.startswith("vertical ") or " vertical " in lower or lower.endswith(" vertical"):
        return composition
    return f"vertical {composition}"


def insert_positive_directive(prompt: str, directive: str) -> str:
    """Insert *directive* before the first ``" Avoid:"`` marker of *prompt*.

    When the marker is absent the directive is appended to the end. Empty
    pieces are skipped, so an empty prompt or an empty directive does not
    leave a leading or doubled space behind.
    """
    marker = " Avoid:"
    head, found, tail = str(prompt or "").partition(marker)
    merged = " ".join(part for part in (head.rstrip(), directive) if part)
    if found:
        return f"{merged}{marker}{tail}"
    return merged


def camera_caption_text(parsed: dict[str, Any]) -> str:
    """Return the caption text for a parsed camera config (delegates to ``camera_policy``)."""
    return camera_policy.camera_caption_text(parsed)


def coworking_composition_prompt(scene_text: Any, composition: Any, subject_kind: str = "subjects") -> str:
    """Return the coworking-adapted composition (delegates to ``scene_camera_adapters``)."""
    return scene_camera_adapters.coworking_composition_prompt(scene_text, composition, subject_kind)


def apply_contextual_composition(row: dict[str, Any], subject_kind: str) -> dict[str, Any]:
    """Rewrite the row's composition when the scene adapter proposes a new one.

    The row is mutated in place and returned. When the row has no
    composition, or the adapter returns it unchanged, the row is returned
    untouched. Otherwise ``source_composition`` (if not already set),
    ``composition`` and ``composition_prompt`` are updated, the first
    matching composition fragment in ``prompt`` is replaced, and the
    ``", <old>,"`` fragment in ``caption`` is replaced.
    """
    scene_text = row.get("scene_text") or row.get("source_scene_text") or row.get("scene")
    old_composition = str(row.get("composition") or "").strip()
    new_composition = coworking_composition_prompt(scene_text, old_composition, subject_kind)
    if not old_composition or new_composition == old_composition:
        return row
    new_prompt_form = composition_prompt(new_composition)
    row["source_composition"] = row.get("source_composition") or old_composition
    row["composition"] = new_composition
    row["composition_prompt"] = new_prompt_form
    prompt = str(row.get("prompt") or "")
    # Ordered from most to least specific; only the first matching phrasing is rewritten.
    replacements = (
        (f"Composition: vertical {old_composition}.", f"Composition: {new_prompt_form}."),
        (f"Composition: {old_composition}.", f"Composition: {new_prompt_form}."),
        (f"Framed as {old_composition}.", f"Framed as {new_composition}."),
    )
    for old_fragment, new_fragment in replacements:
        if old_fragment in prompt:
            row["prompt"] = prompt.replace(old_fragment, new_fragment)
            break
    row["caption"] = str(row.get("caption") or "").replace(f", {old_composition},", f", {new_composition},")
    return row


def camera_scene_directive_for_context(
    scene_text: Any,
    composition: Any,
    camera_config: str | dict[str, Any] | None,
    pov_labels: list[str] | None = None,
    subject_kind: str = "subjects",
    compact_labels: Mapping[str, str] | None = None,
) -> tuple[str, dict[str, Any]]:
    """Return ``(scene_directive, parsed_camera_config)`` for a scene.

    *camera_config* is parsed with ``camera_policy.parse_camera_config`` and
    the directive text comes from ``scene_camera_adapters``.

    NOTE(review): *composition* is accepted but not used here; it looks like
    it is kept so the signature matches the caller in ``prompt_builder.py``.
    """
    parsed = camera_policy.parse_camera_config(camera_config)
    directive = scene_camera_adapters.camera_scene_directive_for_context(
        scene_text,
        parsed,
        pov_labels,
        subject_kind,
        compact_labels,
    )
    return directive, parsed


def row_camera_subject_kind(row: dict[str, Any]) -> str:
    """Classify the row's subject as ``"woman"``, ``"man"``, ``"couple"`` or ``"subjects"``.

    ``subject_type`` (falling back to ``primary_subject``) decides first;
    otherwise ``women_count`` / ``men_count`` are used. Counts that cannot be
    converted to ``int`` are both treated as 0.
    """
    subject_type = str(row.get("subject_type") or row.get("primary_subject") or "").lower()
    if subject_type in ("woman", "adult woman", "single_any"):
        return "woman"
    if subject_type in ("man", "adult man"):
        return "man"
    try:
        women_count = int(row.get("women_count") or 0)
        men_count = int(row.get("men_count") or 0)
    except (TypeError, ValueError):
        women_count = men_count = 0
    if women_count == 1 and men_count == 0:
        return "woman"
    if women_count == 0 and men_count == 1:
        return "man"
    if women_count + men_count == 2:
        return "couple"
    return "subjects"


def row_pov_labels(row: dict[str, Any], resolver: PovLabelResolver | None = None) -> list[str]:
    """Return the non-blank POV labels for *row*.

    Labels from *resolver* win when it yields any; otherwise the row's own
    ``pov_character_labels`` field is used as the fallback.
    """
    if resolver is not None:
        resolved = _clean_labels(resolver(row))
        if resolved:
            return resolved
    return _clean_labels(row.get("pov_character_labels"))


def apply_camera_config(
    row: dict[str, Any],
    camera_config: str | dict[str, Any] | None,
    *,
    pov_label_resolver: PovLabelResolver | None = None,
    compact_labels: Mapping[str, str] | None = None,
) -> dict[str, Any]:
    """Insert camera and scene directives into a generated row.

    The row is mutated in place and returned. ``camera_config``,
    ``camera_scene_directive`` and ``camera_directive`` are always written.
    When POV labels are present the normal camera directive is set to ``""``
    and the camera caption text is not appended.
    """
    directive, parsed = camera_policy.camera_directive(camera_config)
    pov_labels = row_pov_labels(row, pov_label_resolver)
    subject_kind = row_camera_subject_kind(row)
    row = apply_contextual_composition(row, subject_kind)
    # The already-parsed config is passed on and parsed again downstream.
    # NOTE(review): this relies on parse_camera_config accepting its own output - confirm.
    scene_directive, parsed = camera_scene_directive_for_context(
        row.get("scene_text") or row.get("source_scene_text") or row.get("scene"),
        row.get("composition") or row.get("source_composition"),
        parsed,
        pov_labels,
        subject_kind,
        compact_labels,
    )
    row["camera_config"] = parsed
    row["camera_scene_directive"] = scene_directive
    row["camera_directive"] = "" if pov_labels else directive
    combined_directive = " ".join(part for part in (scene_directive, row["camera_directive"]) if part)
    if not combined_directive:
        return row
    row["prompt"] = insert_positive_directive(str(row.get("prompt") or ""), combined_directive)
    caption = camera_caption_text(parsed)
    if caption and not pov_labels:
        # Guard the caption like the prompt above: a None caption must not raise.
        row["caption"] = _append_caption_part(row.get("caption"), caption)
    return row
def smoke_row_camera_policy() -> None:
    """Smoke-check ``row_camera.apply_camera_config`` on a POV coworking couple row.

    Uses this file's ``_expect`` / ``_expect_text`` helpers to check that the
    normal camera directive and camera caption are suppressed for a POV row,
    that the POV coworking scene directive is produced, and that the
    composition is adapted for a couple.
    """
    source_row = {
        "prompt": "A generated adult prompt. Composition: vertical office-lobby walking composition. Avoid: low quality.",
        "caption": "sxcppnl7, generated adult prompt, office-lobby walking composition, illustration",
        "scene_text": "coworking lounge with tall windows, warm desks, and a polished outfit-check angle",
        "composition": "office-lobby walking composition",
        "subject_type": "configured_cast",
        "women_count": 1,
        "men_count": 1,
        "pov_character_labels": ["Man A"],
    }
    camera = _orbit_camera(horizontal_angle=45, vertical_angle=0, zoom=5.5)
    updated = row_camera.apply_camera_config(
        source_row,
        camera,
        compact_labels=pb.CAMERA_COMPACT_LABELS,
    )

    # A POV row must not carry the normal third-person camera directive.
    _expect(updated.get("camera_directive") == "", "POV row camera policy should suppress normal camera directive")

    scene_directive = _expect_text(
        "row_camera_policy.camera_scene_directive",
        updated.get("camera_scene_directive"),
        40,
    )
    _expect("Coworking camera layout from POV" in scene_directive, "row camera policy missed POV coworking layout")
    _expect("first-person spatial geometry" in scene_directive, "row camera policy lost POV geometry instruction")

    prompt_text = updated.get("prompt", "")
    _expect("Camera:" not in prompt_text, "row camera policy should not add normal Camera label")

    caption_text = updated.get("caption", "")
    _expect(
        "45-degree front-right quarter view" not in caption_text,
        "POV row camera policy should not append camera caption",
    )

    composition_text = updated.get("composition", "")
    _expect(
        "coworking lounge frame with the couple near a desk edge" in composition_text,
        "row camera policy did not adapt coworking composition for couple rows",
    )