diff --git a/caption_format_route.py b/caption_format_route.py new file mode 100644 index 0000000..49199c3 --- /dev/null +++ b/caption_format_route.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Callable + + +@dataclass(frozen=True) +class CaptionFormatRequest: + source_text: str + metadata_json: str = "" + input_hint: str = "auto" + trigger: str = "" + include_trigger: bool = True + detail_level: str = "balanced" + style_policy: str = "drop_style_tail" + caption_profile: str = "manual_controls" + + +@dataclass(frozen=True) +class CaptionFormatRoute: + caption: str + method: str + branch: str + input_hint: str + detail_level: str + style_policy: str + include_trigger: bool + keep_style: bool + + def as_tuple(self) -> tuple[str, str]: + return self.caption, self.method + + +@dataclass(frozen=True) +class CaptionFormatDependencies: + apply_caption_profile: Callable[[str, str, str, bool], tuple[str, str, bool]] + keep_style_terms: Callable[[str], bool] + row_from_inputs: Callable[[str, str, str], tuple[dict[str, Any] | None, str]] + metadata_to_prose: Callable[[dict[str, Any], str, bool], tuple[str, str]] + text_to_prose: Callable[[str, str, bool], tuple[str, str]] + with_trigger: Callable[[str, str, bool], str] + sanitize_prose_text: Callable[..., str] + + +def naturalize_caption_result( + request: CaptionFormatRequest, + deps: CaptionFormatDependencies, +) -> CaptionFormatRoute: + input_hint = request.input_hint if request.input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto" + detail_level, style_policy, include_trigger = deps.apply_caption_profile( + request.caption_profile, + request.detail_level, + request.style_policy, + request.include_trigger, + ) + keep_style = deps.keep_style_terms(style_policy) + row, row_method = deps.row_from_inputs(request.source_text, request.metadata_json, input_hint) + if row is not None: + prose, method = deps.metadata_to_prose(row, detail_level, keep_style) + caption = deps.sanitize_prose_text( + deps.with_trigger(prose, request.trigger, include_trigger), + triggers=(request.trigger,), + ) + full_method = f"{row_method}:{method}" + return CaptionFormatRoute( + caption=caption, + method=full_method, + branch="metadata", + input_hint=input_hint, + detail_level=detail_level, + style_policy=style_policy, + include_trigger=include_trigger, + keep_style=keep_style, + ) + + prose, method = deps.text_to_prose(request.source_text, detail_level, keep_style) + caption = deps.sanitize_prose_text( + deps.with_trigger(prose, request.trigger, include_trigger), + triggers=(request.trigger,), + ) + return CaptionFormatRoute( + caption=caption, + method=method, + branch="text", + input_hint=input_hint, + detail_level=detail_level, + style_policy=style_policy, + include_trigger=include_trigger, + keep_style=keep_style, + ) + + +def naturalize_caption( + request: CaptionFormatRequest, + deps: CaptionFormatDependencies, +) -> tuple[str, str]: + return naturalize_caption_result(request, deps).as_tuple() diff --git a/caption_naturalizer.py b/caption_naturalizer.py index 5b52960..066972d 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -3,12 +3,14 @@ from __future__ import annotations from typing import Any try: + from . import caption_format_route from . import caption_metadata_routes from . import caption_policy from . import caption_text_policy from . import formatter_input as input_policy from .prompt_hygiene import sanitize_prose_text except ImportError: # Allows local smoke tests with `python -c`. + import caption_format_route import caption_metadata_routes import caption_policy import caption_text_policy @@ -318,6 +320,23 @@ def _text_to_prose(text: str, detail_level: str, keep_style: bool) -> tuple[str, return prose or _sentence(text), "text(fallback)" +def _caption_format_dependencies() -> caption_format_route.CaptionFormatDependencies: + return caption_format_route.CaptionFormatDependencies( + apply_caption_profile=lambda profile, detail, style, include: caption_policy.apply_caption_profile( + profile, + detail_level=detail, + style_policy=style, + include_trigger=include, + ), + keep_style_terms=caption_policy.keep_style_terms, + row_from_inputs=_row_from_inputs, + metadata_to_prose=_metadata_to_prose, + text_to_prose=_text_to_prose, + with_trigger=_with_trigger, + sanitize_prose_text=sanitize_prose_text, + ) + + def naturalize_caption( source_text: str, metadata_json: str = "", @@ -329,19 +348,16 @@ def naturalize_caption( caption_profile: str = caption_policy.CAPTION_PROFILE_DEFAULT, ) -> tuple[str, str]: """Rewrite tag-style prompt/caption text into compact natural language.""" - input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto" - detail_level, style_policy, include_trigger = caption_policy.apply_caption_profile( - caption_profile, - detail_level=detail_level, - style_policy=style_policy, - include_trigger=include_trigger, + return caption_format_route.naturalize_caption( + caption_format_route.CaptionFormatRequest( + source_text=source_text, + metadata_json=metadata_json, + input_hint=input_hint, + trigger=trigger, + include_trigger=include_trigger, + detail_level=detail_level, + style_policy=style_policy, + caption_profile=caption_profile, + ), + _caption_format_dependencies(), ) - keep_style = caption_policy.keep_style_terms(style_policy) - row, row_method = _row_from_inputs(source_text, metadata_json, input_hint) - if row is not None: - prose, method = _metadata_to_prose(row, detail_level, keep_style) - caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,)) - return caption, f"{row_method}:{method}" - prose, method = _text_to_prose(source_text, detail_level, keep_style) - caption = sanitize_prose_text(_with_trigger(prose, trigger, include_trigger), triggers=(trigger,)) - return caption, method diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 80b5239..3772822 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -453,6 +453,10 @@ Keep here: Already isolated: +- `caption_format_route.py` owns top-level caption dispatch, including input + hint normalization, caption profile application, metadata-vs-text branching, + trigger wrapping, final prose hygiene, and method/output shape; + `caption_naturalizer.py` keeps the public wrapper. - `caption_metadata_routes.py` owns metadata row natural-language assembly for single, couple, configured-cast, group/layout, and Insta/OF pair routes behind `CaptionMetadataRouteRequest`, `CaptionMetadataRouteDependencies`, and diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index 3231091..3ac2be9 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -65,7 +65,7 @@ call the same core generation functions. | `SxCP Insta/OF Prompt Pair` | `build_insta_of_pair` | Builds a softcore row and hardcore row with shared cast/continuity options. | | `SxCP Krea2 Formatter` | `format_krea2_prompt` -> `krea_format_route.py` | Converts metadata rows or pair metadata into Krea2-friendly prose. | | `SxCP SDXL Formatter` | `format_sdxl_prompt` -> `sdxl_format_route.py` | Converts metadata rows or pair metadata into SDXL/tag style prompts. | -| `SxCP Caption Naturalizer` | `naturalize_caption` | Converts rows into more natural sentence captions. | +| `SxCP Caption Naturalizer` | `naturalize_caption` -> `caption_format_route.py` | Converts rows into more natural sentence captions. | Core helper ownership: @@ -132,6 +132,7 @@ Core helper ownership: | `sdxl_presets.py` | SDXL formatter profiles, style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. | | `sdxl_format_route.py` | Top-level SDXL dispatch, formatter profile application, target and nude-weight normalization, metadata-vs-text input selection, single-vs-pair branching, final prompt/negative output shape, and fallback routing. | | `sdxl_tag_policy.py` | SDXL tag splitting, tag-key dedupe, count inference, character descriptor tags, metadata-family/camera/explicit helper tags, and route dependency assembly used by `sdxl_formatter.py` and `sdxl_tag_routes.py`. | +| `caption_format_route.py` | Top-level caption dispatch, input-hint normalization, caption profile application, metadata-vs-text branching, trigger wrapping, final prose hygiene, and method/output shape. | | `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. | | `caption_text_policy.py` | Caption sentence helpers, trigger wrapping, formatter-hint append, row-value fallback wrappers, cast text wrappers, single-caption front parsing, and metadata-route dependency assembly used by `caption_naturalizer.py` and `caption_metadata_routes.py`. | diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 711c367..be93b6a 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -25,6 +25,7 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) import caption_naturalizer # noqa: E402 +import caption_format_route # noqa: E402 import caption_metadata_routes # noqa: E402 import caption_policy # noqa: E402 import caption_text_policy # noqa: E402 @@ -2532,6 +2533,77 @@ def smoke_caption_policy() -> None: _expect(browsing_method == "text(fallback)", "Caption browsing profile changed fallback method") +def smoke_caption_format_route_policy() -> None: + row = _prompt_row( + name="caption_format_route_single", + category="woman", + subcategory="random", + seed=3801, + men_count=0, + ) + metadata_request = caption_format_route.CaptionFormatRequest( + source_text="", + metadata_json=_json(row), + input_hint="metadata_json", + trigger=Trigger, + include_trigger=False, + detail_level="concise", + style_policy="keep_style_terms", + caption_profile="training_dense", + ) + typed_metadata = caption_format_route.naturalize_caption_result( + metadata_request, + caption_naturalizer._caption_format_dependencies(), + ) + public_metadata = caption_naturalizer.naturalize_caption( + "", + metadata_json=metadata_request.metadata_json, + input_hint=metadata_request.input_hint, + trigger=metadata_request.trigger, + include_trigger=metadata_request.include_trigger, + detail_level=metadata_request.detail_level, + style_policy=metadata_request.style_policy, + caption_profile=metadata_request.caption_profile, + ) + _expect(typed_metadata.as_tuple() == public_metadata, "Typed caption format route should match public metadata output") + _expect(typed_metadata.branch == "metadata", "Typed caption format route changed metadata branch") + _expect(typed_metadata.input_hint == "metadata_json", "Typed caption route lost input hint") + _expect(typed_metadata.detail_level == "dense", "Typed caption route lost training_dense detail override") + _expect(typed_metadata.style_policy == "drop_style_tail", "Typed caption route lost training_dense style override") + _expect(typed_metadata.include_trigger is True, "Typed caption route lost training_dense trigger override") + _expect(typed_metadata.caption.startswith(Trigger), "Typed caption metadata route should prepend training trigger") + + fallback_request = caption_format_route.CaptionFormatRequest( + source_text="woman, red dress, studio, coloured pencil comic illustration", + input_hint="bad_hint", + trigger=Trigger, + include_trigger=True, + detail_level="dense", + style_policy="drop_style_tail", + caption_profile="browsing", + ) + typed_fallback = caption_format_route.naturalize_caption_result( + fallback_request, + caption_naturalizer._caption_format_dependencies(), + ) + public_fallback = caption_naturalizer.naturalize_caption( + fallback_request.source_text, + input_hint=fallback_request.input_hint, + trigger=fallback_request.trigger, + include_trigger=fallback_request.include_trigger, + detail_level=fallback_request.detail_level, + style_policy=fallback_request.style_policy, + caption_profile=fallback_request.caption_profile, + ) + _expect(typed_fallback.as_tuple() == public_fallback, "Typed caption format route should match public fallback output") + _expect(typed_fallback.branch == "text", "Typed caption format route changed fallback branch") + _expect(typed_fallback.input_hint == "auto", "Typed caption route should normalize invalid input hint") + _expect(typed_fallback.include_trigger is False, "Typed caption browsing profile should disable trigger") + _expect(typed_fallback.keep_style is True, "Typed caption browsing profile should keep style terms") + _expect(not typed_fallback.caption.startswith(Trigger), "Typed caption fallback route should not prepend browsing trigger") + _expect(typed_fallback.method == "text(fallback)", "Typed caption fallback method changed") + + def smoke_caption_text_policy() -> None: row = { "primary_subject": "woman", @@ -5746,6 +5818,7 @@ SMOKE_CASES: list[tuple[str, Callable[[], None]]] = [ ("krea_format_route_policy", smoke_krea_format_route_policy), ("formatter_cast_policy", smoke_formatter_cast_policy), ("caption_policy", smoke_caption_policy), + ("caption_format_route_policy", smoke_caption_format_route_policy), ("caption_text_policy", smoke_caption_text_policy), ("caption_metadata_routes", smoke_caption_metadata_routes), ("sdxl_presets_policy", smoke_sdxl_presets_policy),