From 21da2949c611d1e87a0d07af12ff2103264b937d Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sat, 27 Jun 2026 01:43:48 +0200 Subject: [PATCH] Add caption naturalizer profiles --- README.md | 3 ++ __init__.py | 1 + caption_naturalizer.py | 8 +++- caption_policy.py | 43 ++++++++++++++++++++ docs/prompt-architecture-improvement-plan.md | 7 +++- docs/prompt-pool-routing-map.md | 2 +- node_formatter.py | 5 +++ tools/prompt_smoke.py | 36 ++++++++++++++++ 8 files changed, 101 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7921fb9..3d73cf0 100644 --- a/README.md +++ b/README.md @@ -436,6 +436,9 @@ pool and intensity settings. Naturalizer controls: - `input_hint`: `auto`, `metadata_json`, or `caption_or_prompt`. +- `caption_profile`: `manual_controls` keeps the detail/style/trigger widgets + authoritative; `training_concise`, `training_dense`, and `browsing` apply + preset caption behavior. - `detail_level`: `concise`, `balanced`, or `dense`. - `style_policy`: `drop_style_tail` removes old fixed style tails; `keep_style_terms` keeps style descriptions in the rewritten text. diff --git a/__init__.py b/__init__.py index 8a8fa35..78903e1 100644 --- a/__init__.py +++ b/__init__.py @@ -309,6 +309,7 @@ NODE_INPUT_TOOLTIPS = { }, "SxCPCaptionNaturalizer": { "metadata_json": "Best input for training captions because it preserves structured generator details.", + "caption_profile": "Preset behavior for the caption rewrite. 
manual_controls keeps detail/style/include-trigger widgets authoritative.", "style_policy": "drop_style_tail removes generation/style boilerplate; keep_style_terms preserves more of it.", "include_trigger": "Keep this true for LoRA/training captions so the trigger token is learned.", }, diff --git a/caption_naturalizer.py b/caption_naturalizer.py index 56be80f..c418e3a 100644 --- a/caption_naturalizer.py +++ b/caption_naturalizer.py @@ -627,10 +627,16 @@ def naturalize_caption( include_trigger: bool = True, detail_level: str = "balanced", style_policy: str = "drop_style_tail", + caption_profile: str = caption_policy.CAPTION_PROFILE_DEFAULT, ) -> tuple[str, str]: """Rewrite tag-style prompt/caption text into compact natural language.""" input_hint = input_hint if input_hint in ("auto", "metadata_json", "caption_or_prompt") else "auto" - detail_level = caption_policy.normalize_detail_level(detail_level) + detail_level, style_policy, include_trigger = caption_policy.apply_caption_profile( + caption_profile, + detail_level=detail_level, + style_policy=style_policy, + include_trigger=include_trigger, + ) keep_style = caption_policy.keep_style_terms(style_policy) row, row_method = _row_from_inputs(source_text, metadata_json, input_hint) if row is not None: diff --git a/caption_policy.py b/caption_policy.py index afb12d3..6ef65c3 100644 --- a/caption_policy.py +++ b/caption_policy.py @@ -16,6 +16,26 @@ DEFAULT_TRIGGER = "sxcppnl7" DETAIL_LEVELS = ("balanced", "concise", "dense") STYLE_POLICIES = ("drop_style_tail", "keep_style_terms") +CAPTION_PROFILE_DEFAULT = "manual_controls" + +CAPTION_PROFILES = { + "manual_controls": {}, + "training_concise": { + "detail_level": "concise", + "style_policy": "drop_style_tail", + "include_trigger": True, + }, + "training_dense": { + "detail_level": "dense", + "style_policy": "drop_style_tail", + "include_trigger": True, + }, + "browsing": { + "detail_level": "balanced", + "style_policy": "keep_style_terms", + "include_trigger": False, 
+ }, +} STYLE_TAILS = [ ", coloured pencil comic illustration, crisp linework, hatching, soft pastel palette, warm sensual lighting, textured parchment paper", @@ -59,6 +79,29 @@ def normalize_style_policy(value: str) -> str: return value if value in STYLE_POLICIES else "drop_style_tail" +def caption_profile_choices() -> list[str]: + return list(CAPTION_PROFILES) + + +def normalize_caption_profile(value: str) -> str: + return value if value in CAPTION_PROFILES else CAPTION_PROFILE_DEFAULT + + +def apply_caption_profile( + caption_profile: str, + *, + detail_level: str, + style_policy: str, + include_trigger: bool, +) -> tuple[str, str, bool]: + profile = CAPTION_PROFILES[normalize_caption_profile(caption_profile)] + return ( + normalize_detail_level(profile.get("detail_level", detail_level)), + normalize_style_policy(profile.get("style_policy", style_policy)), + bool(profile.get("include_trigger", include_trigger)), + ) + + def keep_style_terms(style_policy: str) -> bool: return normalize_style_policy(style_policy) == "keep_style_terms" diff --git a/docs/prompt-architecture-improvement-plan.md b/docs/prompt-architecture-improvement-plan.md index 70e15a9..644a7cc 100644 --- a/docs/prompt-architecture-improvement-plan.md +++ b/docs/prompt-architecture-improvement-plan.md @@ -309,10 +309,13 @@ Keep here: - shared cast descriptor parsing and label replacement from `krea_cast.py`. - caption detail-level/style-policy normalization, clothing cleanup, and composition cleanup from `caption_policy.py`. +- caption profiles for manual controls, concise training captions, dense + training captions, and browsing captions live in `caption_policy.py` and are + exposed by `SxCP Caption Naturalizer`. Improve later: - -- add a `caption_profile` option for concise/dense LoRA caption styles. +- add more caption profiles if a new training or browsing workflow needs a + distinct default. 
### Category JSON Path diff --git a/docs/prompt-pool-routing-map.md b/docs/prompt-pool-routing-map.md index feb9f66..e658dbc 100644 --- a/docs/prompt-pool-routing-map.md +++ b/docs/prompt-pool-routing-map.md @@ -97,7 +97,7 @@ Core helper ownership: | `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, and embedded soft/hard row sanitation. | | `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. | | `sdxl_presets.py` | SDXL style presets, quality presets, default negative prompt, and metadata-family tag hints used by the SDXL formatter and node choice lists. | -| `caption_policy.py` | Caption naturalizer policy data and helpers: style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. | +| `caption_policy.py` | Caption naturalizer policy data and helpers: caption profiles, style tails, item labels, metadata-family caption labels, detail/style-policy normalization, clothing cleanup, and composition cleanup. | ## Node IO Map diff --git a/node_formatter.py b/node_formatter.py index 202c857..e32cfd5 100644 --- a/node_formatter.py +++ b/node_formatter.py @@ -2,10 +2,12 @@ from __future__ import annotations try: from .caption_naturalizer import naturalize_caption + from .caption_policy import caption_profile_choices from .krea_formatter import format_krea2_prompt from .sdxl_formatter import format_sdxl_prompt, sdxl_quality_preset_choices, sdxl_style_preset_choices except ImportError: # Allows local smoke tests from the repository root. 
from caption_naturalizer import naturalize_caption + from caption_policy import caption_profile_choices from krea_formatter import format_krea2_prompt from sdxl_formatter import format_sdxl_prompt, sdxl_quality_preset_choices, sdxl_style_preset_choices @@ -17,6 +19,7 @@ class SxCPCaptionNaturalizer: "required": { "source_text": ("STRING", {"default": "", "multiline": True}), "input_hint": (["auto", "metadata_json", "caption_or_prompt"], {"default": "auto"}), + "caption_profile": (caption_profile_choices(), {"default": "manual_controls"}), "detail_level": (["balanced", "concise", "dense"], {"default": "balanced"}), "style_policy": (["drop_style_tail", "keep_style_terms"], {"default": "drop_style_tail"}), "trigger": ("STRING", {"default": "sxcppnl7"}), @@ -37,6 +40,7 @@ class SxCPCaptionNaturalizer: self, source_text, input_hint, + caption_profile, detail_level, style_policy, trigger, @@ -53,6 +57,7 @@ class SxCPCaptionNaturalizer: include_trigger=include_trigger, detail_level=detail_level, style_policy=style_policy, + caption_profile=caption_profile, ) diff --git a/tools/prompt_smoke.py b/tools/prompt_smoke.py index 1a71ffc..33d1b99 100644 --- a/tools/prompt_smoke.py +++ b/tools/prompt_smoke.py @@ -969,6 +969,31 @@ def smoke_caption_policy() -> None: _expect(caption_policy.keep_style_terms("keep_style_terms") is True, "Caption style policy keep flag changed") _expect(caption_policy.detail_allows("concise") is False, "Caption concise detail gate changed") _expect(caption_policy.detail_allows("dense", dense_only=True) is True, "Caption dense-only gate changed") + _expect("training_concise" in caption_policy.caption_profile_choices(), "Caption profile choices lost training_concise") + _expect( + caption_policy.normalize_caption_profile("bad") == caption_policy.CAPTION_PROFILE_DEFAULT, + "Caption invalid profile fallback changed", + ) + _expect( + caption_policy.apply_caption_profile( + "training_dense", + detail_level="concise", + style_policy="keep_style_terms", + 
include_trigger=False, + ) + == ("dense", "drop_style_tail", True), + "Caption training_dense profile overrides changed", + ) + _expect( + caption_policy.apply_caption_profile( + "manual_controls", + detail_level="concise", + style_policy="keep_style_terms", + include_trigger=False, + ) + == ("concise", "keep_style_terms", False), + "Caption manual profile should preserve explicit controls", + ) style_tail = caption_policy.STYLE_TAILS[0] _expect( @@ -991,6 +1016,13 @@ def smoke_caption_policy() -> None: _expect(caption_policy.metadata_action_label(row) == "oral action", "Caption action-family label changed") row = {"action_family": "oral", "position_family": "anal"} _expect(caption_naturalizer._metadata_action_label(row) == "anal action", "Caption position-family label priority changed") + browsing_caption, browsing_method = caption_naturalizer.naturalize_caption( + "woman, red dress, studio", + caption_profile="browsing", + include_trigger=True, + ) + _expect(not browsing_caption.startswith(caption_policy.DEFAULT_TRIGGER), "Caption browsing profile should disable trigger by default") + _expect(browsing_method == "text(fallback)", "Caption browsing profile changed fallback method") def smoke_sdxl_presets_policy() -> None: @@ -2763,6 +2795,7 @@ def smoke_node_formatter_registration() -> None: caption, caption_method = sxcp_nodes.NODE_CLASS_MAPPINGS["SxCPCaptionNaturalizer"]().build( "A woman standing by a window, best quality", "caption_or_prompt", + "manual_controls", "concise", "drop_style_tail", "sxcppnl7", @@ -2771,6 +2804,9 @@ def smoke_node_formatter_registration() -> None: _expect_text("node_formatter.caption", caption, 20) _expect(caption.startswith("sxcppnl7"), "Caption Naturalizer did not prepend trigger") _expect("text(" in caption_method, "Caption Naturalizer method changed unexpectedly") + caption_inputs = sxcp_nodes.NODE_CLASS_MAPPINGS["SxCPCaptionNaturalizer"].INPUT_TYPES().get("required") or {} + _expect("caption_profile" in caption_inputs, "Caption Naturalizer lost
caption_profile input") + _expect("tooltip" in caption_inputs["caption_profile"][1], "Caption profile tooltip injection missing") krea_output = krea_node().build( "sxcppnl7 A woman standing by a window",