Normalize built-in row subject metadata
This commit is contained in:
@@ -132,7 +132,7 @@ Core helper ownership:
|
|||||||
| `krea_row_fields.py` | Shared Krea normal-row field extraction for item, scene, pose, expression, composition/source-composition, camera, and style used by normal and configured-cast routes. |
|
| `krea_row_fields.py` | Shared Krea normal-row field extraction for item, scene, pose, expression, composition/source-composition, camera, and style used by normal and configured-cast routes. |
|
||||||
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
|
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
|
||||||
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup, including route-agnostic negative-prompt merge/dedupe. |
|
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup, including route-agnostic negative-prompt merge/dedupe. |
|
||||||
| `row_normalization.py` | Final prompt-row and pair metadata normalization: trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. |
|
| `row_normalization.py` | Final prompt-row and pair metadata normalization: legacy built-in subject/count metadata enrichment, trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. |
|
||||||
| `formatter_detail.py` | Shared formatter detail-level choices, normalization, and concise/balanced/dense gates used by Krea2 and caption routes. |
|
| `formatter_detail.py` | Shared formatter detail-level choices, normalization, and concise/balanced/dense gates used by Krea2 and caption routes. |
|
||||||
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
|
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
|
||||||
| `formatter_target.py` | Shared formatter target choices and normalization for `auto`, `single`, `softcore`, and `hardcore`, including pair-side selection and combined-caption inclusion policy. |
|
| `formatter_target.py` | Shared formatter target choices and normalization for `auto`, `single`, `softcore`, and `hardcore`, including pair-side selection and combined-caption inclusion policy. |
|
||||||
|
|||||||
@@ -32,6 +32,56 @@ def caption_from_parts(parts: list[Any] | tuple[Any, ...], *, active_trigger: st
|
|||||||
return sanitize_caption_text(text, triggers=_trigger_tuple(active_trigger))
|
return sanitize_caption_text(text, triggers=_trigger_tuple(active_trigger))
|
||||||
|
|
||||||
|
|
||||||
|
def _setdefault_nonempty(row: dict[str, Any], key: str, value: Any) -> None:
|
||||||
|
if str(row.get(key) or "").strip():
|
||||||
|
return
|
||||||
|
if str(value or "").strip():
|
||||||
|
row[key] = value
|
||||||
|
|
||||||
|
|
||||||
|
def _setdefault_count(row: dict[str, Any], key: str, value: int) -> None:
|
||||||
|
if str(row.get(key) or "").strip():
|
||||||
|
return
|
||||||
|
row[key] = int(value)
|
||||||
|
|
||||||
|
|
||||||
|
def _legacy_subject_metadata(row: dict[str, Any]) -> tuple[str, str, int | None, int | None]:
|
||||||
|
subject = str(row.get("primary_subject") or row.get("subject") or "").strip()
|
||||||
|
lower = subject.lower()
|
||||||
|
if lower in ("woman", "adult woman"):
|
||||||
|
return "woman", subject or "woman", 1, 0
|
||||||
|
if lower in ("man", "adult man"):
|
||||||
|
return "man", subject or "man", 0, 1
|
||||||
|
if "two women" in lower:
|
||||||
|
return "couple", subject or "two women", 2, 0
|
||||||
|
if "two men" in lower:
|
||||||
|
return "couple", subject or "two men", 0, 2
|
||||||
|
if "woman" in lower and "man" in lower:
|
||||||
|
return "couple", subject or "a woman and a man", 1, 1
|
||||||
|
if "group" in lower:
|
||||||
|
return "group", subject or "mixed adult group", 2, 2
|
||||||
|
if "layout" in lower:
|
||||||
|
return "layout", subject or "adult layout scene", None, None
|
||||||
|
return "", subject, None, None
|
||||||
|
|
||||||
|
|
||||||
|
def _row_count_or(row: dict[str, Any], key: str, fallback: int) -> int:
    """Return ``row[key]`` coerced to ``int``, or ``fallback`` if it is not integer-like."""
    try:
        return int(row.get(key))
    except (TypeError, ValueError):
        return fallback


def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
    """Backfill subject/count metadata on rows whose source is ``built_in_generator``.

    Rows with any other ``source`` are returned untouched. For built-in rows
    the subject text is classified and these keys are filled only where the
    row has no value yet: ``subject_type``, ``subject_phrase``,
    ``women_count``, ``men_count``, ``person_count``, and ``scene_slug``
    (copied from ``scene``). Existing values are never overwritten.

    The row is mutated in place and the same dict is returned.
    """
    if row.get("source") != "built_in_generator":
        return row

    subject_type, subject_phrase, women_count, men_count = _legacy_subject_metadata(row)
    _setdefault_nonempty(row, "subject_type", subject_type)
    _setdefault_nonempty(row, "subject_phrase", subject_phrase)
    if women_count is not None:
        _setdefault_count(row, "women_count", women_count)
    if men_count is not None:
        _setdefault_count(row, "men_count", men_count)

    has_inferred_cast = women_count is not None and men_count is not None
    if has_inferred_cast and not str(row.get("person_count") or "").strip():
        # Sum the counts actually stored on the row, so the total cannot
        # contradict explicit counts that were kept above. Fall back to the
        # inferred value when a stored count is not integer-like.
        row["person_count"] = _row_count_or(row, "women_count", int(women_count)) + _row_count_or(
            row, "men_count", int(men_count)
        )

    if str(row.get("scene") or "").strip() and not str(row.get("scene_slug") or "").strip():
        row["scene_slug"] = row.get("scene")
    return row
|
||||||
|
|
||||||
|
|
||||||
def normalize_prompt_row(
|
def normalize_prompt_row(
|
||||||
row: dict[str, Any],
|
row: dict[str, Any],
|
||||||
*,
|
*,
|
||||||
@@ -41,6 +91,7 @@ def normalize_prompt_row(
|
|||||||
extra_negative: str = "",
|
extra_negative: str = "",
|
||||||
default_negative: str = "",
|
default_negative: str = "",
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
|
row = enrich_legacy_row_metadata(row)
|
||||||
trigger = str(active_trigger or "").strip()
|
trigger = str(active_trigger or "").strip()
|
||||||
positive = str(extra_positive or "").strip()
|
positive = str(extra_positive or "").strip()
|
||||||
prompt = str(row.get("prompt", "") or "")
|
prompt = str(row.get("prompt", "") or "")
|
||||||
|
|||||||
@@ -571,7 +571,17 @@ def _fixture_hardcore_row(**overrides: Any) -> dict[str, Any]:
|
|||||||
def smoke_builtin_single() -> None:
|
def smoke_builtin_single() -> None:
|
||||||
row = _prompt_row(name="builtin_single_woman", category="woman", subcategory="random", seed=1001, men_count=0)
|
row = _prompt_row(name="builtin_single_woman", category="woman", subcategory="random", seed=1001, men_count=0)
|
||||||
_expect(row.get("source") == "built_in_generator", "builtin row should come from built-in generator")
|
_expect(row.get("source") == "built_in_generator", "builtin row should come from built-in generator")
|
||||||
|
_expect(row.get("subject_type") == "woman", "builtin single row lost normalized subject_type")
|
||||||
|
_expect(row.get("subject_phrase") == "woman", "builtin single row lost normalized subject_phrase")
|
||||||
|
_expect(row.get("women_count") == 1 and row.get("men_count") == 0, "builtin single row lost normalized cast counts")
|
||||||
|
_expect(row.get("person_count") == 1, "builtin single row lost normalized person count")
|
||||||
|
_expect("cast_summary" not in row, "builtin single row should not masquerade as configured cast")
|
||||||
_expect_trigger_once("builtin_single_woman.prompt", row.get("prompt"), Trigger)
|
_expect_trigger_once("builtin_single_woman.prompt", row.get("prompt"), Trigger)
|
||||||
|
sdxl = sdxl_formatter.format_sdxl_prompt("", metadata_json=_json(row), target="single", trigger=SdxlTrigger, prepend_trigger=True)
|
||||||
|
_expect("1woman" in str(sdxl.get("sdxl_prompt", "")).lower(), "builtin single SDXL route lost normalized woman count")
|
||||||
|
caption, caption_method = caption_naturalizer.naturalize_caption("", metadata_json=_json(row), target="single", trigger=Trigger, include_trigger=True)
|
||||||
|
_expect(caption_method.endswith("metadata(single)"), "builtin single caption route did not use single metadata branch")
|
||||||
|
_expect("woman" in caption.lower(), "builtin single caption route lost normalized subject")
|
||||||
_expect_formatter_outputs(row, "builtin_single_woman", target="single")
|
_expect_formatter_outputs(row, "builtin_single_woman", target="single")
|
||||||
|
|
||||||
|
|
||||||
@@ -2800,6 +2810,51 @@ def smoke_row_normalization_policy() -> None:
|
|||||||
_expect(row.get("trigger") == Trigger, "Row normalization lost active trigger")
|
_expect(row.get("trigger") == Trigger, "Row normalization lost active trigger")
|
||||||
_expect_no_duplicate_comma_items("row_normalization.negative", row.get("negative_prompt"))
|
_expect_no_duplicate_comma_items("row_normalization.negative", row.get("negative_prompt"))
|
||||||
|
|
||||||
|
legacy_couple = row_normalization.normalize_prompt_row(
|
||||||
|
{
|
||||||
|
"source": "built_in_generator",
|
||||||
|
"primary_subject": "two women",
|
||||||
|
"prompt": "Two adults in a clean legacy prompt.",
|
||||||
|
"caption": "legacy couple caption",
|
||||||
|
"negative_prompt": "bad anatomy",
|
||||||
|
},
|
||||||
|
active_trigger=Trigger,
|
||||||
|
prepend_trigger_to_prompt=False,
|
||||||
|
)
|
||||||
|
_expect(legacy_couple.get("subject_type") == "couple", "Legacy couple row lost normalized subject_type")
|
||||||
|
_expect(legacy_couple.get("women_count") == 2 and legacy_couple.get("men_count") == 0, "Legacy couple row lost normalized counts")
|
||||||
|
_expect(legacy_couple.get("person_count") == 2, "Legacy couple row lost normalized person count")
|
||||||
|
_expect("cast_summary" not in legacy_couple, "Legacy couple row should not gain configured-cast summary")
|
||||||
|
|
||||||
|
legacy_group = row_normalization.normalize_prompt_row(
|
||||||
|
{
|
||||||
|
"source": "built_in_generator",
|
||||||
|
"primary_subject": "mixed adult group",
|
||||||
|
"prompt": "Group legacy prompt.",
|
||||||
|
"caption": "legacy group caption",
|
||||||
|
"negative_prompt": "bad anatomy",
|
||||||
|
},
|
||||||
|
active_trigger=Trigger,
|
||||||
|
prepend_trigger_to_prompt=False,
|
||||||
|
)
|
||||||
|
_expect(legacy_group.get("subject_type") == "group", "Legacy group row lost normalized subject_type")
|
||||||
|
_expect(legacy_group.get("women_count") == 2 and legacy_group.get("men_count") == 2, "Legacy group row lost fallback counts")
|
||||||
|
_expect(legacy_group.get("person_count") == 4, "Legacy group row lost normalized person count")
|
||||||
|
|
||||||
|
legacy_layout = row_normalization.normalize_prompt_row(
|
||||||
|
{
|
||||||
|
"source": "built_in_generator",
|
||||||
|
"primary_subject": "layout scene",
|
||||||
|
"prompt": "Layout legacy prompt.",
|
||||||
|
"caption": "legacy layout caption",
|
||||||
|
"negative_prompt": "bad anatomy",
|
||||||
|
},
|
||||||
|
active_trigger=Trigger,
|
||||||
|
prepend_trigger_to_prompt=False,
|
||||||
|
)
|
||||||
|
_expect(legacy_layout.get("subject_type") == "layout", "Legacy layout row lost normalized subject_type")
|
||||||
|
_expect("women_count" not in legacy_layout and "men_count" not in legacy_layout, "Legacy layout row should not invent cast counts")
|
||||||
|
|
||||||
outputs = row_normalization.normalize_pair_text_outputs(
|
outputs = row_normalization.normalize_pair_text_outputs(
|
||||||
active_trigger=Trigger,
|
active_trigger=Trigger,
|
||||||
prepend_trigger_to_prompt=True,
|
prepend_trigger_to_prompt=True,
|
||||||
|
|||||||
Reference in New Issue
Block a user