Normalize built-in row action metadata
This commit is contained in:
@@ -132,7 +132,7 @@ Core helper ownership:
|
||||
| `krea_row_fields.py` | Shared Krea normal-row field extraction for item, scene, pose, expression, composition/source-composition, camera, and style used by normal and configured-cast routes. |
|
||||
| `krea_cast.py` | Shared formatter cast descriptor parsing, cast labels, cast prose, natural cast descriptor text, and label replacement used by Krea2 and caption routes. |
|
||||
| `prompt_hygiene.py` | Generic prompt, caption, and negative-prompt cleanup, including route-agnostic negative-prompt merge/dedupe. |
|
||||
| `row_normalization.py` | Final prompt-row and pair metadata normalization: legacy built-in subject/count/scene metadata enrichment, trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. |
|
||||
| `row_normalization.py` | Final prompt-row and pair metadata normalization: legacy built-in subject/count/scene/item/pose/expression metadata enrichment, trigger prepending, extra-positive append, negative merge/dedupe, caption-part joining, embedded soft/hard row output and side-metadata synchronization, and embedded row sanitation. |
|
||||
| `formatter_detail.py` | Shared formatter detail-level choices, normalization, and concise/balanced/dense gates used by Krea2 and caption routes. |
|
||||
| `formatter_input.py` | Shared formatter input parsing: text cleanup, metadata/source JSON detection, trigger-prefix stripping, shared prompt field-label inventory, fallback field-label stripping, `Avoid:` splitting, prompt-field extraction, and metadata row-value fallback. |
|
||||
| `formatter_target.py` | Shared formatter target choices and normalization for `auto`, `single`, `softcore`, and `hardcore`, including pair-side selection and combined-caption inclusion policy. |
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
@@ -67,6 +68,51 @@ def _legacy_subject_metadata(row: dict[str, Any]) -> tuple[str, str, int | None,
|
||||
return "", subject, None, None
|
||||
|
||||
|
||||
# Field labels recognised in legacy built-in prompt text, where each field is
# written as "Label: value".  `_legacy_prompt_field` escapes and joins these
# names into a regex alternation so that a field's value stops at the next
# ". <Label>:" boundary.
_LEGACY_PROMPT_FIELD_LABELS = (
|
||||
"Ages",
|
||||
"Body types",
|
||||
"Scene",
|
||||
"Pose",
|
||||
"Facial expressions",
|
||||
"Facial expression",
|
||||
"Clothing",
|
||||
"Prop/detail",
|
||||
"Composition",
|
||||
"Use",
|
||||
"Avoid",
|
||||
)
|
||||
|
||||
|
||||
def _clean_text(value: Any) -> str:
|
||||
text = "" if value is None else str(value)
|
||||
text = re.sub(r"\s+", " ", text.replace("\n", " ")).strip()
|
||||
return re.sub(r"\s+([,.;:])", r"\1", text)
|
||||
|
||||
|
||||
def _legacy_prompt_field(row: dict[str, Any], label: str) -> str:
    """Extract the value written after ``label:`` in a row's legacy prompt.

    The row's ``"prompt"`` entry is cleaned with ``_clean_text`` and searched
    for ``label:``.  The captured value extends lazily up to the next
    ``". <known label>:"`` boundary, a ``". Use"`` / ``". Avoid"`` sentence,
    or the end of the prompt.  The capture is cleaned again and trailing
    periods are removed.

    Returns an empty string when the prompt is empty or the label is absent.
    """
    source_text = _clean_text(row.get("prompt"))
    if source_text:
        # Alternation of every known label, used only as a stop marker.
        labels = "|".join(map(re.escape, _LEGACY_PROMPT_FIELD_LABELS))
        pattern = rf"{re.escape(label)}:\s*(.*?)(?=\. (?:{labels}):|\. Use\b|\. Avoid\b|$)"
        found = re.search(pattern, source_text)
        if found is not None:
            return _clean_text(found.group(1)).rstrip(".")
    return ""
|
||||
|
||||
|
||||
def _clean_legacy_pose(value: Any) -> str:
    """Clean pose text and remove the legacy boilerplate qualifier.

    The value is normalised with ``_clean_text``; every occurrence of the
    exact phrase ``", affectionate and flirtatious but non-explicit"`` is
    then deleted (case-sensitive, leading comma included).
    """
    boilerplate = ", affectionate and flirtatious but non-explicit"
    return _clean_text(value).replace(boilerplate, "")
|
||||
|
||||
|
||||
def _clean_legacy_clothing(value: Any) -> str:
    """Clean clothing text and drop a trailing generic styling suffix.

    The value is normalised with ``_clean_text``.  A final
    ``fashion editorial styling`` or ``resort styling`` phrase (matched
    case-insensitively, with an optional preceding comma) is removed, and
    any spaces or commas left at either end are stripped.
    """
    styling_suffix = r",?\s*(?:fashion editorial|resort) styling$"
    cleaned = _clean_text(value)
    without_suffix = re.sub(styling_suffix, "", cleaned, flags=re.IGNORECASE)
    return without_suffix.strip(" ,")
|
||||
|
||||
|
||||
def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
|
||||
if row.get("source") != "built_in_generator":
|
||||
return row
|
||||
@@ -87,6 +133,15 @@ def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
|
||||
if scene_text:
|
||||
row["scene_text"] = scene_text
|
||||
row.setdefault("scene_entry", {"slug": scene_slug, "prompt": scene_text})
|
||||
pose = _clean_legacy_pose(_legacy_prompt_field(row, "Pose"))
|
||||
_setdefault_nonempty(row, "pose", pose)
|
||||
expression = _legacy_prompt_field(row, "Facial expression") or _legacy_prompt_field(row, "Facial expressions")
|
||||
_setdefault_nonempty(row, "expression", expression)
|
||||
clothing = _clean_legacy_clothing(_legacy_prompt_field(row, "Clothing"))
|
||||
_setdefault_nonempty(row, "clothing", clothing)
|
||||
_setdefault_nonempty(row, "item", clothing)
|
||||
if clothing:
|
||||
_setdefault_nonempty(row, "item_label", "Clothing")
|
||||
return row
|
||||
|
||||
|
||||
|
||||
+37
-1
@@ -578,6 +578,11 @@ def smoke_builtin_single() -> None:
|
||||
_expect_text("builtin_single_woman.scene_text", row.get("scene_text"), 12)
|
||||
_expect(row.get("scene_slug") == row.get("scene"), "builtin single row lost legacy scene slug metadata")
|
||||
_expect(row.get("scene_entry", {}).get("slug") == row.get("scene"), "builtin single row lost scene_entry slug")
|
||||
item = _expect_text("builtin_single_woman.item", row.get("item"), 8)
|
||||
pose = _expect_text("builtin_single_woman.pose", row.get("pose"), 8)
|
||||
_expect(row.get("item_label") == "Clothing", "builtin single row lost item label")
|
||||
_expect(row.get("clothing") == item, "builtin single row did not mirror clothing into item metadata")
|
||||
_expect("fashion editorial styling" not in item.lower(), "builtin single item kept generic styling suffix")
|
||||
_expect("cast_summary" not in row, "builtin single row should not masquerade as configured cast")
|
||||
_expect_trigger_once("builtin_single_woman.prompt", row.get("prompt"), Trigger)
|
||||
krea = krea_formatter.format_krea2_prompt("", metadata_json=_json(row), target="single")
|
||||
@@ -593,6 +598,28 @@ def smoke_builtin_single() -> None:
|
||||
_expect(caption_method.endswith("metadata(single)"), "builtin single caption route did not use single metadata branch")
|
||||
_expect("woman" in caption.lower(), "builtin single caption route lost normalized subject")
|
||||
_expect(row.get("scene_text") in caption, "builtin single caption route used scene slug instead of scene text")
|
||||
|
||||
metadata_only = dict(row)
|
||||
metadata_only["prompt"] = ""
|
||||
metadata_only["caption"] = ""
|
||||
krea_metadata = krea_formatter.format_krea2_prompt("", metadata_json=_json(metadata_only), target="single")
|
||||
sdxl_metadata = sdxl_formatter.format_sdxl_prompt("", metadata_json=_json(metadata_only), target="single", trigger=SdxlTrigger, prepend_trigger=True)
|
||||
caption_metadata, caption_metadata_method = caption_naturalizer.naturalize_caption(
|
||||
"",
|
||||
metadata_json=_json(metadata_only),
|
||||
target="single",
|
||||
trigger=Trigger,
|
||||
include_trigger=True,
|
||||
)
|
||||
_expect(item in str(krea_metadata.get("krea_prompt", "")), "Krea metadata-only built-in route lost explicit item")
|
||||
_expect(pose in str(krea_metadata.get("krea_prompt", "")), "Krea metadata-only built-in route lost explicit pose")
|
||||
item_anchor = " ".join(re.findall(r"[a-z0-9]+", item.lower())[:3])
|
||||
pose_anchor = " ".join(re.findall(r"[a-z0-9]+", pose.lower())[:4])
|
||||
sdxl_metadata_prompt = str(sdxl_metadata.get("sdxl_prompt", "")).lower()
|
||||
_expect(item_anchor in sdxl_metadata_prompt, "SDXL metadata-only built-in route lost explicit item")
|
||||
_expect(pose_anchor in sdxl_metadata_prompt, "SDXL metadata-only built-in route lost explicit pose")
|
||||
_expect(caption_metadata_method.endswith("metadata(single)"), "Caption metadata-only built-in route did not use single metadata branch")
|
||||
_expect(item in caption_metadata and pose in caption_metadata, "Caption metadata-only built-in route lost explicit item or pose")
|
||||
_expect_formatter_outputs(row, "builtin_single_woman", target="single")
|
||||
|
||||
|
||||
@@ -2826,7 +2853,12 @@ def smoke_row_normalization_policy() -> None:
|
||||
"source": "built_in_generator",
|
||||
"primary_subject": "two women",
|
||||
"scene": "office",
|
||||
"prompt": "Two adults in a clean legacy prompt.",
|
||||
"prompt": (
|
||||
"Two adults in a clean legacy prompt. Scene: old room. "
|
||||
"Pose: standing close, affectionate and flirtatious but non-explicit. "
|
||||
"Facial expressions: one with a calm smile, the other with a side glance. "
|
||||
"Clothing: coordinated satin outfits, resort styling. Composition: vertical old frame."
|
||||
),
|
||||
"caption": "legacy couple caption",
|
||||
"negative_prompt": "bad anatomy",
|
||||
},
|
||||
@@ -2839,6 +2871,10 @@ def smoke_row_normalization_policy() -> None:
|
||||
_expect(legacy_couple.get("scene_slug") == "office", "Legacy couple row lost scene slug")
|
||||
_expect("cozy office desk" in str(legacy_couple.get("scene_text", "")), "Legacy couple row lost readable scene text")
|
||||
_expect(legacy_couple.get("scene_entry", {}).get("slug") == "office", "Legacy couple row lost scene entry")
|
||||
_expect(legacy_couple.get("pose") == "standing close", "Legacy couple row did not clean pose suffix")
|
||||
_expect(legacy_couple.get("item") == "coordinated satin outfits", "Legacy couple row did not clean clothing suffix")
|
||||
_expect(legacy_couple.get("item_label") == "Clothing", "Legacy couple row lost item label")
|
||||
_expect("calm smile" in str(legacy_couple.get("expression", "")), "Legacy couple row lost expression metadata")
|
||||
_expect("cast_summary" not in legacy_couple, "Legacy couple row should not gain configured-cast summary")
|
||||
|
||||
legacy_group = row_normalization.normalize_prompt_row(
|
||||
|
||||
Reference in New Issue
Block a user