Normalize built-in row action metadata

This commit is contained in:
2026-06-27 17:01:48 +02:00
parent 83d661919f
commit 4714e23dc8
3 changed files with 93 additions and 2 deletions
+55
View File
@@ -1,5 +1,6 @@
from __future__ import annotations
import re
from typing import Any
try:
@@ -67,6 +68,51 @@ def _legacy_subject_metadata(row: dict[str, Any]) -> tuple[str, str, int | None,
return "", subject, None, None
# Labels that introduce a "Label: value" segment in a legacy built-in prompt.
# They are joined into a regex alternation to detect where one field's value
# stops and the next labelled field starts.
_LEGACY_PROMPT_FIELD_LABELS = (
    "Ages",
    "Body types",
    "Scene",
    "Pose",
    "Facial expressions",
    "Facial expression",
    "Clothing",
    "Prop/detail",
    "Composition",
    "Use",
    "Avoid",
)
def _clean_text(value: Any) -> str:
    """Return *value* as single-spaced text with tidy punctuation.

    ``None`` yields the empty string; any other value is converted with
    ``str``. Every run of whitespace (newlines included) is collapsed to one
    space, the ends are trimmed, and whitespace sitting directly before
    ``,``, ``.``, ``;`` or ``:`` is removed.
    """
    if value is None:
        return ""
    # "\s+" already matches newlines, so one substitution flattens everything.
    single_spaced = re.sub(r"\s+", " ", str(value)).strip()
    return re.sub(r"\s+([,.;:])", r"\1", single_spaced)
def _legacy_prompt_field(row: dict[str, Any], label: str) -> str:
    """Pull the value that follows ``label:`` out of ``row["prompt"]``.

    The prompt is normalised with ``_clean_text`` first. The captured value
    runs from just after ``label:`` up to the nearest of: ``". "`` followed by
    any known field label and a colon, ``". Use"``, ``". Avoid"``, or the end
    of the prompt. The result is cleaned again and trailing periods are
    removed.

    Returns an empty string when the row has no prompt text or when the label
    does not occur in it.
    """
    prompt_text = _clean_text(row.get("prompt"))
    if prompt_text:
        labels = "|".join(map(re.escape, _LEGACY_PROMPT_FIELD_LABELS))
        found = re.search(
            rf"{re.escape(label)}:\s*(.*?)(?=\. (?:{labels}):|\. Use\b|\. Avoid\b|$)",
            prompt_text,
        )
        if found is not None:
            return _clean_text(found.group(1)).rstrip(".")
    return ""
def _clean_legacy_pose(value: Any) -> str:
    """Normalise a legacy pose value and drop its stock qualifier phrase.

    The value is passed through ``_clean_text``; every occurrence of the
    fixed phrase below is then removed.
    """
    stock_qualifier = ", affectionate and flirtatious but non-explicit"
    return _clean_text(value).replace(stock_qualifier, "")
def _clean_legacy_clothing(value: Any) -> str:
    """Normalise a legacy clothing value and trim a trailing styling tag.

    After ``_clean_text`` normalisation, a case-insensitive trailing
    ``fashion editorial styling`` or ``resort styling`` (with an optional
    leading comma and whitespace) is removed. Any spaces or commas left at
    either end are stripped.
    """
    without_styling = re.sub(
        r",?\s*(?:fashion editorial|resort) styling$",
        "",
        _clean_text(value),
        flags=re.IGNORECASE,
    )
    return without_styling.strip(" ,")
def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
if row.get("source") != "built_in_generator":
return row
@@ -87,6 +133,15 @@ def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
if scene_text:
row["scene_text"] = scene_text
row.setdefault("scene_entry", {"slug": scene_slug, "prompt": scene_text})
pose = _clean_legacy_pose(_legacy_prompt_field(row, "Pose"))
_setdefault_nonempty(row, "pose", pose)
expression = _legacy_prompt_field(row, "Facial expression") or _legacy_prompt_field(row, "Facial expressions")
_setdefault_nonempty(row, "expression", expression)
clothing = _clean_legacy_clothing(_legacy_prompt_field(row, "Clothing"))
_setdefault_nonempty(row, "clothing", clothing)
_setdefault_nonempty(row, "item", clothing)
if clothing:
_setdefault_nonempty(row, "item_label", "Clothing")
return row