Normalize built-in row action metadata
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
@@ -67,6 +68,51 @@ def _legacy_subject_metadata(row: dict[str, Any]) -> tuple[str, str, int | None,
|
||||
return "", subject, None, None
|
||||
|
||||
|
||||
# Labels that can open a "<Label>: ..." segment of a legacy prompt.
# _legacy_prompt_field joins them into a regex alternation to find where the
# next segment begins.
_LEGACY_PROMPT_FIELD_LABELS = (
    "Ages", "Body types", "Scene", "Pose",
    "Facial expressions", "Facial expression",
    "Clothing", "Prop/detail", "Composition",
    "Use", "Avoid",
)
|
||||
|
||||
|
||||
def _clean_text(value: Any) -> str:
    """Return *value* as a single-line string with tidy spacing.

    ``None`` becomes ``""``; any other value is converted with ``str()``.
    Each run of whitespace (newlines included) collapses to one space, the
    ends are trimmed, and whitespace directly before ``,``, ``.``, ``;`` or
    ``:`` is removed.
    """
    if value is None:
        return ""
    # ``\s+`` already matches newlines, so no separate newline replacement
    # is needed before collapsing.
    collapsed = re.sub(r"\s+", " ", str(value)).strip()
    return re.sub(r"\s+([,.;:])", r"\1", collapsed)
|
||||
|
||||
|
||||
def _legacy_prompt_field(row: dict[str, Any], label: str) -> str:
    """Extract the text that follows ``<label>:`` in the row's ``prompt``.

    The prompt is normalised with ``_clean_text`` first. The value is matched
    lazily from the label up to the next ``". <known label>:"``, ``". Use"``,
    ``". Avoid"`` or the end of the prompt, then cleaned again and stripped
    of trailing periods. Returns ``""`` when the prompt is empty or the label
    does not occur.
    """
    prompt_text = _clean_text(row.get("prompt"))
    if not prompt_text:
        return ""

    # Alternation of every known label, used only to detect where the next
    # field starts.
    known_labels = "|".join(map(re.escape, _LEGACY_PROMPT_FIELD_LABELS))
    field_pattern = rf"{re.escape(label)}:\s*(.*?)(?=\. (?:{known_labels}):|\. Use\b|\. Avoid\b|$)"

    found = re.search(field_pattern, prompt_text)
    if found is None:
        return ""

    value = _clean_text(found.group(1))
    return value.rstrip(".")
|
||||
|
||||
|
||||
def _clean_legacy_pose(value: Any) -> str:
    """Normalise a legacy pose value and remove its boilerplate tone phrase.

    The value is cleaned with ``_clean_text``; every occurrence of the fixed
    phrase below is then deleted.
    """
    tone_phrase = ", affectionate and flirtatious but non-explicit"
    return _clean_text(value).replace(tone_phrase, "")
|
||||
|
||||
|
||||
def _clean_legacy_clothing(value: Any) -> str:
    """Normalise a legacy clothing value and drop a trailing styling tag.

    After ``_clean_text``, a case-insensitive ``fashion editorial styling``
    or ``resort styling`` at the very end (with an optional preceding comma)
    is removed, and leftover spaces and commas are trimmed from both ends.
    """
    cleaned = _clean_text(value)
    without_styling = re.sub(
        r",?\s*(?:fashion editorial|resort) styling$",
        "",
        cleaned,
        flags=re.IGNORECASE,
    )
    return without_styling.strip(" ,")
|
||||
|
||||
|
||||
def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
|
||||
if row.get("source") != "built_in_generator":
|
||||
return row
|
||||
@@ -87,6 +133,15 @@ def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
|
||||
if scene_text:
|
||||
row["scene_text"] = scene_text
|
||||
row.setdefault("scene_entry", {"slug": scene_slug, "prompt": scene_text})
|
||||
pose = _clean_legacy_pose(_legacy_prompt_field(row, "Pose"))
|
||||
_setdefault_nonempty(row, "pose", pose)
|
||||
expression = _legacy_prompt_field(row, "Facial expression") or _legacy_prompt_field(row, "Facial expressions")
|
||||
_setdefault_nonempty(row, "expression", expression)
|
||||
clothing = _clean_legacy_clothing(_legacy_prompt_field(row, "Clothing"))
|
||||
_setdefault_nonempty(row, "clothing", clothing)
|
||||
_setdefault_nonempty(row, "item", clothing)
|
||||
if clothing:
|
||||
_setdefault_nonempty(row, "item_label", "Clothing")
|
||||
return row
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user