Normalize built-in row appearance metadata
This commit is contained in:
@@ -4,9 +4,11 @@ import re
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
from . import generate_prompt_batches as prompt_batches
|
||||
from . import row_location as row_location_policy
|
||||
from .prompt_hygiene import combine_negative_text, sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
|
||||
except ImportError: # Allows local smoke tests with `python tools/prompt_smoke.py`.
|
||||
import generate_prompt_batches as prompt_batches
|
||||
import row_location as row_location_policy
|
||||
from prompt_hygiene import combine_negative_text, sanitize_caption_text, sanitize_negative_text, sanitize_prompt_text
|
||||
|
||||
@@ -113,6 +115,64 @@ def _clean_legacy_clothing(value: Any) -> str:
|
||||
return text.strip(" ,")
|
||||
|
||||
|
||||
def _legacy_body_phrase(row: dict[str, Any]) -> str:
    """Return the body phrase for a legacy row, building one if it is missing.

    Lookup order:

    1. ``row["body_phrase"]`` passed through ``_clean_text``; used as-is when
       the cleaned value is non-empty.
    2. Otherwise the body value (``body_type``, falling back to ``body``) and
       the figure note (``figure``, falling back to ``figure_note``) are
       cleaned and handed to ``prompt_batches.make_body_phrase``; the cleaned
       result is returned.

    Returns an empty string when the row has neither a body phrase nor a
    body value.
    """
    existing_phrase = _clean_text(row.get("body_phrase"))
    if existing_phrase:
        return existing_phrase

    body_value = _clean_text(row.get("body_type") or row.get("body"))
    if body_value:
        # The figure note is only looked up once a body value is known.
        note = _clean_text(row.get("figure") or row.get("figure_note"))
        generated = prompt_batches.make_body_phrase(body_value, note)
        return _clean_text(generated)
    return ""
|
||||
|
||||
|
||||
def _strip_legacy_caption_lead(caption: str) -> str:
|
||||
pieces = caption.split(", ", 1)
|
||||
if len(pieces) == 2 and pieces[0].strip().lower() not in ("woman", "man"):
|
||||
return pieces[1].strip()
|
||||
return caption
|
||||
|
||||
|
||||
def _legacy_single_caption_front(row: dict[str, Any]) -> dict[str, str]:
    """Split the leading fields of a single-subject legacy caption.

    The row's ``caption`` is cleaned with ``_clean_text`` and passed through
    ``_strip_legacy_caption_lead``. Two parsing strategies are then tried:

    1. If the row itself provides a ``woman``/``man`` subject, an age and a
       body phrase, and the caption starts (case-insensitively) with
       ``"<subject>, <age>, <body phrase>, "``, the text after that prefix is
       split into skin, hair, eyes and a remainder. A caption that matches
       the prefix but has fewer than four pieces after it yields ``{}``.
    2. Otherwise the caption is split positionally into subject, age, body
       phrase, skin, hair, eyes and a remainder; every piece is stripped.
       Fewer than seven pieces, or a first piece other than ``woman``/``man``,
       yields ``{}``.

    Returns:
        A dict with the keys ``caption_subject``, ``caption_age``,
        ``caption_body_phrase``, ``caption_skin``, ``caption_hair`` and
        ``caption_eyes``, or an empty dict when the caption is empty or cannot
        be parsed.
    """
    single_subjects = ("woman", "man")
    front_keys = (
        "caption_subject",
        "caption_age",
        "caption_body_phrase",
        "caption_skin",
        "caption_hair",
        "caption_eyes",
    )

    text = _strip_legacy_caption_lead(_clean_text(row.get("caption")))
    if not text:
        return {}

    # Strategy 1: anchor on values the row already carries, so a body phrase
    # that itself contains ", " does not throw off positional splitting.
    known_subject = _clean_text(row.get("primary_subject") or row.get("subject"))
    known_age = _clean_text(row.get("age_band") or row.get("age"))
    known_body = _legacy_body_phrase(row)
    if known_subject.lower() in single_subjects and known_age and known_body:
        lead = f"{known_subject}, {known_age}, {known_body}, "
        if text.lower().startswith(lead.lower()):
            tail = text[len(lead) :].split(", ", 3)
            if len(tail) != 4:
                # Need skin, hair, eyes plus at least one trailing piece.
                return {}
            front_values = (known_subject, known_age, known_body, *tail[:3])
            return dict(zip(front_keys, front_values))

    # Strategy 2: purely positional split of the first six fields.
    parts = [part.strip() for part in text.split(", ", 6)]
    if len(parts) < 7:
        return {}
    if parts[0].lower() not in single_subjects:
        return {}
    return dict(zip(front_keys, parts[:6]))
|
||||
|
||||
|
||||
def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
|
||||
if row.get("source") != "built_in_generator":
|
||||
return row
|
||||
@@ -133,6 +193,12 @@ def enrich_legacy_row_metadata(row: dict[str, Any]) -> dict[str, Any]:
|
||||
if scene_text:
|
||||
row["scene_text"] = scene_text
|
||||
row.setdefault("scene_entry", {"slug": scene_slug, "prompt": scene_text})
|
||||
if subject_type in ("woman", "man"):
|
||||
front = _legacy_single_caption_front(row)
|
||||
_setdefault_nonempty(row, "body_phrase", front.get("caption_body_phrase", ""))
|
||||
_setdefault_nonempty(row, "skin", front.get("caption_skin", ""))
|
||||
_setdefault_nonempty(row, "hair", front.get("caption_hair", ""))
|
||||
_setdefault_nonempty(row, "eyes", front.get("caption_eyes", ""))
|
||||
pose = _clean_legacy_pose(_legacy_prompt_field(row, "Pose"))
|
||||
_setdefault_nonempty(row, "pose", pose)
|
||||
expression = _legacy_prompt_field(row, "Facial expression") or _legacy_prompt_field(row, "Facial expressions")
|
||||
|
||||
Reference in New Issue
Block a user