Deduplicate pair caption cast descriptors

This commit is contained in:
2026-06-27 19:50:13 +02:00
parent 307ffdba3b
commit 4a3610fbc9
6 changed files with 41 additions and 6 deletions
+17
View File
@@ -505,6 +505,22 @@ def _formatter_expectation_issues(
return issues
def _caption_cast_descriptor_issues(name: str, row: dict[str, Any] | None, caption_text: str) -> list[str]:
if not isinstance(row, dict):
return []
descriptor = row.get("cast_descriptor_text") or row.get("shared_cast_descriptors")
if isinstance(descriptor, list):
descriptor_text = "; ".join(str(item or "").strip() for item in descriptor if str(item or "").strip())
else:
descriptor_text = str(descriptor or "").strip()
if not descriptor_text:
return []
natural_descriptor = caption_naturalizer._natural_cast_descriptor_text(descriptor_text)
if natural_descriptor and caption_text.count(natural_descriptor) > 1:
return [f"{name}.caption: repeated_cast_descriptor"]
return []
def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str, Any], str]:
trace_text = str(payload.get("route_trace_json") or "")
if not trace_text:
@@ -637,6 +653,7 @@ def _formatter_issues(
if "metadata" not in str(method or ""):
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
issues.extend(_caption_cast_descriptor_issues(name, row, caption_text))
for label, value in (
(f"{name}.krea_negative", krea.get("negative_prompt")),