Deduplicate pair caption cast descriptors

This commit is contained in:
2026-06-27 19:50:13 +02:00
parent 307ffdba3b
commit 4a3610fbc9
6 changed files with 41 additions and 6 deletions
+11
View File
@@ -4341,6 +4341,17 @@ def smoke_caption_metadata_routes() -> None:
_expect("Softcore side:" not in hard_route.prose, "Caption hardcore target should not include soft label")
_expect("Hardcore side:" not in hard_route.prose, "Caption hardcore target should not keep combined pair labels")
_expect(soft_route.prose != hard_route.prose, "Caption pair soft/hard targets should produce distinct prose")
shared_cast = pair.get("shared_cast_descriptors")
if isinstance(shared_cast, list):
shared_cast_text = "; ".join(str(item or "").strip() for item in shared_cast if str(item or "").strip())
else:
shared_cast_text = str(shared_cast or "").strip()
shared_cast_caption = caption_naturalizer._natural_cast_descriptor_text(shared_cast_text)
if shared_cast_caption:
_expect(
hard_route.prose.count(shared_cast_caption) <= 1,
"Caption hardcore target repeated shared cast descriptors",
)
public_hard, public_hard_method = caption_naturalizer.naturalize_caption(
"",
metadata_json=_json(pair),