Deduplicate pair caption cast descriptors
This commit is contained in:
@@ -363,12 +363,11 @@ def insta_of_pair_from_row_result(
|
|||||||
same_soft_cast = options.get("softcore_cast") == "same_as_hardcore"
|
same_soft_cast = options.get("softcore_cast") == "same_as_hardcore"
|
||||||
|
|
||||||
parts = []
|
parts = []
|
||||||
if cast_descriptor_text and same_soft_cast:
|
if not soft_text and not hard_text:
|
||||||
parts.append(deps.natural_cast_descriptor_text(cast_descriptor_text))
|
if cast_descriptor_text:
|
||||||
elif descriptor:
|
parts.append(deps.natural_cast_descriptor_text(cast_descriptor_text))
|
||||||
parts.append(f"A {descriptor}")
|
elif descriptor:
|
||||||
if cast_descriptor_text and not same_soft_cast:
|
parts.append(f"A {descriptor}")
|
||||||
parts.append(deps.natural_cast_descriptor_text(cast_descriptor_text))
|
|
||||||
if same_soft_cast and include_soft:
|
if same_soft_cast and include_soft:
|
||||||
parts.append(
|
parts.append(
|
||||||
deps.softcore_caption_setup_phrase(
|
deps.softcore_caption_setup_phrase(
|
||||||
|
|||||||
@@ -33,6 +33,9 @@ The map audit currently sees:
|
|||||||
caption outputs can be debugged by category, action/position family, selected
|
caption outputs can be debugged by category, action/position family, selected
|
||||||
pair side, scene profile, position keys, and POV labels instead of only
|
pair side, scene profile, position keys, and POV labels instead of only
|
||||||
proving that a metadata branch was used.
|
proving that a metadata branch was used.
|
||||||
|
- Insta/OF side-target training captions no longer prepend shared cast
|
||||||
|
descriptors when the selected side row already emits its own cast prose, and
|
||||||
|
route simulation flags repeated cast descriptors.
|
||||||
|
|
||||||
## Architectural Finding
|
## Architectural Finding
|
||||||
|
|
||||||
|
|||||||
@@ -1004,6 +1004,7 @@ issues for:
|
|||||||
action/position family, selected pair side, scene profile, position keys, and
|
action/position family, selected pair side, scene profile, position keys, and
|
||||||
POV labels;
|
POV labels;
|
||||||
- raw builder labels leaking into Krea output;
|
- raw builder labels leaking into Krea output;
|
||||||
|
- repeated cast descriptors in training-caption formatter output;
|
||||||
- duplicate negative-prompt comma items;
|
- duplicate negative-prompt comma items;
|
||||||
- softcore prompt noise;
|
- softcore prompt noise;
|
||||||
- POV routes emitting third-person camera text or losing first-person wording;
|
- POV routes emitting third-person camera text or losing first-person wording;
|
||||||
|
|||||||
@@ -110,6 +110,10 @@ AUDIT_DOC_SNIPPETS: tuple[tuple[str, str], ...] = (
|
|||||||
"docs/prompt-pool-routing-map.md",
|
"docs/prompt-pool-routing-map.md",
|
||||||
"formatter route traces exposing selected row metadata",
|
"formatter route traces exposing selected row metadata",
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"docs/prompt-pool-routing-map.md",
|
||||||
|
"repeated cast descriptors in training-caption formatter output",
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
|
PROMPT_ROW_READ_SCAN_GLOBS: tuple[str, ...] = (
|
||||||
|
|||||||
@@ -505,6 +505,22 @@ def _formatter_expectation_issues(
|
|||||||
return issues
|
return issues
|
||||||
|
|
||||||
|
|
||||||
|
def _caption_cast_descriptor_issues(name: str, row: dict[str, Any] | None, caption_text: str) -> list[str]:
    """Report a cast descriptor that occurs more than once in a caption.

    The descriptor is read from ``row["cast_descriptor_text"]``, falling back
    to ``row["shared_cast_descriptors"]`` when the first value is falsy. A
    list value is flattened into a single ``"; "``-separated string of its
    non-blank items; any other value is stringified and stripped. That text
    is passed through ``caption_naturalizer._natural_cast_descriptor_text``
    and the result is counted inside ``caption_text``.

    Args:
        name: Label used as the prefix of the reported issue.
        row: Row metadata; anything that is not a dict yields no issues.
        caption_text: Caption output to inspect.

    Returns:
        ``["<name>.caption: repeated_cast_descriptor"]`` when the naturalized
        descriptor appears more than once in ``caption_text``; otherwise an
        empty list (including when no descriptor text is available).
    """
    if not isinstance(row, dict):
        return []

    raw = row.get("cast_descriptor_text") or row.get("shared_cast_descriptors")
    if isinstance(raw, list):
        pieces = (str(entry or "").strip() for entry in raw)
        flattened = "; ".join(piece for piece in pieces if piece)
    else:
        flattened = str(raw or "").strip()
    if not flattened:
        return []

    phrase = caption_naturalizer._natural_cast_descriptor_text(flattened)
    # str.count tallies non-overlapping matches; an empty phrase is never
    # counted because the truthiness check short-circuits first.
    repeated = bool(phrase) and caption_text.count(phrase) > 1
    return [f"{name}.caption: repeated_cast_descriptor"] if repeated else []
|
||||||
|
|
||||||
|
|
||||||
def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str, Any], str]:
|
def _trace_dict(formatter_name: str, payload: dict[str, Any]) -> tuple[dict[str, Any], str]:
|
||||||
trace_text = str(payload.get("route_trace_json") or "")
|
trace_text = str(payload.get("route_trace_json") or "")
|
||||||
if not trace_text:
|
if not trace_text:
|
||||||
@@ -637,6 +653,7 @@ def _formatter_issues(
|
|||||||
if "metadata" not in str(method or ""):
|
if "metadata" not in str(method or ""):
|
||||||
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
|
issues.append(f"{name}.{formatter_name}: not_metadata_route:{method}")
|
||||||
issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
|
issues.extend(_formatter_trace_issues(name, formats, target=target, row=row))
|
||||||
|
issues.extend(_caption_cast_descriptor_issues(name, row, caption_text))
|
||||||
|
|
||||||
for label, value in (
|
for label, value in (
|
||||||
(f"{name}.krea_negative", krea.get("negative_prompt")),
|
(f"{name}.krea_negative", krea.get("negative_prompt")),
|
||||||
|
|||||||
@@ -4341,6 +4341,17 @@ def smoke_caption_metadata_routes() -> None:
|
|||||||
_expect("Softcore side:" not in hard_route.prose, "Caption hardcore target should not include soft label")
|
_expect("Softcore side:" not in hard_route.prose, "Caption hardcore target should not include soft label")
|
||||||
_expect("Hardcore side:" not in hard_route.prose, "Caption hardcore target should not keep combined pair labels")
|
_expect("Hardcore side:" not in hard_route.prose, "Caption hardcore target should not keep combined pair labels")
|
||||||
_expect(soft_route.prose != hard_route.prose, "Caption pair soft/hard targets should produce distinct prose")
|
_expect(soft_route.prose != hard_route.prose, "Caption pair soft/hard targets should produce distinct prose")
|
||||||
|
shared_cast = pair.get("shared_cast_descriptors")
|
||||||
|
if isinstance(shared_cast, list):
|
||||||
|
shared_cast_text = "; ".join(str(item or "").strip() for item in shared_cast if str(item or "").strip())
|
||||||
|
else:
|
||||||
|
shared_cast_text = str(shared_cast or "").strip()
|
||||||
|
shared_cast_caption = caption_naturalizer._natural_cast_descriptor_text(shared_cast_text)
|
||||||
|
if shared_cast_caption:
|
||||||
|
_expect(
|
||||||
|
hard_route.prose.count(shared_cast_caption) <= 1,
|
||||||
|
"Caption hardcore target repeated shared cast descriptors",
|
||||||
|
)
|
||||||
public_hard, public_hard_method = caption_naturalizer.naturalize_caption(
|
public_hard, public_hard_method = caption_naturalizer.naturalize_caption(
|
||||||
"",
|
"",
|
||||||
metadata_json=_json(pair),
|
metadata_json=_json(pair),
|
||||||
|
|||||||
Reference in New Issue
Block a user