from __future__ import annotations import argparse import hashlib import json import re from pathlib import Path from typing import Any try: from . import krea2_pose_variant_catalog except ImportError: # Allows local CLI/tests from the repository root. import krea2_pose_variant_catalog SCHEMA = "sxcp_krea2_atlas_refine_manifest_v1" BATCH_SCHEMA = "sxcp_atlas_refine_prompt_batch_v1" RESULT_SHEET_SCHEMA = "sxcp_atlas_refine_result_sheet_v1" PROMOTION_REPORT_SCHEMA = "sxcp_atlas_refine_promotion_report_v1" SIDECAR_UPDATE_DRAFT_SCHEMA = "sxcp_atlas_refine_sidecar_update_draft_v1" SIDECAR_UPDATE_VALIDATION_SCHEMA = "sxcp_atlas_refine_sidecar_update_validation_v1" SIDECAR_APPLY_REPORT_SCHEMA = "sxcp_atlas_refine_sidecar_apply_report_v1" MATRIX_SIDECAR_UPDATE_DRAFT_SCHEMA = "sxcp_atlas_refine_matrix_sidecar_update_draft_v1" MATRIX_SIDECAR_UPDATE_VALIDATION_SCHEMA = "sxcp_atlas_refine_matrix_sidecar_update_validation_v1" MATRIX_SIDECAR_APPLY_REPORT_SCHEMA = "sxcp_atlas_refine_matrix_sidecar_apply_report_v1" SEED_SELECTION_SCHEMA = "sxcp_atlas_refine_seed_selection_v1" SEED_MATRIX_SCHEMA = "sxcp_atlas_refine_seed_matrix_v1" SEED_MATRIX_RESULT_SHEET_SCHEMA = "sxcp_atlas_refine_seed_matrix_result_sheet_v1" SEED_MATRIX_PROMOTION_REPORT_SCHEMA = "sxcp_atlas_refine_seed_matrix_promotion_report_v1" CATALOG_CUE_DRAFT_SCHEMA = "sxcp_atlas_refine_catalog_cue_draft_v1" COVERAGE_REPORT_SCHEMA = "sxcp_atlas_refine_coverage_report_v1" REFERENCE_POOL_REPORT_SCHEMA = "sxcp_atlas_reference_pool_report_v1" REFERENCE_CUE_REVIEW_SHEET_SCHEMA = "sxcp_atlas_reference_cue_review_sheet_v1" REFERENCE_CUE_CANDIDATE_DRAFT_SCHEMA = "sxcp_atlas_reference_cue_candidate_draft_v1" REFERENCE_CUE_SIDECAR_AUTHOR_DRAFT_SCHEMA = "sxcp_atlas_reference_cue_sidecar_author_draft_v1" REFERENCE_CUE_SIDECAR_AUTHOR_VALIDATION_SCHEMA = "sxcp_atlas_reference_cue_sidecar_author_validation_v1" REFERENCE_CUE_SIDECAR_AUTHOR_APPLY_REPORT_SCHEMA = "sxcp_atlas_reference_cue_sidecar_author_apply_report_v1" SIDECAR_SCAFFOLD_SCHEMA = "sxcp_atlas_refine_sidecar_scaffold_v1" BASELINE_SCORE_SHEET_SCHEMA = "sxcp_atlas_refine_baseline_score_sheet_v1" BASELINE_SCORE_UPDATE_DRAFT_SCHEMA = "sxcp_atlas_refine_baseline_score_update_draft_v1" BASELINE_SCORE_UPDATE_VALIDATION_SCHEMA = "sxcp_atlas_refine_baseline_score_update_validation_v1" BASELINE_SCORE_APPLY_REPORT_SCHEMA = "sxcp_atlas_refine_baseline_score_apply_report_v1" PROMPT_NOISE_REPORT_SCHEMA = "sxcp_atlas_refine_prompt_noise_report_v1" PROMPT_CLEANUP_SHEET_SCHEMA = "sxcp_atlas_refine_prompt_cleanup_sheet_v1" PROMPT_CLEANUP_VALIDATION_SCHEMA = "sxcp_atlas_refine_prompt_cleanup_validation_v1" PROMPT_CLEANUP_APPLY_REPORT_SCHEMA = "sxcp_atlas_refine_prompt_cleanup_apply_report_v1" DEFAULT_OUT_CHANNEL = "sxcp_eval_out" DEFAULT_IN_CHANNEL = "sxcp_eval_in" NEGATIVE_OUT_CHANNEL = "sxcp_eval_negative_out" PROMPT_ORDERS = {"subject_first", "geometry_only", "prompt_order_test"} PROMPT_SUFFIXES = {".txt", ".prompt"} IMAGE_SUFFIXES = {".png"} SIDECAR_SUFFIX = ".json" SEED_METADATA_KEYS = ( "sampler_seed", "generator_seed", "atlas_cue_seed", "micro_position_seed", "workspace_seed", ) SEED_SELECTION_SLOT_KEYS = tuple(key for key in SEED_METADATA_KEYS if key != "sampler_seed") CUE_AXIS_KEYS = ( "contact_depth", "hand_position", "foot_position", "body_angle", "camera_height", "workspace_surface", "clothing_visibility", "expression_eye_detail", "anatomy_shape_detail", ) SCORE_KEYS = ( "atlas_pose_match", "contact_match", "pose_ownership", "workspace_continuity", "clothing_visibility", "subject_identity", "expression_eye_control", "anatomy_proportion", "prompt_noise", ) PROMOTION_PASS_VALUES = {"pass"} PROMOTION_PROGRESS_VALUES = {"pass", "partial", "baseline"} PROMOTION_REQUIRED_PASS_KEYS = ( "pose_ownership", "workspace_continuity", "clothing_visibility", "subject_identity", "prompt_noise", ) PROMOTION_REQUIRED_PROGRESS_KEYS = ( "atlas_pose_match", "contact_match", "expression_eye_control", "anatomy_proportion", ) FORBIDDEN_PROMPT_FIELDS = ( "negative", "negative_prompt", "negative_text", "negative_channel", ) PROMPT_OPTION_WORD_RE = re.compile(r"\b(?:either|or|may|optionally)\b", re.IGNORECASE) PROMPT_NEGATIVE_CONDITIONING_RE = re.compile( r"\b(?:do not|must not|should not|never|without|no)\b", re.IGNORECASE, ) PROMPT_META_PHRASES = ( "keep the visible partner", "visible partner and the action primary", "context stays", "camera layout", "pov foreground clothing cue", "pov foreground body cue", "beside or behind the bodies", ) PROMPT_DUPLICATE_PHRASE_RE = re.compile(r"[^.!?;]+(?:[.!?;]|$)") PROMPT_DUPLICATE_MIN_WORDS = 6 MIN_STABLE_MATRIX_SAMPLER_SEEDS = 2 PROMPT_NOISE_CODES = ( "option_word", "negative_conditioning", "meta_instruction", "duplicate_phrase", ) def _sha256_text(text: str) -> str: return hashlib.sha256(text.encode("utf-8")).hexdigest() def _known_variant_keys() -> list[str]: return sorted(krea2_pose_variant_catalog.variant_keys(), key=len, reverse=True) def _variant_key_from_stem(stem: str, known_keys: list[str]) -> str: for key in known_keys: if stem == key or stem.startswith(f"{key}_"): return key match = re.match(r"^(?P.+?)_\d+_?$", stem) return match.group("key") if match else stem def _files_by_stem(folder: Path, suffixes: set[str]) -> dict[str, Path]: files: dict[str, Path] = {} for path in sorted(folder.iterdir(), key=lambda item: item.name.lower()): if path.is_file() and path.suffix.lower() in suffixes: files[path.stem] = path return files def _seed_metadata() -> dict[str, None]: return {key: None for key in SEED_METADATA_KEYS} def _cue_axes() -> dict[str, None]: return {key: None for key in CUE_AXIS_KEYS} def _score_template() -> dict[str, None]: return {key: None for key in SCORE_KEYS} def _merge_known_values(defaults: dict[str, Any], raw: Any) -> dict[str, Any]: merged = dict(defaults) if not isinstance(raw, dict): return merged for key in merged: if key in raw: merged[key] = raw[key] return merged def _merge_non_null_known_values(defaults: dict[str, Any], raw: Any) -> dict[str, Any]: merged = dict(defaults) if not isinstance(raw, dict): return merged for key in merged: value = raw.get(key) if value is not None: merged[key] = value return merged def _text(value: Any) -> str: return "" if value is None else str(value).strip() def _validate_no_negative_channel(value: Any, *, field: str) -> None: text = _text(value) if text == NEGATIVE_OUT_CHANNEL: raise ValueError(f"{field} must not use {NEGATIVE_OUT_CHANNEL}") if NEGATIVE_OUT_CHANNEL in text: raise ValueError(f"{field} must not mention {NEGATIVE_OUT_CHANNEL}") def _string_list(value: Any, *, field: str) -> list[str]: if value is None: return [] if not isinstance(value, list): raise ValueError(f"{field} must be a list of strings") items: list[str] = [] for index, item in enumerate(value): text = _text(item) if not text: raise ValueError(f"{field}[{index}] must be a non-empty string") _validate_no_negative_channel(text, field=f"{field}[{index}]") items.append(text) return items def _reference_images(value: Any, *, field: str) -> list[str]: refs = _string_list(value, field=field) atlas_root = _atlas_root_path() for index, ref in enumerate(refs): path = Path(ref) if path.is_absolute(): raise ValueError(f"{field}[{index}] must be relative to the atlas root") if ".." in path.parts: raise ValueError(f"{field}[{index}] must not contain .. path segments") if path.suffix.lower() != ".png": raise ValueError(f"{field}[{index}] must reference a PNG image") if atlas_root is not None and not (atlas_root / path).is_file(): raise ValueError(f"{field}[{index}] missing atlas reference image: {atlas_root / path}") return refs def _atlas_root_path() -> Path | None: try: catalog = krea2_pose_variant_catalog.load_catalog() except Exception: return None root_text = _text(catalog.get("atlas_root") if isinstance(catalog, dict) else "") if not root_text: return None root = Path(root_text) return root if root.is_dir() else None def _atlas_relative_path(path_value: str | Path, *, atlas_root: Path, field: str) -> Path: path = Path(path_value) if path.is_absolute(): try: path = path.relative_to(atlas_root) except ValueError as exc: raise ValueError(f"{field} must be inside the atlas root {atlas_root}") from exc if ".." in path.parts: raise ValueError(f"{field} must not contain .. path segments") return path def _reference_image_id(path: Path) -> str: stem = path.stem return stem.split("_", 1)[0] def _atlas_folder_images(atlas_root: Path, folder: str | Path, *, field: str) -> list[dict[str, Any]]: relative_folder = _atlas_relative_path(folder, atlas_root=atlas_root, field=field) folder_path = atlas_root / relative_folder if not folder_path.is_dir(): raise ValueError(f"{field} is missing atlas folder: {folder_path}") images: list[dict[str, Any]] = [] for path in sorted(folder_path.iterdir(), key=lambda item: item.name.lower()): if not path.is_file() or path.suffix.lower() != ".png": continue relative_path = relative_folder / path.name images.append( { "id": _reference_image_id(path), "relative_path": relative_path.as_posix(), "filename": path.name, "size_bytes": path.stat().st_size, } ) return images def build_reference_pool_report(variant_key: str, *, supplemental_folders: list[str] | None = None) -> dict[str, Any]: key = _text(variant_key) if not key: raise ValueError("variant_key is required") atlas_root = _atlas_root_path() if atlas_root is None: raise ValueError("catalog atlas_root is missing or not readable") variant = krea2_pose_variant_catalog.get_variant(key) if not variant: raise ValueError(f"unknown variant_key {key!r}") canonical_folders = [str(folder) for folder in variant.get("atlas_folders") or [] if _text(folder)] if not canonical_folders: raise ValueError(f"variant {key!r} has no atlas_folders") supplemental_folder_values = [str(folder) for folder in supplemental_folders or [] if _text(folder)] canonical_images: list[dict[str, Any]] = [] for index, folder in enumerate(canonical_folders): canonical_images.extend(_atlas_folder_images(atlas_root, folder, field=f"atlas_folders[{index}]")) supplemental_images: list[dict[str, Any]] = [] for index, folder in enumerate(supplemental_folder_values): supplemental_images.extend(_atlas_folder_images(atlas_root, folder, field=f"supplemental_folders[{index}]")) canonical_by_id = {image["id"]: image for image in canonical_images} supplemental_by_id = {image["id"]: image for image in supplemental_images} matched_ids = sorted(set(canonical_by_id) & set(supplemental_by_id)) supplemental_extra_ids = sorted(set(supplemental_by_id) - set(canonical_by_id)) canonical_missing_ids = sorted(set(canonical_by_id) - set(supplemental_by_id)) catalog_reference_images = _reference_images(variant.get("reference_images"), field=f"{key}.reference_images") return { "schema": REFERENCE_POOL_REPORT_SCHEMA, "variant_key": key, "atlas_root": str(atlas_root), "canonical_folders": canonical_folders, "supplemental_folders": supplemental_folder_values, "catalog_reference_images": catalog_reference_images, "catalog_reference_count": len(catalog_reference_images), "canonical_image_count": len(canonical_images), "supplemental_image_count": len(supplemental_images), "matched_image_count": len(matched_ids), "supplemental_extra_count": len(supplemental_extra_ids), "canonical_missing_supplemental_count": len(canonical_missing_ids), "canonical_images": [image["relative_path"] for image in canonical_images], "supplemental_images": [image["relative_path"] for image in supplemental_images], "matched_images": [ { "id": image_id, "canonical_image": canonical_by_id[image_id]["relative_path"], "supplemental_image": supplemental_by_id[image_id]["relative_path"], } for image_id in matched_ids ], "supplemental_extra_images": [supplemental_by_id[image_id]["relative_path"] for image_id in supplemental_extra_ids], "canonical_missing_supplemental_images": [canonical_by_id[image_id]["relative_path"] for image_id in canonical_missing_ids], } def _blank_review_cue_axes() -> dict[str, str]: return {key: "" for key in CUE_AXIS_KEYS} def _reference_review_item( *, image_id: str, role: str, canonical_image: str, supplemental_image: str, reference_images_template: list[str], ) -> dict[str, Any]: return { "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "reference_images_template": list(reference_images_template), "cue_axes": _blank_review_cue_axes(), "observed_positive_cues": [], "rejected_cues": [], "review_notes": "", "prompt_variant_template": { "id": "", "prompt_order": "subject_first", "append_cues": [], "reference_images": list(reference_images_template), "cue_axes": _cue_axes(), "seed_metadata": _seed_metadata(), "notes": "", }, } def build_reference_cue_review_sheet(variant_key: str, *, supplemental_folders: list[str] | None = None) -> dict[str, Any]: report = build_reference_pool_report(variant_key, supplemental_folders=supplemental_folders) catalog_reference_images = set(report.get("catalog_reference_images") or []) matched_by_canonical = { _text(item.get("canonical_image")): _text(item.get("supplemental_image")) for item in report.get("matched_images") or [] if isinstance(item, dict) } review_items: list[dict[str, Any]] = [] for canonical_image in report.get("canonical_images") or []: canonical_text = _text(canonical_image) if not canonical_text: continue role = "catalog_reference" if canonical_text in catalog_reference_images else "canonical_reference" review_items.append( _reference_review_item( image_id=_reference_image_id(Path(canonical_text)), role=role, canonical_image=canonical_text, supplemental_image=matched_by_canonical.get(canonical_text, ""), reference_images_template=[canonical_text], ) ) for supplemental_image in report.get("supplemental_extra_images") or []: supplemental_text = _text(supplemental_image) if not supplemental_text: continue review_items.append( _reference_review_item( image_id=_reference_image_id(Path(supplemental_text)), role="supplemental_extra", canonical_image="", supplemental_image=supplemental_text, reference_images_template=[], ) ) return { "schema": REFERENCE_CUE_REVIEW_SHEET_SCHEMA, "variant_key": report["variant_key"], "atlas_root": report["atlas_root"], "canonical_folders": report["canonical_folders"], "supplemental_folders": report["supplemental_folders"], "catalog_reference_count": report["catalog_reference_count"], "canonical_image_count": report["canonical_image_count"], "supplemental_image_count": report["supplemental_image_count"], "matched_image_count": report["matched_image_count"], "supplemental_extra_count": report["supplemental_extra_count"], "review_item_count": len(review_items), "instructions": ( "Fill observed_positive_cues and cue_axes from visual review only. " "Use canonical/catalog items for sidecar reference_images; use supplemental_extra items as cue-mining evidence until promoted." ), "review_items": review_items, } def _review_cue_axes(raw: Any, *, field: str) -> dict[str, Any]: values = _cue_axes() if not isinstance(raw, dict): return values for key in CUE_AXIS_KEYS: value = _text(raw.get(key)) if value: _validate_no_negative_channel(value, field=f"{field}.{key}") values[key] = value return values def _prompt_variant_id_from_review_item(item: dict[str, Any], *, field: str) -> str: variant_id = _text(item.get("prompt_variant_id")) template = item.get("prompt_variant_template") if not variant_id and isinstance(template, dict): variant_id = _text(template.get("id")) if variant_id: _validate_no_negative_channel(variant_id, field=f"{field}.prompt_variant_id") return variant_id def build_reference_cue_candidate_draft(reference_cue_review_sheet: dict[str, Any]) -> dict[str, Any]: if not isinstance(reference_cue_review_sheet, dict): raise ValueError("reference cue review sheet must be an object") schema = _text(reference_cue_review_sheet.get("schema")) if schema and schema != REFERENCE_CUE_REVIEW_SHEET_SCHEMA: raise ValueError(f"reference cue review sheet schema must be {REFERENCE_CUE_REVIEW_SHEET_SCHEMA}") review_items = reference_cue_review_sheet.get("review_items") if not isinstance(review_items, list): raise ValueError("reference cue review sheet review_items must be a list") variant_key = _text(reference_cue_review_sheet.get("variant_key")) candidates: list[dict[str, Any]] = [] skipped: list[dict[str, Any]] = [] seen_variant_ids: set[str] = set() for index, item in enumerate(review_items): if not isinstance(item, dict): skipped.append({"index": index, "id": "", "reason": "invalid_review_item"}) continue field = f"review_items[{index}]" image_id = _text(item.get("id")) role = _text(item.get("role")) canonical_image = _text(item.get("canonical_image")) supplemental_image = _text(item.get("supplemental_image")) cues = _string_list(item.get("observed_positive_cues"), field=f"{field}.observed_positive_cues") if not cues: skipped.append( { "index": index, "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "reason": "no_observed_positive_cues", } ) continue variant_id = _prompt_variant_id_from_review_item(item, field=field) template = item.get("prompt_variant_template") template = template if isinstance(template, dict) else {} exact_text = _text(template.get("text")) prompt_noise_issues: list[dict[str, Any]] = [] for cue_index, cue in enumerate(cues): prompt_noise_issues.extend( _prompt_noise_issues( cue, context="reference_cue_observed_positive_cue", prompt_variant_id=variant_id, cue_index=cue_index, ) ) if exact_text: prompt_noise_issues.extend( _prompt_noise_issues( exact_text, context="reference_cue_exact_text", prompt_variant_id=variant_id, ) ) if prompt_noise_issues: skipped.append( { "index": index, "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "reason": "prompt_noise_issue", "prompt_noise_issues": prompt_noise_issues, "prompt_noise_code_counts": _prompt_noise_code_counts(prompt_noise_issues), } ) continue reference_images_template = _reference_images( item.get("reference_images_template"), field=f"{field}.reference_images_template", ) if role == "supplemental_extra" or not canonical_image: skipped.append( { "index": index, "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "reason": "supplemental_extra_needs_canonical_reference", "observed_positive_cues": cues, "cue_axes": _review_cue_axes(item.get("cue_axes"), field=f"{field}.cue_axes"), } ) continue if not reference_images_template: skipped.append( { "index": index, "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "reason": "missing_reference_images_template", "observed_positive_cues": cues, } ) continue if not variant_id: skipped.append( { "index": index, "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "reason": "missing_prompt_variant_id", "observed_positive_cues": cues, } ) continue if variant_id in seen_variant_ids: skipped.append( { "index": index, "id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "prompt_variant_id": variant_id, "reason": "duplicate_prompt_variant_id", "observed_positive_cues": cues, } ) continue seen_variant_ids.add(variant_id) prompt_order = _text(template.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: raise ValueError(f"{field}.prompt_variant_template.prompt_order must be one of {sorted(PROMPT_ORDERS)}") cue_axes = _review_cue_axes(item.get("cue_axes"), field=f"{field}.cue_axes") seed_metadata = _merge_known_values(_seed_metadata(), template.get("seed_metadata")) notes = _text(template.get("notes") or item.get("review_notes")) _validate_no_negative_channel(notes, field=f"{field}.notes") prompt_variant = { "id": variant_id, "prompt_order": prompt_order, "reference_images": reference_images_template, "cue_axes": cue_axes, "seed_metadata": seed_metadata, "notes": notes, } if exact_text: _validate_no_negative_channel(exact_text, field=f"{field}.prompt_variant_template.text") prompt_variant["text"] = exact_text else: prompt_variant["append_cues"] = cues candidates.append( { "variant_key": variant_key, "reference_item_id": image_id, "role": role, "canonical_image": canonical_image, "supplemental_image": supplemental_image, "prompt_variant_id": variant_id, "reference_images": reference_images_template, "observed_positive_cues": cues, "cue_axes": cue_axes, "review_notes": _text(item.get("review_notes")), "prompt_variant": prompt_variant, } ) return { "schema": REFERENCE_CUE_CANDIDATE_DRAFT_SCHEMA, "variant_key": variant_key, "ready_candidate_count": len(candidates), "skipped_count": len(skipped), "instructions": ( "Copy reviewed prompt_variant objects into same-stem sidecars only after choosing the matching baseline deck; " "raw-only supplemental rows remain cue-mining evidence until paired with a canonical reference." ), "candidates": candidates, "skipped": skipped, } def build_reference_cue_sidecar_author_draft( manifest: dict[str, Any], reference_cue_candidate_draft: dict[str, Any], *, variant_key: str = "", ) -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") schema = _text(reference_cue_candidate_draft.get("schema")) if schema and schema != REFERENCE_CUE_CANDIDATE_DRAFT_SCHEMA: raise ValueError(f"reference cue candidate draft schema must be {REFERENCE_CUE_CANDIDATE_DRAFT_SCHEMA}") requested_variant_key = _text(variant_key or reference_cue_candidate_draft.get("variant_key")) if not requested_variant_key: raise ValueError("variant_key is required") candidate_variants: list[dict[str, Any]] = [] skipped: list[dict[str, Any]] = [] for candidate_index, candidate in enumerate(reference_cue_candidate_draft.get("candidates") or []): if not isinstance(candidate, dict): skipped.append({"candidate_index": candidate_index, "reason": "invalid_candidate"}) continue candidate_variant_key = _text(candidate.get("variant_key") or reference_cue_candidate_draft.get("variant_key")) if candidate_variant_key and candidate_variant_key != requested_variant_key: skipped.append( { "candidate_index": candidate_index, "prompt_variant_id": _text(candidate.get("prompt_variant_id")), "variant_key": candidate_variant_key, "reason": "variant_key_mismatch", } ) continue prompt_variant = candidate.get("prompt_variant") if not isinstance(prompt_variant, dict): skipped.append( { "candidate_index": candidate_index, "prompt_variant_id": _text(candidate.get("prompt_variant_id")), "reason": "missing_prompt_variant", } ) continue variant_copy = dict(prompt_variant) variant_id = _text(variant_copy.get("id")) append_cues = _string_list(variant_copy.get("append_cues"), field=f"candidate prompt_variant {variant_id}.append_cues") exact_text = _text(variant_copy.get("text")) if variant_id and append_cues: variant_copy.setdefault( "prompt_source", { "kind": "append_cues", "prompt_variant_id": variant_id, "append_cues": list(append_cues), }, ) elif variant_id and exact_text: variant_copy.setdefault( "prompt_source", { "kind": "text", "prompt_variant_id": variant_id, "tested_text_sha256": _sha256_text(exact_text), }, ) candidate_variants.append(variant_copy) updates: list[dict[str, Any]] = [] matching_entry_count = 0 for entry in entries: if not isinstance(entry, dict): continue entry_variant_key = _text(entry.get("variant_key")) if entry_variant_key != requested_variant_key: continue matching_entry_count += 1 entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) if not bool(entry.get("known_variant")): skipped.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "reason": "unknown_variant", } ) continue if not candidate_variants: skipped.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "reason": "no_ready_candidates", } ) continue updates.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "sidecar_filename": f"{source_stem}{SIDECAR_SUFFIX}", "source_prompt_sha256": _text(entry.get("prompt_sha256")), "prompt_path": _text(entry.get("prompt_path")), "image_path": _text(entry.get("image_path")), "prompt_variants": [dict(variant) for variant in candidate_variants], "notes": "Pre-test sidecar variants from reviewed atlas reference cue candidates.", } ) if matching_entry_count == 0: skipped.append( { "variant_key": requested_variant_key, "reason": "no_matching_manifest_entry", } ) return { "schema": REFERENCE_CUE_SIDECAR_AUTHOR_DRAFT_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": requested_variant_key, "candidate_count": len(candidate_variants), "update_count": len(updates), "skipped_count": len(skipped), "instructions": ( "Validate, apply to the same manifest folder, then rebuild the manifest and run MCP fixed-seed prompt batches before promotion." ), "updates": updates, "skipped": skipped, } def _prompt_variant_evidence(raw: Any, *, field: str) -> dict[str, Any]: if raw is None: return {} if not isinstance(raw, dict): raise ValueError(f"{field} must be an object") evidence: dict[str, Any] = {} if "seed" in raw: evidence["seed"] = _int_seed(raw.get("seed"), field=f"{field}.seed") if "turn" in raw: turn = raw.get("turn") if turn is not None and (not isinstance(turn, int) or isinstance(turn, bool)): raise ValueError(f"{field}.turn must be an integer when present") evidence["turn"] = turn if "image_path" in raw: evidence["image_path"] = _image_path(raw.get("image_path"), field=f"{field}.image_path") if "score" in raw: evidence["score"] = _merge_known_values(_score_template(), raw.get("score")) reference_images = _reference_images(raw.get("reference_images"), field=f"{field}.reference_images") if reference_images: evidence["reference_images"] = reference_images return evidence def _stable_matrix_evidence(raw: Any) -> dict[str, Any]: if not isinstance(raw, dict) or raw.get("stable") is not True: return {} try: selection_seed = _int_seed(raw.get("selection_seed"), field="matrix_evidence.selection_seed") seed_slot = _text(raw.get("seed_slot")) if seed_slot not in SEED_SELECTION_SLOT_KEYS: return {} sampler_seeds_raw = raw.get("sampler_seeds") if not isinstance(sampler_seeds_raw, list) or not sampler_seeds_raw: return {} sampler_seeds = [ _int_seed(seed, field=f"matrix_evidence.sampler_seeds[{index}]") for index, seed in enumerate(sampler_seeds_raw) ] if len(set(sampler_seeds)) != len(sampler_seeds): return {} if len(sampler_seeds) < MIN_STABLE_MATRIX_SAMPLER_SEEDS: return {} jobs_raw = raw.get("jobs") if not isinstance(jobs_raw, list) or not jobs_raw: return {} if raw.get("job_count") != len(jobs_raw) or raw.get("promotion_ready_count") != len(jobs_raw) or raw.get("blocked_count") != 0: return {} seen_job_ids: set[str] = set() job_sampler_seeds: list[int] = [] for job_index, job in enumerate(jobs_raw): if not isinstance(job, dict): return {} job_id = _text(job.get("id")) if not job_id or job_id in seen_job_ids: return {} seen_job_ids.add(job_id) if _text(job.get("decision")) != "seedable_candidate": return {} job_sampler_seed = _int_seed(job.get("sampler_seed"), field=f"matrix_evidence.jobs[{job_index}].sampler_seed") if job_sampler_seed in job_sampler_seeds: return {} job_sampler_seeds.append(job_sampler_seed) if _int_seed(job.get("selection_seed"), field=f"matrix_evidence.jobs[{job_index}].selection_seed") != selection_seed: return {} _image_path(job.get("image_path"), field=f"matrix_evidence.jobs[{job_index}].image_path") turn = job.get("turn") if not isinstance(turn, int) or isinstance(turn, bool): return {} decision, _blockers = _promotion_blockers(_merge_known_values(_score_template(), job.get("score"))) if decision != "seedable_candidate": return {} if sorted(job_sampler_seeds) != sorted(sampler_seeds): return {} except ValueError: return {} return dict(raw) def _stable_matrix_evidence_for_variant(variant: dict[str, Any], *, field: str) -> dict[str, Any]: matrix_evidence = _stable_matrix_evidence(variant.get("matrix_evidence")) if not matrix_evidence: return {} try: seed_slot = _text(matrix_evidence.get("seed_slot")) selection_seed = _int_seed(matrix_evidence.get("selection_seed"), field=f"{field}.matrix_evidence.selection_seed") seed_metadata = _merge_known_values(_seed_metadata(), variant.get("seed_metadata")) if _int_seed(seed_metadata.get(seed_slot), field=f"{field}.seed_metadata.{seed_slot}") != selection_seed: return {} except ValueError: return {} return matrix_evidence def _prompt_source(raw: Any, *, field: str) -> dict[str, Any]: if raw is None: return {} if not isinstance(raw, dict): raise ValueError(f"{field} must be an object") kind = _text(raw.get("kind")) if kind not in {"baseline", "text", "append_cues"}: raise ValueError(f"{field}.kind must be baseline, text, or append_cues") source: dict[str, Any] = {"kind": kind} prompt_variant_id = _text(raw.get("prompt_variant_id")) if prompt_variant_id: _validate_no_negative_channel(prompt_variant_id, field=f"{field}.prompt_variant_id") source["prompt_variant_id"] = prompt_variant_id append_cues = _string_list(raw.get("append_cues"), field=f"{field}.append_cues") if kind == "append_cues": if not append_cues: raise ValueError(f"{field}.append_cues is required when kind is append_cues") source["append_cues"] = append_cues elif append_cues: source["append_cues"] = append_cues tested_hash = _text(raw.get("tested_text_sha256")) if tested_hash: source["tested_text_sha256"] = tested_hash return source def _prompt_source_for_variant(variant: dict[str, Any], *, variant_id: str, text: str, append_cues: list[str]) -> dict[str, Any]: source = _prompt_source(variant.get("prompt_source"), field=f"prompt variant {variant_id}.prompt_source") if source: source.setdefault("prompt_variant_id", variant_id) source.setdefault("tested_text_sha256", _sha256_text(text)) return source if append_cues: return { "kind": "append_cues", "prompt_variant_id": variant_id, "append_cues": list(append_cues), "tested_text_sha256": _sha256_text(text), } return { "kind": "text", "prompt_variant_id": variant_id, "tested_text_sha256": _sha256_text(text), } def _prompt_variants(raw: Any) -> list[dict[str, Any]]: if raw is None: return [] if not isinstance(raw, list): raise ValueError("prompt_variants must be a list") variants: list[dict[str, Any]] = [] seen_variant_ids: set[str] = set() for index, item in enumerate(raw): if not isinstance(item, dict): raise ValueError(f"prompt_variants[{index}] must be an object") for forbidden in FORBIDDEN_PROMPT_FIELDS: if forbidden in item: raise ValueError(f"prompt_variants[{index}] must not contain {forbidden}") variant_id = _text(item.get("id")) if not variant_id: raise ValueError(f"prompt_variants[{index}].id is required") _validate_no_negative_channel(variant_id, field=f"prompt_variants[{index}].id") if variant_id in seen_variant_ids: raise ValueError(f"prompt_variants[{index}].id {variant_id!r} is duplicated") seen_variant_ids.add(variant_id) prompt_order = _text(item.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: raise ValueError(f"prompt_variants[{index}].prompt_order must be one of {sorted(PROMPT_ORDERS)}") text = _text(item.get("text")) append_cues = _string_list(item.get("append_cues"), field=f"prompt_variants[{index}].append_cues") if text: _validate_no_negative_channel(text, field=f"prompt_variants[{index}].text") if bool(text) == bool(append_cues): raise ValueError(f"prompt_variants[{index}] must provide exactly one of text or append_cues") notes = _text(item.get("notes")) _validate_no_negative_channel(notes, field=f"prompt_variants[{index}].notes") variant: dict[str, Any] = { "id": variant_id, "prompt_order": prompt_order, "cue_axes": _merge_known_values(_cue_axes(), item.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), item.get("seed_metadata")), "notes": notes, } evidence = _prompt_variant_evidence(item.get("evidence"), field=f"prompt_variants[{index}].evidence") if evidence: variant["evidence"] = evidence reference_images = _reference_images(item.get("reference_images"), field=f"prompt_variants[{index}].reference_images") if reference_images: variant["reference_images"] = reference_images matrix_evidence = item.get("matrix_evidence") if isinstance(matrix_evidence, dict): variant["matrix_evidence"] = dict(matrix_evidence) prompt_source = _prompt_source(item.get("prompt_source"), field=f"prompt_variants[{index}].prompt_source") if prompt_source: source_variant_id = _text(prompt_source.get("prompt_variant_id")) if source_variant_id and source_variant_id != variant_id: raise ValueError( f"prompt_variants[{index}].prompt_source.prompt_variant_id {source_variant_id!r} must match id {variant_id!r}" ) variant["prompt_source"] = prompt_source if text: variant["text"] = text else: variant["append_cues"] = append_cues variants.append(variant) return variants def _sidecar_for_stem(folder: Path, stem: str) -> dict[str, Any]: path = folder / f"{stem}{SIDECAR_SUFFIX}" if not path.is_file(): return {} with path.open("r", encoding="utf-8") as handle: data = json.load(handle) return data if isinstance(data, dict) else {} def build_manifest(folder: str | Path, *, subject_id: str = "") -> dict[str, Any]: root = Path(folder).resolve() if not root.is_dir(): raise FileNotFoundError(f"atlas refine folder does not exist: {root}") prompt_files = _files_by_stem(root, PROMPT_SUFFIXES) image_files = _files_by_stem(root, IMAGE_SUFFIXES) known_keys = _known_variant_keys() known_key_set = set(known_keys) paired_stems = sorted(set(prompt_files) & set(image_files)) missing_stems = sorted(set(prompt_files) ^ set(image_files)) entries: list[dict[str, Any]] = [] for stem in paired_stems: prompt_path = prompt_files[stem].resolve() image_path = image_files[stem].resolve() prompt_text = prompt_path.read_text(encoding="utf-8").strip() variant_key = _variant_key_from_stem(stem, known_keys) sidecar = _sidecar_for_stem(root, stem) entries.append( { "id": stem.rstrip("_"), "source_stem": stem, "variant_key": variant_key, "known_variant": variant_key in known_key_set, "prompt_path": str(prompt_path), "image_path": str(image_path), "prompt_text": prompt_text, "prompt_sha256": _sha256_text(prompt_text), "image_size_bytes": image_path.stat().st_size, "seed_metadata": _merge_known_values(_seed_metadata(), sidecar.get("seed_metadata")), "cue_axes": _merge_known_values(_cue_axes(), sidecar.get("cue_axes")), "score": _merge_known_values(_score_template(), sidecar.get("score")), "prompt_variants": _prompt_variants(sidecar.get("prompt_variants")), "notes": str(sidecar.get("notes") or ""), } ) missing_pairs: list[dict[str, str]] = [] for stem in missing_stems: prompt_path = prompt_files.get(stem) image_path = image_files.get(stem) missing_pairs.append( { "stem": stem, "prompt_path": str(prompt_path.resolve()) if prompt_path else "", "image_path": str(image_path.resolve()) if image_path else "", } ) return { "schema": SCHEMA, "root": str(root), "subject_id": subject_id or root.name, "entry_count": len(entries), "missing_pair_count": len(missing_pairs), "unknown_variant_count": sum(1 for entry in entries if not entry["known_variant"]), "entries": entries, "missing_pairs": missing_pairs, } def _int_seed(value: Any, *, field: str) -> int: if not isinstance(value, int) or isinstance(value, bool): raise ValueError(f"{field} must be an integer sampler seed") return value def _probe_list(raw: Any, *, field: str) -> list[dict[str, Any]]: if not isinstance(raw, list) or not raw: raise ValueError(f"{field} must be a non-empty list") probes: list[dict[str, Any]] = [] for index, item in enumerate(raw): if not isinstance(item, dict): raise ValueError(f"{field}[{index}] must be an object") probes.append(item) return probes def _image_path(value: Any, *, field: str) -> str: path_text = _text(value) if not path_text: raise ValueError(f"{field} is required") path = Path(path_text) if not path.is_absolute(): raise ValueError(f"{field} must be absolute") if path.suffix.lower() != ".png": raise ValueError(f"{field} must reference a PNG artifact") return path_text def _entry_for_variant(manifest: dict[str, Any], variant_key: str) -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") for entry in entries: if isinstance(entry, dict) and entry.get("variant_key") == variant_key: return entry raise ValueError(f"manifest does not contain variant_key {variant_key!r}") def _append_cues(base_text: str, cues: list[str]) -> str: text = _text(base_text) if not text: raise ValueError("source prompt text is required") _validate_no_negative_channel(text, field="source prompt text") for cue in cues: if text[-1] not in ".!?": text += "." text += f" {cue}" return re.sub(r"\s+", " ", text).strip() def _probe_id(entry_id: Any, variant_id: str) -> str: base_id = _text(entry_id) if not base_id: raise ValueError("source entry id is required") return f"{base_id}__{variant_id}" def _variant_id_from_probe_id(probe_id: str, source_entry_id: str) -> str: prefix = f"{source_entry_id}__" if source_entry_id and probe_id.startswith(prefix): return probe_id[len(prefix):] if "__" in probe_id: return probe_id.rsplit("__", 1)[-1] return probe_id def _variant_prompt_text(base_prompt: str, variant: dict[str, Any], *, field: str) -> str: text = _text(variant.get("text")) if text: _validate_no_negative_channel(text, field=f"{field}.text") return text append_cues = _string_list(variant.get("append_cues"), field=f"{field}.append_cues") return _append_cues(base_prompt, append_cues) def build_prompt_batch( manifest: dict[str, Any], variant_key: str, *, sampler_seed: int | None = None, include_baseline: bool = True, ) -> dict[str, Any]: entry = _entry_for_variant(manifest, variant_key) seed_metadata = _merge_known_values(_seed_metadata(), entry.get("seed_metadata")) seed = _int_seed(sampler_seed if sampler_seed is not None else seed_metadata.get("sampler_seed"), field="sampler_seed") seed_metadata["sampler_seed"] = seed prompt_text = _text(entry.get("prompt_text")) _validate_no_negative_channel(prompt_text, field="prompt_text") entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) cue_axes = _merge_known_values(_cue_axes(), entry.get("cue_axes")) probes: list[dict[str, Any]] = [] if include_baseline: probes.append( { "id": _probe_id(entry_id, "baseline"), "prompt_order": "subject_first", "text": prompt_text, "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "cue_axes": cue_axes, "seed_metadata": seed_metadata, "prompt_source": { "kind": "baseline", "tested_text_sha256": _sha256_text(prompt_text), }, "notes": "baseline", } ) for variant in entry.get("prompt_variants") or []: if not isinstance(variant, dict): raise ValueError("entry prompt_variants must contain objects") variant_id = _text(variant.get("id")) if not variant_id: raise ValueError("entry prompt variant id is required") prompt_order = _text(variant.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: raise ValueError(f"entry prompt variant prompt_order must be one of {sorted(PROMPT_ORDERS)}") exact_text = _text(variant.get("text")) append_cues = _string_list(variant.get("append_cues"), field=f"entry prompt variant {variant_id}.append_cues") if bool(exact_text) == bool(append_cues): raise ValueError(f"entry prompt variant {variant_id} must provide exactly one of text or append_cues") text = _variant_prompt_text(prompt_text, variant, field=f"entry prompt variant {variant_id}") _validate_no_negative_channel(text, field=f"entry prompt variant {variant_id}.text") prompt_source = _prompt_source_for_variant( variant, variant_id=variant_id, text=text, append_cues=append_cues, ) variant_seed_metadata = _merge_non_null_known_values(seed_metadata, variant.get("seed_metadata")) variant_seed_metadata["sampler_seed"] = seed probe = { "id": _probe_id(entry_id, variant_id), "prompt_order": prompt_order, "text": text, "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "cue_axes": _merge_non_null_known_values(cue_axes, variant.get("cue_axes")), "seed_metadata": variant_seed_metadata, "evidence": _prompt_variant_evidence(variant.get("evidence"), field=f"entry prompt variant {variant_id}.evidence"), "prompt_source": prompt_source, "notes": _text(variant.get("notes")), } reference_images = _reference_images(variant.get("reference_images"), field=f"entry prompt variant {variant_id}.reference_images") if reference_images: probe["reference_images"] = reference_images matrix_evidence = _stable_matrix_evidence_for_variant(variant, field=f"entry prompt variant {variant_id}") if matrix_evidence: probe["matrix_evidence"] = matrix_evidence probes.append(probe) if not probes: raise ValueError("prompt batch would contain no probes") return { "schema": BATCH_SCHEMA, "seed": seed, "channel_out": DEFAULT_OUT_CHANNEL, "channel_in": DEFAULT_IN_CHANNEL, "subject_id": _text(manifest.get("subject_id")), "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "source_prompt_sha256": _text(entry.get("prompt_sha256")), "probes": probes, } def select_seeded_prompt_variant( manifest: dict[str, Any], variant_key: str, *, selection_seed: int, seed_slot: str = "atlas_cue_seed", ) -> dict[str, Any]: seed = _int_seed(selection_seed, field="selection_seed") if seed_slot not in SEED_SELECTION_SLOT_KEYS: raise ValueError(f"seed_slot must be one of {list(SEED_SELECTION_SLOT_KEYS)} and must not be sampler_seed") entry = _entry_for_variant(manifest, variant_key) prompt_text = _text(entry.get("prompt_text")) entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) eligible: list[dict[str, Any]] = [] ineligible: list[dict[str, Any]] = [] for variant in entry.get("prompt_variants") or []: if not isinstance(variant, dict): continue variant_id = _text(variant.get("id")) if not variant_id: continue evidence = _prompt_variant_evidence(variant.get("evidence"), field=f"prompt variant {variant_id}.evidence") score = _merge_known_values(_score_template(), evidence.get("score")) decision, blockers = _promotion_blockers(score) if decision != "seedable_candidate": reason = "missing_seedable_evidence" if blockers else "not_seedable" if blockers: reason += f": {', '.join(blockers)}" ineligible.append( { "prompt_variant_id": variant_id, "reason": reason, "cue_axes": _merge_known_values(_cue_axes(), variant.get("cue_axes")), "evidence": evidence, } ) continue matrix_evidence = _stable_matrix_evidence_for_variant(variant, field=f"prompt variant {variant_id}") if "matrix_evidence" in variant and not matrix_evidence: ineligible_item = { "prompt_variant_id": variant_id, "reason": "unstable_matrix_evidence", "cue_axes": _merge_known_values(_cue_axes(), variant.get("cue_axes")), "evidence": evidence, } if isinstance(variant.get("matrix_evidence"), dict): ineligible_item["matrix_evidence"] = dict(variant["matrix_evidence"]) ineligible.append(ineligible_item) continue append_cues = _string_list(variant.get("append_cues"), field=f"prompt variant {variant_id}.append_cues") text = _variant_prompt_text(prompt_text, variant, field=f"prompt variant {variant_id}") prompt_source = _prompt_source_for_variant( variant, variant_id=variant_id, text=text, append_cues=append_cues, ) candidate = { "prompt_variant_id": variant_id, "prompt_order": _text(variant.get("prompt_order") or "subject_first"), "text": text, "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "cue_axes": _merge_known_values(_cue_axes(), variant.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), variant.get("seed_metadata")), "evidence": evidence, "prompt_source": prompt_source, "notes": _text(variant.get("notes")), } reference_images = _reference_images(variant.get("reference_images"), field=f"prompt variant {variant_id}.reference_images") if reference_images: candidate["reference_images"] = reference_images if matrix_evidence: candidate["matrix_evidence"] = matrix_evidence eligible.append(candidate) eligible.sort(key=lambda candidate: _text(candidate.get("prompt_variant_id"))) ineligible.sort(key=lambda candidate: _text(candidate.get("prompt_variant_id"))) selected: dict[str, Any] = {} selected_index = None if eligible: selected_index = seed % len(eligible) selected = eligible[selected_index] return { "schema": SEED_SELECTION_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "selection_seed": seed, "seed_slot": seed_slot, "eligible_candidate_count": len(eligible), "ineligible_candidate_count": len(ineligible), "selected_index": selected_index, "selected": selected, "eligible": eligible, "ineligible": ineligible, } def build_seed_selected_prompt_batch( manifest: dict[str, Any], variant_key: str, *, selection_seed: int, sampler_seed: int, seed_slot: str = "atlas_cue_seed", include_baseline: bool = True, ) -> dict[str, Any]: seed = _int_seed(sampler_seed, field="sampler_seed") selection = select_seeded_prompt_variant( manifest, variant_key, selection_seed=selection_seed, seed_slot=seed_slot, ) selected = selection.get("selected") if not isinstance(selected, dict) or not selected: raise ValueError(f"no seedable prompt variant is available for {variant_key!r}") entry = _entry_for_variant(manifest, variant_key) entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) prompt_text = _text(entry.get("prompt_text")) _validate_no_negative_channel(prompt_text, field="prompt_text") entry_seed_metadata = _merge_known_values(_seed_metadata(), entry.get("seed_metadata")) entry_seed_metadata["sampler_seed"] = seed selected_seed_metadata = _merge_known_values(entry_seed_metadata, selected.get("seed_metadata")) selected_seed_metadata["sampler_seed"] = seed selected_seed_metadata[seed_slot] = selection["selection_seed"] probes: list[dict[str, Any]] = [] if include_baseline: probes.append( { "id": _probe_id(entry_id, "baseline"), "prompt_order": "subject_first", "text": prompt_text, "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "cue_axes": _merge_known_values(_cue_axes(), entry.get("cue_axes")), "seed_metadata": entry_seed_metadata, "prompt_source": { "kind": "baseline", "tested_text_sha256": _sha256_text(prompt_text), }, "notes": "baseline", } ) selected_id = _text(selected.get("prompt_variant_id")) selected_text = _text(selected.get("text")) if not selected_id or not selected_text: raise ValueError("selected prompt variant id and text are required") _validate_no_negative_channel(selected_text, field="selected prompt text") selected_probe = { "id": _probe_id(entry_id, selected_id), "prompt_order": _text(selected.get("prompt_order") or "subject_first"), "text": selected_text, "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "cue_axes": _merge_known_values(_cue_axes(), selected.get("cue_axes")), "seed_metadata": selected_seed_metadata, "evidence": _prompt_variant_evidence(selected.get("evidence"), field=f"selected prompt variant {selected_id}.evidence"), "prompt_source": _prompt_source(selected.get("prompt_source"), field=f"selected prompt variant {selected_id}.prompt_source"), "selection": { "selection_seed": selection["selection_seed"], "seed_slot": selection["seed_slot"], "selected_index": selection["selected_index"], "prompt_variant_id": selected_id, }, "notes": _text(selected.get("notes")), } reference_images = _reference_images(selected.get("reference_images"), field=f"selected prompt variant {selected_id}.reference_images") if reference_images: selected_probe["reference_images"] = reference_images matrix_evidence = _stable_matrix_evidence_for_variant(selected, field=f"selected prompt variant {selected_id}") if matrix_evidence: selected_probe["matrix_evidence"] = matrix_evidence probes.append(selected_probe) return { "schema": BATCH_SCHEMA, "seed": seed, "channel_out": DEFAULT_OUT_CHANNEL, "channel_in": DEFAULT_IN_CHANNEL, "subject_id": _text(manifest.get("subject_id")), "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "source_prompt_sha256": _text(entry.get("prompt_sha256")), "selection": selection, "probes": probes, } def build_seed_matrix( manifest: dict[str, Any], variant_key: str, *, selection_seeds: list[int], sampler_seeds: list[int], seed_slot: str = "atlas_cue_seed", ) -> dict[str, Any]: if not selection_seeds: raise ValueError("selection_seeds must contain at least one cue seed") if not sampler_seeds: raise ValueError("sampler_seeds must contain at least one sampler seed") if len(set(selection_seeds)) != len(selection_seeds): raise ValueError("selection_seeds must not contain duplicate cue seeds") if len(set(sampler_seeds)) != len(sampler_seeds): raise ValueError("sampler_seeds must not contain duplicate sampler seeds") jobs: list[dict[str, Any]] = [] for sampler_index, sampler_seed in enumerate(sampler_seeds): sampler_seed_value = _int_seed(sampler_seed, field=f"sampler_seeds[{sampler_index}]") for selection_index, selection_seed in enumerate(selection_seeds): selection_seed_value = _int_seed(selection_seed, field=f"selection_seeds[{selection_index}]") batch = build_seed_selected_prompt_batch( manifest, variant_key, selection_seed=selection_seed_value, sampler_seed=sampler_seed_value, seed_slot=seed_slot, ) probes = [probe for probe in batch.get("probes") or [] if isinstance(probe, dict)] candidate_probe = probes[-1] if probes else {} selection = dict(batch.get("selection")) if isinstance(batch.get("selection"), dict) else {} selected = dict(selection.get("selected")) if isinstance(selection.get("selected"), dict) else {} jobs.append( { "id": f"{variant_key}__sampler_{sampler_seed_value}__{seed_slot}_{selection_seed_value}", "variant_key": variant_key, "sampler_seed": sampler_seed_value, "selection_seed": selection_seed_value, "seed_slot": seed_slot, "selected": selected, "candidate_probe": candidate_probe, "batch": batch, } ) return { "schema": SEED_MATRIX_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": variant_key, "seed_slot": seed_slot, "sampler_seeds": list(sampler_seeds), "selection_seeds": list(selection_seeds), "sampler_seed_count": len(sampler_seeds), "selection_seed_count": len(selection_seeds), "job_count": len(jobs), "jobs": jobs, } def _score_value(score: dict[str, Any], key: str) -> str: return _text(score.get(key)).lower() def _promotion_blockers(score: dict[str, Any]) -> tuple[str, list[str]]: missing: list[str] = [] failed: list[str] = [] for key in PROMOTION_REQUIRED_PASS_KEYS: value = _score_value(score, key) if not value: missing.append(key) elif value not in PROMOTION_PASS_VALUES: failed.append(f"{key}={value}") for key in PROMOTION_REQUIRED_PROGRESS_KEYS: value = _score_value(score, key) if not value: missing.append(key) elif value not in PROMOTION_PROGRESS_VALUES: failed.append(f"{key}={value}") if missing: return "needs_visual_score", missing if failed: return "rejected", failed return "seedable_candidate", [] def build_promotion_report(result_sheet: dict[str, Any]) -> dict[str, Any]: probes = _probe_list(result_sheet.get("probes"), field="result sheet probes") seed = _int_seed(result_sheet.get("seed"), field="result sheet seed") baseline_probe_id = _text(result_sheet.get("baseline_probe_id") or probes[0].get("id")) source_entry_id = _text(result_sheet.get("source_entry_id")) source_stem = _text(result_sheet.get("source_stem") or source_entry_id) candidates: list[dict[str, Any]] = [] for probe in probes: probe_id = _text(probe.get("id")) if not probe_id: raise ValueError("result sheet probe id is required") if probe_id == baseline_probe_id: continue text = _text(probe.get("text")) if not text: raise ValueError(f"result sheet probe {probe_id}.text is required") _validate_no_negative_channel(text, field=f"result sheet probe {probe_id}.text") probe_source_entry_id = _text(probe.get("source_entry_id") or source_entry_id) prompt_variant_id = _variant_id_from_probe_id(probe_id, probe_source_entry_id) prompt_noise_issues = _prompt_noise_issues( text, context="result_sheet_probe", prompt_variant_id=prompt_variant_id, ) score = _merge_known_values(_score_template(), probe.get("score")) decision, blockers = _promotion_blockers(score) matrix_evidence = _stable_matrix_evidence_for_variant(probe, field=f"result sheet probe {probe_id}") if decision == "seedable_candidate" and prompt_noise_issues: decision = "rejected" blockers = ["prompt_noise_issue"] if decision == "seedable_candidate" and "matrix_evidence" in probe and not matrix_evidence: decision = "rejected" blockers = ["unstable_matrix_evidence"] probe_source_stem = _text(probe.get("source_stem") or source_stem or probe_source_entry_id) candidate = { "id": probe_id, "prompt_variant_id": prompt_variant_id, "decision": decision, "blockers": blockers, "variant_key": _text(probe.get("variant_key") or result_sheet.get("variant_key")), "source_entry_id": probe_source_entry_id, "source_stem": probe_source_stem, "seed": seed, "prompt_order": _text(probe.get("prompt_order") or "subject_first"), "text": text, "turn": probe.get("turn"), "image_path": _image_path(probe.get("image_path"), field=f"result sheet probe {probe_id}.image_path"), "cue_axes": _merge_known_values(_cue_axes(), probe.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), probe.get("seed_metadata")), "score": score, "prompt_source": _prompt_source(probe.get("prompt_source"), field=f"result sheet probe {probe_id}.prompt_source"), "analysis_notes": _text(probe.get("analysis_notes")), } reference_images = _reference_images(probe.get("reference_images"), field=f"result sheet probe {probe_id}.reference_images") if reference_images: candidate["reference_images"] = reference_images if prompt_noise_issues: candidate["prompt_noise_issues"] = prompt_noise_issues candidate["prompt_noise_code_counts"] = _prompt_noise_code_counts(prompt_noise_issues) if matrix_evidence: candidate["matrix_evidence"] = matrix_evidence candidates.append(candidate) return { "schema": PROMOTION_REPORT_SCHEMA, "seed": seed, "subject_id": _text(result_sheet.get("subject_id")), "variant_key": _text(result_sheet.get("variant_key")), "source_entry_id": source_entry_id, "source_stem": source_stem, "baseline_probe_id": baseline_probe_id, "candidate_count": len(candidates), "promotion_ready_count": sum(1 for candidate in candidates if candidate["decision"] == "seedable_candidate"), "blocked_count": sum(1 for candidate in candidates if candidate["decision"] != "seedable_candidate"), "required_pass_keys": list(PROMOTION_REQUIRED_PASS_KEYS), "required_progress_keys": list(PROMOTION_REQUIRED_PROGRESS_KEYS), "candidates": candidates, } def build_sidecar_update_draft(promotion_report: dict[str, Any]) -> dict[str, Any]: candidates = _probe_list(promotion_report.get("candidates"), field="promotion report candidates") seed = _int_seed(promotion_report.get("seed"), field="promotion report seed") ready_candidates = [candidate for candidate in candidates if candidate.get("decision") == "seedable_candidate"] updates_by_stem: dict[str, dict[str, Any]] = {} for candidate in ready_candidates: candidate_id = _text(candidate.get("id")) prompt_variant_id = _text(candidate.get("prompt_variant_id")) if not candidate_id or not prompt_variant_id: raise ValueError("seedable candidate id and prompt_variant_id are required") text = _text(candidate.get("text")) if not text: raise ValueError(f"seedable candidate {candidate_id}.text is required") _validate_no_negative_channel(text, field=f"seedable candidate {candidate_id}.text") source_entry_id = _text(candidate.get("source_entry_id") or promotion_report.get("source_entry_id")) source_stem = _text(candidate.get("source_stem") or promotion_report.get("source_stem") or source_entry_id) if not source_stem: raise ValueError(f"seedable candidate {candidate_id}.source_stem is required") update = updates_by_stem.setdefault( source_stem, { "source_entry_id": source_entry_id, "source_stem": source_stem, "sidecar_filename": f"{source_stem}{SIDECAR_SUFFIX}", "variant_key": _text(candidate.get("variant_key") or promotion_report.get("variant_key")), "prompt_variants": [], }, ) prompt_variant = { "id": prompt_variant_id, "prompt_order": _text(candidate.get("prompt_order") or "subject_first"), "text": text, "cue_axes": _merge_known_values(_cue_axes(), candidate.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), candidate.get("seed_metadata")), "notes": _text(candidate.get("analysis_notes")), "prompt_source": _prompt_source(candidate.get("prompt_source"), field=f"seedable candidate {candidate_id}.prompt_source"), "evidence": { "seed": seed, "turn": candidate.get("turn"), "image_path": _image_path(candidate.get("image_path"), field=f"seedable candidate {candidate_id}.image_path"), "score": _merge_known_values(_score_template(), candidate.get("score")), }, } reference_images = _reference_images(candidate.get("reference_images"), field=f"seedable candidate {candidate_id}.reference_images") if reference_images: prompt_variant["reference_images"] = reference_images prompt_variant["evidence"]["reference_images"] = reference_images matrix_evidence = _stable_matrix_evidence_for_variant(candidate, field=f"seedable candidate {candidate_id}") if matrix_evidence: prompt_variant["matrix_evidence"] = matrix_evidence update["prompt_variants"].append(prompt_variant) updates = [updates_by_stem[key] for key in sorted(updates_by_stem)] return { "schema": SIDECAR_UPDATE_DRAFT_SCHEMA, "seed": seed, "subject_id": _text(promotion_report.get("subject_id")), "variant_key": _text(promotion_report.get("variant_key")), "ready_candidate_count": len(ready_candidates), "skipped_candidate_count": len(candidates) - len(ready_candidates), "update_count": len(updates), "updates": updates, } def build_matrix_sidecar_update_draft(matrix_promotion_report: dict[str, Any]) -> dict[str, Any]: schema = _text(matrix_promotion_report.get("schema")) if schema and schema != SEED_MATRIX_PROMOTION_REPORT_SCHEMA: raise ValueError(f"seed matrix promotion report schema must be {SEED_MATRIX_PROMOTION_REPORT_SCHEMA}") jobs = [job for job in matrix_promotion_report.get("jobs") or [] if isinstance(job, dict)] jobs_by_id = {_text(job.get("id")): job for job in jobs if _text(job.get("id"))} updates_by_stem: dict[str, dict[str, Any]] = {} skipped: list[dict[str, Any]] = [] ready_group_count = 0 for group in matrix_promotion_report.get("groups") or []: if not isinstance(group, dict): continue prompt_variant_id = _text(group.get("prompt_variant_id")) selection_seed = group.get("selection_seed") blockers = [_text(blocker) for blocker in group.get("blockers") or [] if _text(blocker)] group_context = { "variant_key": _text(group.get("variant_key") or matrix_promotion_report.get("variant_key")), "source_entry_id": _text(group.get("source_entry_id")), "source_stem": _text(group.get("source_stem") or group.get("source_entry_id")), "prompt_variant_id": prompt_variant_id, "prompt_text_sha256": _text(group.get("prompt_text_sha256")), "selection_seed": selection_seed, "seed_slot": _text(group.get("seed_slot") or matrix_promotion_report.get("seed_slot")), "sampler_seeds": list(group.get("sampler_seeds") or []), "blockers": blockers, } if group.get("stable") is not True: skipped.append({**group_context, "reason": "unstable_matrix_group"}) continue group_job_ids = [_text(job_id) for job_id in group.get("job_ids") or [] if _text(job_id)] duplicate_job_ids = sorted({job_id for job_id in group_job_ids if group_job_ids.count(job_id) > 1}) if duplicate_job_ids: raise ValueError( f"stable matrix group {prompt_variant_id!r} job_ids contain duplicated ids: {', '.join(duplicate_job_ids)}" ) missing_job_ids = [job_id for job_id in group_job_ids if job_id not in jobs_by_id] if missing_job_ids: raise ValueError( f"stable matrix group {prompt_variant_id!r} job_ids reference missing jobs: {', '.join(missing_job_ids)}" ) group_jobs = [jobs_by_id[job_id] for job_id in group_job_ids if job_id in jobs_by_id] expected_selection_seed = _int_seed(selection_seed, field=f"stable matrix group {prompt_variant_id}.selection_seed") expected_prompt_text_sha256 = group_context["prompt_text_sha256"] if not expected_prompt_text_sha256 and group_jobs: first_candidate = group_jobs[0].get("candidate") if isinstance(group_jobs[0].get("candidate"), dict) else {} first_text = _text(first_candidate.get("text")) if isinstance(first_candidate, dict) else "" expected_prompt_text_sha256 = _sha256_text(first_text) if first_text else "" for job in group_jobs: job_id = _text(job.get("id")) job_candidate = job.get("candidate") if isinstance(job.get("candidate"), dict) else {} job_text = _text(job_candidate.get("text")) if isinstance(job_candidate, dict) else "" job_prompt_text_sha256 = _sha256_text(job_text) if job_text else _text(job.get("prompt_text_sha256")) declared_job_text_sha256 = _text(job.get("prompt_text_sha256")) if declared_job_text_sha256 and job_prompt_text_sha256 and declared_job_text_sha256 != job_prompt_text_sha256: raise ValueError( f"stable matrix group {prompt_variant_id!r} job {job_id!r} candidate prompt text " f"{job_prompt_text_sha256!r} does not match job prompt_text_sha256; " f"expected {declared_job_text_sha256!r}" ) identity_checks = ( ("prompt_variant_id", prompt_variant_id, _text(job.get("prompt_variant_id"))), ("prompt text", expected_prompt_text_sha256, job_prompt_text_sha256), ("selection_seed", expected_selection_seed, _int_seed(job.get("selection_seed"), field=f"matrix job {job_id}.selection_seed")), ("seed_slot", group_context["seed_slot"], _text(job.get("seed_slot"))), ("variant_key", group_context["variant_key"], _text(job.get("variant_key"))), ("source_entry_id", group_context["source_entry_id"], _text(job.get("source_entry_id"))), ("source_stem", group_context["source_stem"], _text(job.get("source_stem") or job.get("source_entry_id"))), ) for field, expected_value, actual_value in identity_checks: if expected_value and actual_value and actual_value != expected_value: raise ValueError( f"stable matrix group {prompt_variant_id!r} job_ids include job {job_id!r} " f"with {field} {actual_value!r}, expected {expected_value!r}" ) declared_sampler_seeds = sorted( {_int_seed(seed, field=f"stable matrix group {prompt_variant_id}.sampler_seeds") for seed in group_context["sampler_seeds"]} ) job_sampler_seeds = sorted( {_int_seed(job.get("sampler_seed"), field=f"stable matrix group {prompt_variant_id}.job_ids sampler_seed") for job in group_jobs} ) if declared_sampler_seeds != job_sampler_seeds: raise ValueError( f"stable matrix group {prompt_variant_id!r} sampler_seeds {declared_sampler_seeds} " f"do not match job_ids sampler coverage {job_sampler_seeds}" ) if len(job_sampler_seeds) < MIN_STABLE_MATRIX_SAMPLER_SEEDS: raise ValueError( f"stable matrix group {prompt_variant_id!r} sampler_seeds must include at least " f"{MIN_STABLE_MATRIX_SAMPLER_SEEDS} unique sampler seeds" ) actual_job_count = len(group_jobs) actual_promotion_ready_count = sum(1 for job in group_jobs if job.get("decision") == "seedable_candidate") actual_blocked_count = actual_job_count - actual_promotion_ready_count count_mismatches: list[str] = [] for field, actual_value in ( ("job_count", actual_job_count), ("promotion_ready_count", actual_promotion_ready_count), ("blocked_count", actual_blocked_count), ): if field in group and group.get(field) is not None: try: declared_value = int(group.get(field)) except (TypeError, ValueError) as exc: raise ValueError(f"stable matrix group {prompt_variant_id!r} {field} must be an integer") from exc if declared_value != actual_value: count_mismatches.append(f"{field} {declared_value} != job_ids count {actual_value}") if count_mismatches: raise ValueError(f"stable matrix group {prompt_variant_id!r} count mismatch: {'; '.join(count_mismatches)}") ready_jobs = [job for job in group_jobs if job.get("decision") == "seedable_candidate"] if not ready_jobs: skipped.append({**group_context, "reason": "no_seedable_jobs"}) continue representative_job = ready_jobs[0] candidate = representative_job.get("candidate") if not isinstance(candidate, dict): skipped.append({**group_context, "reason": "missing_representative_candidate"}) continue source_entry_id = _text(candidate.get("source_entry_id")) source_stem = _text(candidate.get("source_stem") or source_entry_id) if not source_stem: skipped.append({**group_context, "reason": "missing_source_stem"}) continue text = _text(candidate.get("text")) if not text: skipped.append({**group_context, "reason": "missing_candidate_text"}) continue _validate_no_negative_channel(text, field=f"matrix candidate {prompt_variant_id}.text") matrix_jobs: list[dict[str, Any]] = [] for job in ready_jobs: job_candidate = job.get("candidate") if isinstance(job.get("candidate"), dict) else {} matrix_jobs.append( { "id": _text(job.get("id")), "sampler_seed": _int_seed(job.get("sampler_seed"), field=f"matrix job {job.get('id')}.sampler_seed"), "selection_seed": _int_seed(job.get("selection_seed"), field=f"matrix job {job.get('id')}.selection_seed"), "decision": _text(job.get("decision")), "turn": job_candidate.get("turn"), "image_path": _image_path(job_candidate.get("image_path"), field=f"matrix job {job.get('id')}.image_path"), "score": _merge_known_values(_score_template(), job_candidate.get("score")), } ) matrix_evidence = { "stable": True, "selection_seed": expected_selection_seed, "seed_slot": group_context["seed_slot"], "sampler_seeds": declared_sampler_seeds, "job_count": actual_job_count, "promotion_ready_count": actual_promotion_ready_count, "blocked_count": actual_blocked_count, "jobs": matrix_jobs, } update = updates_by_stem.setdefault( source_stem, { "source_entry_id": source_entry_id, "source_stem": source_stem, "sidecar_filename": f"{source_stem}{SIDECAR_SUFFIX}", "variant_key": group_context["variant_key"], "prompt_variants": [], }, ) update["prompt_variants"].append( { "id": prompt_variant_id, "prompt_order": _text(candidate.get("prompt_order") or "subject_first"), "text": text, "cue_axes": _merge_known_values(_cue_axes(), candidate.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), candidate.get("seed_metadata")), "notes": f"stable matrix evidence for {group_context['seed_slot']}={matrix_evidence['selection_seed']}", "prompt_source": _prompt_source(candidate.get("prompt_source"), field=f"matrix candidate {prompt_variant_id}.prompt_source"), "evidence": { "seed": _int_seed(representative_job.get("sampler_seed"), field="representative matrix sampler_seed"), "turn": candidate.get("turn"), "image_path": _image_path(candidate.get("image_path"), field=f"matrix candidate {prompt_variant_id}.image_path"), "score": _merge_known_values(_score_template(), candidate.get("score")), }, "matrix_evidence": matrix_evidence, } ) reference_images = _reference_images(candidate.get("reference_images"), field=f"matrix candidate {prompt_variant_id}.reference_images") if reference_images: update["prompt_variants"][-1]["reference_images"] = reference_images update["prompt_variants"][-1]["evidence"]["reference_images"] = reference_images ready_group_count += 1 updates = [updates_by_stem[key] for key in sorted(updates_by_stem)] return { "schema": MATRIX_SIDECAR_UPDATE_DRAFT_SCHEMA, "subject_id": _text(matrix_promotion_report.get("subject_id")), "variant_key": _text(matrix_promotion_report.get("variant_key")), "ready_group_count": ready_group_count, "skipped_group_count": len(skipped), "update_count": len(updates), "updates": updates, "skipped": skipped, } def build_catalog_cue_draft(manifest: dict[str, Any], *, variant_key: str = "") -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") requested_variant_key = _text(variant_key) candidates: list[dict[str, Any]] = [] skipped: list[dict[str, Any]] = [] for entry in entries: entry_variant_key = _text(entry.get("variant_key")) if requested_variant_key and entry_variant_key != requested_variant_key: continue prompt_text = _text(entry.get("prompt_text")) entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) for variant in entry.get("prompt_variants") or []: if not isinstance(variant, dict): continue variant_id = _text(variant.get("id")) if not variant_id: continue append_cues = _string_list(variant.get("append_cues"), field=f"catalog cue variant {variant_id}.append_cues") tested_text = _variant_prompt_text(prompt_text, variant, field=f"catalog cue variant {variant_id}") prompt_source = _prompt_source_for_variant( variant, variant_id=variant_id, text=tested_text, append_cues=append_cues, ) evidence = _prompt_variant_evidence(variant.get("evidence"), field=f"catalog cue variant {variant_id}.evidence") score = _merge_known_values(_score_template(), evidence.get("score")) decision, blockers = _promotion_blockers(score) if decision != "seedable_candidate": skipped.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "prompt_variant_id": variant_id, "reason": "missing_seedable_evidence" if blockers else "not_seedable", "blockers": blockers, } ) continue matrix_evidence = _stable_matrix_evidence_for_variant(variant, field=f"catalog cue variant {variant_id}") if "matrix_evidence" in variant and not matrix_evidence: skipped_item = { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "prompt_variant_id": variant_id, "reason": "unstable_matrix_evidence", "blockers": ["unstable_matrix_evidence"], } if isinstance(variant.get("matrix_evidence"), dict): skipped_item["matrix_evidence"] = dict(variant["matrix_evidence"]) skipped.append(skipped_item) continue if prompt_source.get("kind") != "append_cues" or not prompt_source.get("append_cues"): skipped.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "prompt_variant_id": variant_id, "reason": "not_append_cues", } ) continue candidate = { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "sidecar_filename": f"{source_stem}{SIDECAR_SUFFIX}", "prompt_variant_id": variant_id, "prompt_variant_cues": list(prompt_source.get("append_cues") or []), "tested_text": tested_text, "tested_text_sha256": _sha256_text(tested_text), "cue_axes": _merge_known_values(_cue_axes(), variant.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), variant.get("seed_metadata")), "evidence": evidence, "notes": _text(variant.get("notes")), } reference_images = _reference_images(variant.get("reference_images"), field=f"catalog cue variant {variant_id}.reference_images") if reference_images: candidate["reference_images"] = reference_images if matrix_evidence: candidate["matrix_evidence"] = matrix_evidence candidates.append(candidate) return { "schema": CATALOG_CUE_DRAFT_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": requested_variant_key, "ready_cue_count": len(candidates), "skipped_count": len(skipped), "candidates": candidates, "skipped": skipped, } def _coverage_state( *, known_variant: bool, prompt_noise_issue_count: int, prompt_variant_count: int, seedable_count: int, catalog_cue_count: int, unscored_count: int, rejected_count: int, ) -> tuple[str, str]: if not known_variant: return "unknown_variant", "map the prompt/image stem to a catalog variant before seed testing" if prompt_noise_issue_count: return "needs_prompt_cleanup", "clean option/meta/negative prompt wording before visual scoring or seed promotion" if prompt_variant_count == 0: return "baseline_only", "add reviewed sidecar prompt_variants from MCP atlas probes" if catalog_cue_count: return "ready_for_catalog_review", "review catalog cue draft before editing prompt_variant_cues" if seedable_count: return "ready_for_seed_selection", "use atlas_cue_seed selection or create catalog cue draft if append_cues are available" if unscored_count: return "needs_visual_score", "score returned images against atlas preservation gates" if rejected_count: return "rejected_only", "try new prompt variants; current variants failed preservation gates" return "needs_prompt_variants", "add explicit prompt variants before seed selection" def _score_state(score: dict[str, Any]) -> str: decision, _blockers = _promotion_blockers(score) if decision == "seedable_candidate": return "scored_pass" if decision == "needs_visual_score": if any(_text(score.get(key)) for key in SCORE_KEYS): return "partially_scored" return "needs_visual_score" return "scored_rejected" def build_baseline_score_sheet(manifest: dict[str, Any], *, variant_key: str = "") -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") requested_variant_key = _text(variant_key) sheet_entries: list[dict[str, Any]] = [] state_counts = { "scored_pass_count": 0, "needs_visual_score_count": 0, "partially_scored_count": 0, "scored_rejected_count": 0, } for entry in entries: if not isinstance(entry, dict): continue entry_variant_key = _text(entry.get("variant_key")) if requested_variant_key and entry_variant_key != requested_variant_key: continue score = _merge_known_values(_score_template(), entry.get("score")) score_state = _score_state(score) if score_state == "scored_pass": state_counts["scored_pass_count"] += 1 elif score_state == "needs_visual_score": state_counts["needs_visual_score_count"] += 1 elif score_state == "partially_scored": state_counts["partially_scored_count"] += 1 else: state_counts["scored_rejected_count"] += 1 entry_id = _text(entry.get("id")) sheet_entries.append( { "id": entry_id, "source_stem": _text(entry.get("source_stem") or entry_id), "variant_key": entry_variant_key, "known_variant": bool(entry.get("known_variant")), "prompt_path": _text(entry.get("prompt_path")), "image_path": _text(entry.get("image_path")), "prompt_text": _text(entry.get("prompt_text")), "prompt_sha256": _text(entry.get("prompt_sha256")), "seed_metadata": _merge_known_values(_seed_metadata(), entry.get("seed_metadata")), "cue_axes": _merge_known_values(_cue_axes(), entry.get("cue_axes")), "score": score, "score_state": score_state, "analysis_notes": "", } ) return { "schema": BASELINE_SCORE_SHEET_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": requested_variant_key, "entry_count": len(sheet_entries), "score_keys": list(SCORE_KEYS), "unscored_count": state_counts["needs_visual_score_count"], **state_counts, "entries": sheet_entries, } def _prompt_noise_excerpt(text: str, start: int, end: int, *, radius: int = 56) -> str: prefix_start = max(0, start - radius) suffix_end = min(len(text), end + radius) excerpt = text[prefix_start:suffix_end].strip() if prefix_start: excerpt = f"...{excerpt}" if suffix_end < len(text): excerpt = f"{excerpt}..." return re.sub(r"\s+", " ", excerpt) def _normalized_prompt_phrase(text: str) -> str: phrase = re.sub(r"[.!?;]+$", "", _text(text).lower()).strip() return re.sub(r"\s+", " ", phrase) def _prompt_noise_issues( text: str, *, context: str, prompt_variant_id: str = "", cue_index: int | None = None, ) -> list[dict[str, Any]]: prompt_text = _text(text) if not prompt_text: return [] issues: list[dict[str, Any]] = [] for match in PROMPT_OPTION_WORD_RE.finditer(prompt_text): issues.append( { "context": context, "prompt_variant_id": prompt_variant_id, "cue_index": cue_index, "code": "option_word", "match": match.group(0), "message": "option-list wording makes atlas geometry ambiguous for Krea2", "excerpt": _prompt_noise_excerpt(prompt_text, match.start(), match.end()), } ) for match in PROMPT_NEGATIVE_CONDITIONING_RE.finditer(prompt_text): issues.append( { "context": context, "prompt_variant_id": prompt_variant_id, "cue_index": cue_index, "code": "negative_conditioning", "match": match.group(0), "message": "negative or policy wording should not be placed in positive atlas conditioning", "excerpt": _prompt_noise_excerpt(prompt_text, match.start(), match.end()), } ) lower_text = prompt_text.lower() for phrase in PROMPT_META_PHRASES: start = lower_text.find(phrase) while start != -1: end = start + len(phrase) issues.append( { "context": context, "prompt_variant_id": prompt_variant_id, "cue_index": cue_index, "code": "meta_instruction", "match": prompt_text[start:end], "message": "meta or policy wording should be rewritten as direct visible image description", "excerpt": _prompt_noise_excerpt(prompt_text, start, end), } ) start = lower_text.find(phrase, end) seen_phrases: dict[str, tuple[int, int, str]] = {} for match in PROMPT_DUPLICATE_PHRASE_RE.finditer(prompt_text): phrase_text = match.group(0).strip() normalized = _normalized_prompt_phrase(phrase_text) if not normalized: continue word_count = len(re.findall(r"[a-z0-9']+", normalized)) if word_count < PROMPT_DUPLICATE_MIN_WORDS: continue if normalized not in seen_phrases: seen_phrases[normalized] = (match.start(), match.end(), phrase_text) continue issues.append( { "context": context, "prompt_variant_id": prompt_variant_id, "cue_index": cue_index, "code": "duplicate_phrase", "match": phrase_text, "message": "repeated prompt phrase makes atlas geometry noisy for Krea2", "excerpt": _prompt_noise_excerpt(prompt_text, match.start(), match.end()), } ) return issues def _prompt_noise_issues_for_entry(entry: dict[str, Any]) -> list[dict[str, Any]]: entry_issues: list[dict[str, Any]] = [] entry_issues.extend( _prompt_noise_issues( _text(entry.get("prompt_text")), context="baseline_prompt", ) ) for variant in entry.get("prompt_variants") or []: if not isinstance(variant, dict): continue prompt_variant_id = _text(variant.get("id")) exact_text = _text(variant.get("text")) if exact_text: entry_issues.extend( _prompt_noise_issues( exact_text, context="prompt_variant_text", prompt_variant_id=prompt_variant_id, ) ) for cue_index, cue in enumerate(_string_list(variant.get("append_cues"), field=f"prompt noise variant {prompt_variant_id}.append_cues")): entry_issues.extend( _prompt_noise_issues( cue, context="prompt_variant_append_cue", prompt_variant_id=prompt_variant_id, cue_index=cue_index, ) ) return entry_issues def _prompt_noise_code_counts(issues: list[dict[str, Any]]) -> dict[str, int]: counts = {code: 0 for code in PROMPT_NOISE_CODES} for issue in issues: code = _text(issue.get("code")) if code in counts: counts[code] += 1 return counts def build_prompt_noise_report(manifest: dict[str, Any], *, variant_key: str = "") -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") requested_variant_key = _text(variant_key) report_entries: list[dict[str, Any]] = [] issue_code_counts = {code: 0 for code in PROMPT_NOISE_CODES} scanned_entry_count = 0 for entry in entries: if not isinstance(entry, dict): continue entry_variant_key = _text(entry.get("variant_key")) if requested_variant_key and entry_variant_key != requested_variant_key: continue scanned_entry_count += 1 entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) entry_issues = _prompt_noise_issues_for_entry(entry) if not entry_issues: continue for code, count in _prompt_noise_code_counts(entry_issues).items(): issue_code_counts[code] += count report_entries.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "known_variant": bool(entry.get("known_variant")), "issue_count": len(entry_issues), "issues": entry_issues, } ) issue_count = sum(entry.get("issue_count", 0) for entry in report_entries) return { "schema": PROMPT_NOISE_REPORT_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": requested_variant_key, "entry_count": scanned_entry_count, "clean_entry_count": scanned_entry_count - len(report_entries), "issue_entry_count": len(report_entries), "issue_count": issue_count, "issue_code_counts": issue_code_counts, "entries": report_entries, } def _sidecar_path_text(manifest: dict[str, Any], source_stem: str) -> str: root_text = _text(manifest.get("root")) if not root_text or not source_stem: return "" return str((Path(root_text).resolve() / f"{source_stem}{SIDECAR_SUFFIX}")) def _cleanup_source_type(context: str) -> str: if context == "baseline_prompt": return "prompt_file" if context == "prompt_variant_text": return "sidecar_prompt_variant_text" if context == "prompt_variant_append_cue": return "sidecar_prompt_variant_append_cue" return "unknown" def _cleanup_item_for_context( *, manifest: dict[str, Any], entry: dict[str, Any], context: str, prompt_variant_id: str = "", cue_index: int | None = None, ) -> dict[str, Any]: entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) sidecar_filename = f"{source_stem}{SIDECAR_SUFFIX}" if source_stem else "" source_type = _cleanup_source_type(context) current_text = "" source_path = "" if context == "baseline_prompt": current_text = _text(entry.get("prompt_text")) source_path = _text(entry.get("prompt_path")) sidecar_filename = "" else: source_path = _sidecar_path_text(manifest, source_stem) for variant in entry.get("prompt_variants") or []: if not isinstance(variant, dict): continue if _text(variant.get("id")) != prompt_variant_id: continue if context == "prompt_variant_text": current_text = _text(variant.get("text")) elif context == "prompt_variant_append_cue": cues = _string_list(variant.get("append_cues"), field=f"cleanup prompt variant {prompt_variant_id}.append_cues") if cue_index is not None and 0 <= cue_index < len(cues): current_text = cues[cue_index] break return { "variant_key": _text(entry.get("variant_key")), "source_entry_id": entry_id, "source_stem": source_stem, "source_prompt_sha256": _text(entry.get("prompt_sha256")), "context": context, "source_type": source_type, "source_path": source_path, "sidecar_filename": sidecar_filename, "prompt_variant_id": prompt_variant_id, "cue_index": cue_index, "current_text": current_text, "current_text_sha256": _sha256_text(current_text), "replacement_text": "", "cleanup_notes": "", "manual_review_required": True, "issues": [], } def build_prompt_cleanup_sheet(manifest: dict[str, Any], *, variant_key: str = "") -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") requested_variant_key = _text(variant_key) cleanup_items: list[dict[str, Any]] = [] issue_code_counts = {code: 0 for code in PROMPT_NOISE_CODES} for entry in entries: if not isinstance(entry, dict): continue entry_variant_key = _text(entry.get("variant_key")) if requested_variant_key and entry_variant_key != requested_variant_key: continue issues = _prompt_noise_issues_for_entry(entry) if not issues: continue for code, count in _prompt_noise_code_counts(issues).items(): issue_code_counts[code] += count item_map: dict[tuple[str, str, int | None], dict[str, Any]] = {} for issue in issues: context = _text(issue.get("context")) prompt_variant_id = _text(issue.get("prompt_variant_id")) raw_cue_index = issue.get("cue_index") cue_index = raw_cue_index if isinstance(raw_cue_index, int) and not isinstance(raw_cue_index, bool) else None key = (context, prompt_variant_id, cue_index) if key not in item_map: item_map[key] = _cleanup_item_for_context( manifest=manifest, entry=entry, context=context, prompt_variant_id=prompt_variant_id, cue_index=cue_index, ) item_map[key]["issues"].append(issue) for key in sorted(item_map): item = item_map[key] item["issue_count"] = len(item.get("issues") or []) cleanup_items.append(item) return { "schema": PROMPT_CLEANUP_SHEET_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": requested_variant_key, "cleanup_item_count": len(cleanup_items), "issue_count": sum(item.get("issue_count", 0) for item in cleanup_items), "issue_code_counts": issue_code_counts, "instructions": "Fill replacement_text manually with direct positive visual wording; do not use this sheet to auto-invent cues.", "cleanup_items": cleanup_items, } def validate_prompt_cleanup_sheet(sheet: dict[str, Any]) -> dict[str, Any]: errors: list[str] = [] warnings: list[str] = [] schema = _text(sheet.get("schema")) if schema and schema != PROMPT_CLEANUP_SHEET_SCHEMA: errors.append(f"schema must be {PROMPT_CLEANUP_SHEET_SCHEMA}") cleanup_items_raw = sheet.get("cleanup_items") if not isinstance(cleanup_items_raw, list): errors.append("cleanup_items must be a list") cleanup_items_raw = [] validated_item_count = 0 for item_index, item in enumerate(cleanup_items_raw): prefix = f"cleanup_items[{item_index}]" if not isinstance(item, dict): errors.append(f"{prefix} must be an object") continue validated_item_count += 1 context = _text(item.get("context")) source_type = _text(item.get("source_type")) expected_source_type = _cleanup_source_type(context) if expected_source_type == "unknown": errors.append(f"{prefix}.context is unsupported") elif source_type != expected_source_type: errors.append(f"{prefix}.source_type must be {expected_source_type}") if not _text(item.get("variant_key")): errors.append(f"{prefix}.variant_key is required") if not _text(item.get("source_stem")): errors.append(f"{prefix}.source_stem is required") source_prompt_hash = _text(item.get("source_prompt_sha256")) if not source_prompt_hash: errors.append(f"{prefix}.source_prompt_sha256 is required") current_text = _text(item.get("current_text")) if not current_text: errors.append(f"{prefix}.current_text is required") current_text_hash = _text(item.get("current_text_sha256")) if not current_text_hash: errors.append(f"{prefix}.current_text_sha256 is required") elif current_text and current_text_hash != _sha256_text(current_text): errors.append(f"{prefix}.current_text_sha256 must match current_text") if context == "baseline_prompt" and source_prompt_hash and current_text_hash and source_prompt_hash != current_text_hash: errors.append(f"{prefix}.source_prompt_sha256 must match current_text_sha256 for baseline prompt cleanup") replacement_text = _text(item.get("replacement_text")) if not replacement_text: errors.append(f"{prefix}.replacement_text is required") elif replacement_text == current_text: errors.append(f"{prefix}.replacement_text must change current_text") else: replacement_issues = _prompt_noise_issues( replacement_text, context=context or "cleanup_replacement", prompt_variant_id=_text(item.get("prompt_variant_id")), cue_index=item.get("cue_index") if isinstance(item.get("cue_index"), int) and not isinstance(item.get("cue_index"), bool) else None, ) if replacement_issues: errors.append(f"{prefix}.replacement_text still has prompt-noise issues") if context == "baseline_prompt": source_path = _text(item.get("source_path")) if not source_path: errors.append(f"{prefix}.source_path is required for baseline prompt cleanup") elif Path(source_path).suffix.lower() not in PROMPT_SUFFIXES: errors.append(f"{prefix}.source_path must reference a prompt file") elif context == "prompt_variant_text": if not _text(item.get("prompt_variant_id")): errors.append(f"{prefix}.prompt_variant_id is required for sidecar text cleanup") if not _text(item.get("sidecar_filename")): errors.append(f"{prefix}.sidecar_filename is required for sidecar text cleanup") elif context == "prompt_variant_append_cue": if not _text(item.get("prompt_variant_id")): errors.append(f"{prefix}.prompt_variant_id is required for sidecar append-cue cleanup") cue_index = item.get("cue_index") if not isinstance(cue_index, int) or isinstance(cue_index, bool) or cue_index < 0: errors.append(f"{prefix}.cue_index must be a non-negative integer") if not _text(item.get("sidecar_filename")): errors.append(f"{prefix}.sidecar_filename is required for sidecar append-cue cleanup") if not item.get("manual_review_required"): warnings.append(f"{prefix}.manual_review_required is not true") return { "schema": PROMPT_CLEANUP_VALIDATION_SCHEMA, "valid": not errors, "error_count": len(errors), "warning_count": len(warnings), "cleanup_item_count": len(cleanup_items_raw), "validated_item_count": validated_item_count, "errors": errors, "warnings": warnings, } def _path_is_under_root(path: Path, root: Path) -> bool: try: path.resolve().relative_to(root.resolve()) except ValueError: return False return True def _cleanup_target_path(item: dict[str, Any], root: Path) -> Path: context = _text(item.get("context")) if context == "baseline_prompt": path = Path(_text(item.get("source_path"))).resolve() else: sidecar_filename = _text(item.get("sidecar_filename")) if not sidecar_filename or Path(sidecar_filename).name != sidecar_filename: raise ValueError(f"sidecar filename must be a plain filename: {sidecar_filename!r}") path = (root / sidecar_filename).resolve() if not _path_is_under_root(path, root): raise ValueError(f"cleanup target must be inside {root}: {path}") return path def _replace_sidecar_prompt_variant_text(sidecar: dict[str, Any], item: dict[str, Any]) -> tuple[dict[str, Any], str]: variants = sidecar.get("prompt_variants") if not isinstance(variants, list): raise ValueError("sidecar prompt_variants must be a list") prompt_variant_id = _text(item.get("prompt_variant_id")) current_text = _text(item.get("current_text")) replacement_text = _text(item.get("replacement_text")) context = _text(item.get("context")) for variant in variants: if not isinstance(variant, dict) or _text(variant.get("id")) != prompt_variant_id: continue if context == "prompt_variant_text": actual_text = _text(variant.get("text")) if actual_text not in {current_text, replacement_text}: raise ValueError(f"sidecar variant {prompt_variant_id}.text has drifted") variant["text"] = replacement_text return sidecar, "sidecar_prompt_variant_text" if context == "prompt_variant_append_cue": cues = _string_list(variant.get("append_cues"), field=f"cleanup sidecar variant {prompt_variant_id}.append_cues") cue_index = item.get("cue_index") if not isinstance(cue_index, int) or isinstance(cue_index, bool) or cue_index < 0 or cue_index >= len(cues): raise ValueError(f"sidecar variant {prompt_variant_id}.append_cues index is out of range") if cues[cue_index] not in {current_text, replacement_text}: raise ValueError(f"sidecar variant {prompt_variant_id}.append_cues[{cue_index}] has drifted") cues[cue_index] = replacement_text variant["append_cues"] = cues return sidecar, "sidecar_prompt_variant_append_cue" raise ValueError(f"sidecar prompt variant {prompt_variant_id!r} was not found") def apply_prompt_cleanup_sheet(sheet: dict[str, Any], folder: str | Path) -> dict[str, Any]: validation = validate_prompt_cleanup_sheet(sheet) if not validation["valid"]: return { "schema": PROMPT_CLEANUP_APPLY_REPORT_SCHEMA, "applied": False, "root": str(Path(folder).resolve()), "updated_file_count": 0, "updated_files": [], "validation": validation, } root = Path(folder).resolve() if not root.is_dir(): raise FileNotFoundError(f"cleanup folder does not exist: {root}") updated_by_path: dict[str, dict[str, Any]] = {} for item in sheet.get("cleanup_items", []): if not isinstance(item, dict): continue target_path = _cleanup_target_path(item, root) context = _text(item.get("context")) current_text = _text(item.get("current_text")) replacement_text = _text(item.get("replacement_text")) if context == "baseline_prompt": actual_text = target_path.read_text(encoding="utf-8").strip() if actual_text not in {current_text, replacement_text}: raise ValueError(f"prompt file has drifted: {target_path}") target_path.write_text(replacement_text, encoding="utf-8") source_type = "prompt_file" else: sidecar = _read_json_object_if_present(target_path) sidecar, source_type = _replace_sidecar_prompt_variant_text(sidecar, item) target_path.write_text(json.dumps(sidecar, ensure_ascii=True, indent=2, sort_keys=True) + "\n", encoding="utf-8") path_key = str(target_path) if path_key not in updated_by_path: updated_by_path[path_key] = { "path": path_key, "source_type": source_type, "cleanup_item_count": 0, } updated_by_path[path_key]["cleanup_item_count"] += 1 updated_files = list(updated_by_path.values()) return { "schema": PROMPT_CLEANUP_APPLY_REPORT_SCHEMA, "applied": True, "root": str(root), "updated_file_count": len(updated_files), "updated_files": updated_files, "validation": validation, } def build_coverage_report(manifest: dict[str, Any]) -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") report_entries: list[dict[str, Any]] = [] totals = { "baseline_only_count": 0, "needs_prompt_cleanup_count": 0, "needs_visual_score_count": 0, "ready_for_seed_selection_count": 0, "ready_for_catalog_review_count": 0, "unknown_variant_count": 0, "rejected_only_count": 0, "prompt_variant_count": 0, "seedable_variant_count": 0, "catalog_cue_candidate_count": 0, "unscored_variant_count": 0, "rejected_variant_count": 0, "prompt_noise_issue_count": 0, "prompt_noise_entry_count": 0, } for entry in entries: if not isinstance(entry, dict): continue variant_key = _text(entry.get("variant_key")) entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) known_variant = bool(entry.get("known_variant")) prompt_text = _text(entry.get("prompt_text")) prompt_variants = [variant for variant in entry.get("prompt_variants") or [] if isinstance(variant, dict)] prompt_noise_issues = _prompt_noise_issues_for_entry(entry) prompt_noise_issue_count = len(prompt_noise_issues) prompt_noise_code_counts = _prompt_noise_code_counts(prompt_noise_issues) seedable_count = 0 catalog_cue_count = 0 unscored_count = 0 rejected_count = 0 prompt_variant_summaries: list[dict[str, Any]] = [] for variant in prompt_variants: variant_id = _text(variant.get("id")) if not variant_id: continue append_cues = _string_list(variant.get("append_cues"), field=f"coverage prompt variant {variant_id}.append_cues") tested_text = _variant_prompt_text(prompt_text, variant, field=f"coverage prompt variant {variant_id}") prompt_source = _prompt_source_for_variant( variant, variant_id=variant_id, text=tested_text, append_cues=append_cues, ) evidence = _prompt_variant_evidence(variant.get("evidence"), field=f"coverage prompt variant {variant_id}.evidence") score = _merge_known_values(_score_template(), evidence.get("score")) decision, blockers = _promotion_blockers(score) matrix_evidence = _stable_matrix_evidence_for_variant(variant, field=f"coverage prompt variant {variant_id}") if decision == "seedable_candidate" and "matrix_evidence" in variant and not matrix_evidence: decision = "rejected" blockers = ["unstable_matrix_evidence"] if decision == "seedable_candidate": seedable_count += 1 if prompt_source.get("kind") == "append_cues" and prompt_source.get("append_cues"): catalog_cue_count += 1 elif decision == "needs_visual_score": unscored_count += 1 elif decision == "rejected": rejected_count += 1 prompt_variant_summaries.append( { "prompt_variant_id": variant_id, "decision": decision, "blockers": blockers, "prompt_source_kind": prompt_source.get("kind") or "", "has_append_cues": bool(prompt_source.get("append_cues")), "has_evidence": bool(evidence), "has_matrix_evidence": "matrix_evidence" in variant, "matrix_evidence_stable": bool(matrix_evidence), } ) state, next_action = _coverage_state( known_variant=known_variant, prompt_noise_issue_count=prompt_noise_issue_count, prompt_variant_count=len(prompt_variants), seedable_count=seedable_count, catalog_cue_count=catalog_cue_count, unscored_count=unscored_count, rejected_count=rejected_count, ) totals["prompt_variant_count"] += len(prompt_variants) totals["seedable_variant_count"] += seedable_count totals["catalog_cue_candidate_count"] += catalog_cue_count totals["unscored_variant_count"] += unscored_count totals["rejected_variant_count"] += rejected_count totals["prompt_noise_issue_count"] += prompt_noise_issue_count if prompt_noise_issue_count: totals["prompt_noise_entry_count"] += 1 if state == "baseline_only": totals["baseline_only_count"] += 1 elif state == "needs_prompt_cleanup": totals["needs_prompt_cleanup_count"] += 1 elif state == "needs_visual_score": totals["needs_visual_score_count"] += 1 elif state == "ready_for_seed_selection": totals["ready_for_seed_selection_count"] += 1 elif state == "ready_for_catalog_review": totals["ready_for_catalog_review_count"] += 1 elif state == "unknown_variant": totals["unknown_variant_count"] += 1 elif state == "rejected_only": totals["rejected_only_count"] += 1 report_entries.append( { "id": entry_id, "source_stem": source_stem, "variant_key": variant_key, "known_variant": known_variant, "state": state, "next_action": next_action, "prompt_variant_count": len(prompt_variants), "seedable_variant_count": seedable_count, "catalog_cue_candidate_count": catalog_cue_count, "unscored_variant_count": unscored_count, "rejected_variant_count": rejected_count, "prompt_noise_issue_count": prompt_noise_issue_count, "prompt_noise_code_counts": prompt_noise_code_counts, "prompt_variants": prompt_variant_summaries, } ) return { "schema": COVERAGE_REPORT_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "entry_count": len(report_entries), "missing_pair_count": int(manifest.get("missing_pair_count") or 0), "manifest_unknown_variant_count": int(manifest.get("unknown_variant_count") or 0), **totals, "entries": report_entries, } def build_sidecar_scaffold(manifest: dict[str, Any], *, variant_key: str = "") -> dict[str, Any]: entries = manifest.get("entries") if not isinstance(entries, list): raise ValueError("manifest entries must be a list") requested_variant_key = _text(variant_key) scaffolds: list[dict[str, Any]] = [] skipped: list[dict[str, Any]] = [] for entry in entries: if not isinstance(entry, dict): continue entry_variant_key = _text(entry.get("variant_key")) if requested_variant_key and entry_variant_key != requested_variant_key: continue entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) prompt_variant_count = len([variant for variant in entry.get("prompt_variants") or [] if isinstance(variant, dict)]) if not bool(entry.get("known_variant")): skipped.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "reason": "unknown_variant", } ) continue if prompt_variant_count: skipped.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "reason": "has_prompt_variants", "prompt_variant_count": prompt_variant_count, } ) continue seed_metadata = _merge_known_values(_seed_metadata(), entry.get("seed_metadata")) cue_axes = _merge_known_values(_cue_axes(), entry.get("cue_axes")) score = _merge_known_values(_score_template(), entry.get("score")) scaffolds.append( { "variant_key": entry_variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "sidecar_filename": f"{source_stem}{SIDECAR_SUFFIX}", "source_prompt_sha256": _text(entry.get("prompt_sha256")), "prompt_path": _text(entry.get("prompt_path")), "image_path": _text(entry.get("image_path")), "sidecar_json": { "seed_metadata": seed_metadata, "cue_axes": cue_axes, "score": score, "prompt_variants": [], "notes": "Add user-authored prompt_variants here; do not add negative-conditioning fields.", }, "prompt_variant_template": { "id": "", "prompt_order": "subject_first", "append_cues": [], "reference_images": [], "cue_axes": _cue_axes(), "seed_metadata": _seed_metadata(), "notes": "", }, } ) return { "schema": SIDECAR_SCAFFOLD_SCHEMA, "subject_id": _text(manifest.get("subject_id")), "variant_key": requested_variant_key, "scaffold_count": len(scaffolds), "skipped_count": len(skipped), "scaffolds": scaffolds, "skipped": skipped, } def _has_filled_axis(values: dict[str, Any], keys: tuple[str, ...]) -> bool: return any(values.get(key) not in (None, "", [], {}) for key in keys) def build_baseline_score_update_draft(baseline_score_sheet: dict[str, Any]) -> dict[str, Any]: schema = _text(baseline_score_sheet.get("schema")) if schema and schema != BASELINE_SCORE_SHEET_SCHEMA: raise ValueError(f"baseline score sheet schema must be {BASELINE_SCORE_SHEET_SCHEMA}") entries = baseline_score_sheet.get("entries") if not isinstance(entries, list): raise ValueError("baseline score sheet entries must be a list") updates: list[dict[str, Any]] = [] skipped: list[dict[str, Any]] = [] requested_variant_key = _text(baseline_score_sheet.get("variant_key")) for index, entry in enumerate(entries): if not isinstance(entry, dict): skipped.append({"entry_index": index, "reason": "not_object"}) continue entry_id = _text(entry.get("id")) source_stem = _text(entry.get("source_stem") or entry_id) variant_key = _text(entry.get("variant_key")) skip_context = { "entry_index": index, "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, } if not source_stem: skipped.append({**skip_context, "reason": "missing_source_stem"}) continue if not bool(entry.get("known_variant")): skipped.append({**skip_context, "reason": "unknown_variant"}) continue score = _merge_known_values(_score_template(), entry.get("score")) if not _has_filled_axis(score, SCORE_KEYS): skipped.append({**skip_context, "reason": "no_score"}) continue analysis_notes = _text(entry.get("analysis_notes")) _validate_no_negative_channel(analysis_notes, field=f"baseline score entry {source_stem}.analysis_notes") score_state = _score_state(score) updates.append( { "variant_key": variant_key, "source_entry_id": entry_id, "source_stem": source_stem, "sidecar_filename": f"{source_stem}{SIDECAR_SUFFIX}", "source_prompt_sha256": _text(entry.get("prompt_sha256") or entry.get("source_prompt_sha256")), "prompt_path": _text(entry.get("prompt_path")), "image_path": _text(entry.get("image_path")), "seed_metadata": _merge_known_values(_seed_metadata(), entry.get("seed_metadata")), "cue_axes": _merge_known_values(_cue_axes(), entry.get("cue_axes")), "score": score, "score_state": score_state, "analysis_notes": analysis_notes, } ) return { "schema": BASELINE_SCORE_UPDATE_DRAFT_SCHEMA, "subject_id": _text(baseline_score_sheet.get("subject_id")), "variant_key": requested_variant_key, "update_count": len(updates), "skipped_count": len(skipped), "updates": updates, "skipped": skipped, } def validate_baseline_score_update_draft(draft: dict[str, Any]) -> dict[str, Any]: errors: list[str] = [] warnings: list[str] = [] schema = _text(draft.get("schema")) if schema and schema != BASELINE_SCORE_UPDATE_DRAFT_SCHEMA: errors.append(f"schema must be {BASELINE_SCORE_UPDATE_DRAFT_SCHEMA}") updates_raw = draft.get("updates") if not isinstance(updates_raw, list): errors.append("updates must be a list") updates_raw = [] validated_update_count = 0 for update_index, update in enumerate(updates_raw): if not isinstance(update, dict): errors.append(f"updates[{update_index}] must be an object") continue validated_update_count += 1 prefix = f"updates[{update_index}]" for forbidden in (*FORBIDDEN_PROMPT_FIELDS, "prompt_variants"): if forbidden in update: errors.append(f"{prefix} must not contain {forbidden}") variant_key = _text(update.get("variant_key")) if not variant_key: errors.append(f"{prefix}.variant_key is required") source_stem = _text(update.get("source_stem")) if not source_stem: errors.append(f"{prefix}.source_stem is required") expected_sidecar = f"{source_stem}{SIDECAR_SUFFIX}" if source_stem else "" sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename: errors.append(f"{prefix}.sidecar_filename is required") elif Path(sidecar_filename).name != sidecar_filename: errors.append(f"{prefix}.sidecar_filename must be a plain filename") elif expected_sidecar and sidecar_filename != expected_sidecar: errors.append(f"{prefix}.sidecar_filename must be {expected_sidecar}") if not _text(update.get("source_prompt_sha256")): errors.append(f"{prefix}.source_prompt_sha256 is required") image_path = _text(update.get("image_path")) if image_path: try: _image_path(image_path, field=f"{prefix}.image_path") except ValueError as exc: errors.append(str(exc)) score = _merge_known_values(_score_template(), update.get("score")) if not _has_filled_axis(score, SCORE_KEYS): errors.append(f"{prefix}.score must include at least one filled score") continue score_state = _score_state(score) declared_score_state = _text(update.get("score_state")) if declared_score_state and declared_score_state != score_state: errors.append(f"{prefix}.score_state must be {score_state}") if score_state == "partially_scored": warnings.append(f"{prefix}.score is partially scored") elif score_state == "scored_rejected": warnings.append(f"{prefix}.score is rejected baseline evidence") analysis_notes = _text(update.get("analysis_notes")) try: _validate_no_negative_channel(analysis_notes, field=f"{prefix}.analysis_notes") except ValueError as exc: errors.append(str(exc)) return { "schema": BASELINE_SCORE_UPDATE_VALIDATION_SCHEMA, "valid": not errors, "error_count": len(errors), "warning_count": len(warnings), "update_count": len(updates_raw), "validated_update_count": validated_update_count, "errors": errors, "warnings": warnings, } def validate_reference_cue_sidecar_author_draft(draft: dict[str, Any]) -> dict[str, Any]: errors: list[str] = [] warnings: list[str] = [] schema = _text(draft.get("schema")) if schema and schema != REFERENCE_CUE_SIDECAR_AUTHOR_DRAFT_SCHEMA: errors.append(f"schema must be {REFERENCE_CUE_SIDECAR_AUTHOR_DRAFT_SCHEMA}") updates_raw = draft.get("updates") if not isinstance(updates_raw, list): errors.append("updates must be a list") updates_raw = [] validated_variant_count = 0 for update_index, update in enumerate(updates_raw): if not isinstance(update, dict): errors.append(f"updates[{update_index}] must be an object") continue prefix = f"updates[{update_index}]" variant_key = _text(update.get("variant_key")) if not variant_key: errors.append(f"{prefix}.variant_key is required") source_stem = _text(update.get("source_stem")) if not source_stem: errors.append(f"{prefix}.source_stem is required") expected_sidecar = f"{source_stem}{SIDECAR_SUFFIX}" if source_stem else "" sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename: errors.append(f"{prefix}.sidecar_filename is required") elif Path(sidecar_filename).name != sidecar_filename: errors.append(f"{prefix}.sidecar_filename must be a plain filename") elif expected_sidecar and sidecar_filename != expected_sidecar: errors.append(f"{prefix}.sidecar_filename must be {expected_sidecar}") if not _text(update.get("source_prompt_sha256")): errors.append(f"{prefix}.source_prompt_sha256 is required") image_path = _text(update.get("image_path")) if image_path: try: _image_path(image_path, field=f"{prefix}.image_path") except ValueError as exc: errors.append(str(exc)) variants_raw = update.get("prompt_variants") if not isinstance(variants_raw, list) or not variants_raw: errors.append(f"{prefix}.prompt_variants must be a non-empty list") continue seen_variant_ids: set[str] = set() for variant_index, variant in enumerate(variants_raw): variant_prefix = f"{prefix}.prompt_variants[{variant_index}]" if not isinstance(variant, dict): errors.append(f"{variant_prefix} must be an object") continue validated_variant_count += 1 for forbidden in FORBIDDEN_PROMPT_FIELDS: if forbidden in variant: errors.append(f"{variant_prefix} must not contain {forbidden}") variant_id = _text(variant.get("id")) if not variant_id: errors.append(f"{variant_prefix}.id is required") elif variant_id in seen_variant_ids: errors.append(f"{variant_prefix}.id {variant_id!r} is duplicated in this sidecar author draft") seen_variant_ids.add(variant_id) if variant_id: _validate_prompt_source_identity(variant, variant_id=variant_id, prefix=variant_prefix, errors=errors) prompt_order = _text(variant.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: errors.append(f"{variant_prefix}.prompt_order must be one of {sorted(PROMPT_ORDERS)}") text = _text(variant.get("text")) append_cues: list[str] = [] try: append_cues = _string_list(variant.get("append_cues"), field=f"{variant_prefix}.append_cues") except ValueError as exc: errors.append(str(exc)) if bool(text) == bool(append_cues): errors.append(f"{variant_prefix} must provide exactly one of text or append_cues") if text: try: _validate_no_negative_channel(text, field=f"{variant_prefix}.text") except ValueError as exc: errors.append(str(exc)) for cue_index, cue in enumerate(append_cues): prompt_noise_issues = _prompt_noise_issues( cue, context="reference_cue_sidecar_author_append_cue", prompt_variant_id=variant_id, cue_index=cue_index, ) for issue in prompt_noise_issues: errors.append( f"{variant_prefix}.append_cues[{cue_index}] prompt_noise {issue.get('code')}: {issue.get('match')}" ) reference_images = _reference_images(variant.get("reference_images"), field=f"{variant_prefix}.reference_images") if not reference_images: errors.append(f"{variant_prefix}.reference_images must include at least one canonical atlas reference") cue_axes = _merge_known_values(_cue_axes(), variant.get("cue_axes")) if not _has_filled_axis(cue_axes, CUE_AXIS_KEYS): errors.append(f"{variant_prefix}.cue_axes must include at least one filled cue axis") if not _text(variant.get("notes")): warnings.append(f"{variant_prefix}.notes is empty") return { "schema": REFERENCE_CUE_SIDECAR_AUTHOR_VALIDATION_SCHEMA, "valid": not errors, "error_count": len(errors), "warning_count": len(warnings), "update_count": len(updates_raw), "validated_variant_count": validated_variant_count, "errors": errors, "warnings": warnings, } def validate_sidecar_update_draft(draft: dict[str, Any]) -> dict[str, Any]: errors: list[str] = [] warnings: list[str] = [] schema = _text(draft.get("schema")) if schema and schema != SIDECAR_UPDATE_DRAFT_SCHEMA: errors.append(f"schema must be {SIDECAR_UPDATE_DRAFT_SCHEMA}") seed = draft.get("seed") if not isinstance(seed, int) or isinstance(seed, bool): errors.append("seed must be an integer sampler seed") updates_raw = draft.get("updates") if not isinstance(updates_raw, list): errors.append("updates must be a list") updates_raw = [] validated_variant_count = 0 for update_index, update in enumerate(updates_raw): if not isinstance(update, dict): errors.append(f"updates[{update_index}] must be an object") continue source_stem = _text(update.get("source_stem")) if not source_stem: errors.append(f"updates[{update_index}].source_stem is required") expected_sidecar = f"{source_stem}{SIDECAR_SUFFIX}" if source_stem else "" sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename: errors.append(f"updates[{update_index}].sidecar_filename is required") elif expected_sidecar and sidecar_filename != expected_sidecar: errors.append(f"updates[{update_index}].sidecar_filename must be {expected_sidecar}") variants_raw = update.get("prompt_variants") if not isinstance(variants_raw, list) or not variants_raw: errors.append(f"updates[{update_index}].prompt_variants must be a non-empty list") continue seen_variant_ids: set[str] = set() for variant_index, variant in enumerate(variants_raw): prefix = f"updates[{update_index}].prompt_variants[{variant_index}]" if not isinstance(variant, dict): errors.append(f"{prefix} must be an object") continue validated_variant_count += 1 for forbidden in FORBIDDEN_PROMPT_FIELDS: if forbidden in variant: errors.append(f"{prefix} must not contain {forbidden}") variant_id = _text(variant.get("id")) if not variant_id: errors.append(f"{prefix}.id is required") elif variant_id in seen_variant_ids: errors.append(f"{prefix}.id {variant_id!r} is duplicated in this sidecar update") seen_variant_ids.add(variant_id) if variant_id: _validate_prompt_source_identity(variant, variant_id=variant_id, prefix=prefix, errors=errors) prompt_order = _text(variant.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: errors.append(f"{prefix}.prompt_order must be one of {sorted(PROMPT_ORDERS)}") text = _text(variant.get("text")) if not text: errors.append(f"{prefix}.text is required") elif NEGATIVE_OUT_CHANNEL in text: errors.append(f"{prefix}.text must not mention {NEGATIVE_OUT_CHANNEL}") cue_axes = _merge_known_values(_cue_axes(), variant.get("cue_axes")) if not _has_filled_axis(cue_axes, CUE_AXIS_KEYS): errors.append(f"{prefix}.cue_axes must include at least one filled cue axis") evidence = variant.get("evidence") if not isinstance(evidence, dict): errors.append(f"{prefix}.evidence is required") continue evidence_seed = evidence.get("seed") if not isinstance(evidence_seed, int) or isinstance(evidence_seed, bool): errors.append(f"{prefix}.evidence.seed must be an integer sampler seed") elif isinstance(seed, int) and not isinstance(seed, bool) and evidence_seed != seed: errors.append(f"{prefix}.evidence.seed {evidence_seed} does not match draft seed {seed}") try: _image_path(evidence.get("image_path"), field=f"{prefix}.evidence.image_path") except ValueError as exc: errors.append(str(exc)) score = _merge_known_values(_score_template(), evidence.get("score")) decision, blockers = _promotion_blockers(score) if decision != "seedable_candidate": for blocker in blockers: errors.append(f"{prefix}.evidence.score failed promotion gate: {blocker}") if not _text(variant.get("notes")): warnings.append(f"{prefix}.notes is empty") return { "schema": SIDECAR_UPDATE_VALIDATION_SCHEMA, "valid": not errors, "error_count": len(errors), "warning_count": len(warnings), "update_count": len(updates_raw), "validated_variant_count": validated_variant_count, "errors": errors, "warnings": warnings, } def validate_matrix_sidecar_update_draft(draft: dict[str, Any]) -> dict[str, Any]: errors: list[str] = [] warnings: list[str] = [] schema = _text(draft.get("schema")) if schema and schema != MATRIX_SIDECAR_UPDATE_DRAFT_SCHEMA: errors.append(f"schema must be {MATRIX_SIDECAR_UPDATE_DRAFT_SCHEMA}") updates_raw = draft.get("updates") if not isinstance(updates_raw, list): errors.append("updates must be a list") updates_raw = [] validated_variant_count = 0 for update_index, update in enumerate(updates_raw): if not isinstance(update, dict): errors.append(f"updates[{update_index}] must be an object") continue source_stem = _text(update.get("source_stem")) if not source_stem: errors.append(f"updates[{update_index}].source_stem is required") expected_sidecar = f"{source_stem}{SIDECAR_SUFFIX}" if source_stem else "" sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename: errors.append(f"updates[{update_index}].sidecar_filename is required") elif Path(sidecar_filename).name != sidecar_filename: errors.append(f"updates[{update_index}].sidecar_filename must be a plain filename") elif expected_sidecar and sidecar_filename != expected_sidecar: errors.append(f"updates[{update_index}].sidecar_filename must be {expected_sidecar}") variants_raw = update.get("prompt_variants") if not isinstance(variants_raw, list) or not variants_raw: errors.append(f"updates[{update_index}].prompt_variants must be a non-empty list") continue seen_variant_ids: set[str] = set() for variant_index, variant in enumerate(variants_raw): prefix = f"updates[{update_index}].prompt_variants[{variant_index}]" if not isinstance(variant, dict): errors.append(f"{prefix} must be an object") continue validated_variant_count += 1 for forbidden in FORBIDDEN_PROMPT_FIELDS: if forbidden in variant: errors.append(f"{prefix} must not contain {forbidden}") variant_id = _text(variant.get("id")) if not variant_id: errors.append(f"{prefix}.id is required") elif variant_id in seen_variant_ids: errors.append(f"{prefix}.id {variant_id!r} is duplicated in this sidecar update") seen_variant_ids.add(variant_id) if variant_id: _validate_prompt_source_identity(variant, variant_id=variant_id, prefix=prefix, errors=errors) prompt_order = _text(variant.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: errors.append(f"{prefix}.prompt_order must be one of {sorted(PROMPT_ORDERS)}") text = _text(variant.get("text")) if not text: errors.append(f"{prefix}.text is required") else: try: _validate_no_negative_channel(text, field=f"{prefix}.text") except ValueError as exc: errors.append(str(exc)) cue_axes = _merge_known_values(_cue_axes(), variant.get("cue_axes")) if not _has_filled_axis(cue_axes, CUE_AXIS_KEYS): errors.append(f"{prefix}.cue_axes must include at least one filled cue axis") evidence = variant.get("evidence") evidence_seed: int | None = None evidence_image_path = "" evidence_turn: Any = None evidence_score: dict[str, Any] | None = None if not isinstance(evidence, dict): errors.append(f"{prefix}.evidence is required") else: try: evidence_seed = _int_seed(evidence.get("seed"), field=f"{prefix}.evidence.seed") except ValueError as exc: errors.append(str(exc)) evidence_turn = evidence.get("turn") if not isinstance(evidence_turn, int) or isinstance(evidence_turn, bool): errors.append(f"{prefix}.evidence.turn must be an integer") try: evidence_image_path = _image_path(evidence.get("image_path"), field=f"{prefix}.evidence.image_path") except ValueError as exc: errors.append(str(exc)) evidence_score = _merge_known_values(_score_template(), evidence.get("score")) decision, blockers = _promotion_blockers(evidence_score) if decision != "seedable_candidate": for blocker in blockers: errors.append(f"{prefix}.evidence.score failed promotion gate: {blocker}") matrix_evidence = variant.get("matrix_evidence") if not isinstance(matrix_evidence, dict): errors.append(f"{prefix}.matrix_evidence is required") continue if matrix_evidence.get("stable") is not True: errors.append(f"{prefix}.matrix_evidence.stable must be true") try: selection_seed = _int_seed(matrix_evidence.get("selection_seed"), field=f"{prefix}.matrix_evidence.selection_seed") except ValueError as exc: errors.append(str(exc)) selection_seed = None seed_slot = _text(matrix_evidence.get("seed_slot")) if seed_slot not in SEED_SELECTION_SLOT_KEYS: errors.append( f"{prefix}.matrix_evidence.seed_slot must be one of {list(SEED_SELECTION_SLOT_KEYS)} and must not be sampler_seed" ) elif selection_seed is not None: seed_metadata = _merge_known_values(_seed_metadata(), variant.get("seed_metadata")) try: seed_metadata_value = _int_seed( seed_metadata.get(seed_slot), field=f"{prefix}.seed_metadata.{seed_slot}", ) except ValueError as exc: errors.append(str(exc)) else: if seed_metadata_value != selection_seed: errors.append( f"{prefix}.seed_metadata.{seed_slot} {seed_metadata_value} " f"must match matrix_evidence.selection_seed {selection_seed}" ) sampler_seeds_raw = matrix_evidence.get("sampler_seeds") sampler_seeds: list[int] = [] if not isinstance(sampler_seeds_raw, list) or not sampler_seeds_raw: errors.append(f"{prefix}.matrix_evidence.sampler_seeds must be a non-empty list") else: seen_declared_sampler_seeds: set[int] = set() for seed_index, sampler_seed in enumerate(sampler_seeds_raw): try: declared_sampler_seed = _int_seed( sampler_seed, field=f"{prefix}.matrix_evidence.sampler_seeds[{seed_index}]", ) sampler_seeds.append(declared_sampler_seed) if declared_sampler_seed in seen_declared_sampler_seeds: errors.append( f"{prefix}.matrix_evidence.sampler_seeds value {declared_sampler_seed} is duplicated" ) seen_declared_sampler_seeds.add(declared_sampler_seed) except ValueError as exc: errors.append(str(exc)) if len(seen_declared_sampler_seeds) < MIN_STABLE_MATRIX_SAMPLER_SEEDS: errors.append( f"{prefix}.matrix_evidence.sampler_seeds must include at least " f"{MIN_STABLE_MATRIX_SAMPLER_SEEDS} unique sampler seeds" ) jobs_raw = matrix_evidence.get("jobs") if not isinstance(jobs_raw, list) or not jobs_raw: errors.append(f"{prefix}.matrix_evidence.jobs must be a non-empty list") jobs_raw = [] for count_field, expected_count in ( ("job_count", len(jobs_raw)), ("promotion_ready_count", len(jobs_raw)), ): count_value = matrix_evidence.get(count_field) if not isinstance(count_value, int) or isinstance(count_value, bool): errors.append(f"{prefix}.matrix_evidence.{count_field} must be an integer") elif count_value != expected_count: errors.append(f"{prefix}.matrix_evidence.{count_field} must equal matrix_evidence.jobs count") blocked_count = matrix_evidence.get("blocked_count") if blocked_count != 0: errors.append(f"{prefix}.matrix_evidence.blocked_count must be 0") job_sampler_seeds: list[int] = [] seen_job_ids: set[str] = set() seen_job_sampler_seeds: set[int] = set() jobs_by_sampler_seed: dict[int, dict[str, Any]] = {} for job_index, job in enumerate(jobs_raw): job_prefix = f"{prefix}.matrix_evidence.jobs[{job_index}]" if not isinstance(job, dict): errors.append(f"{job_prefix} must be an object") continue job_id = _text(job.get("id")) if not job_id: errors.append(f"{job_prefix}.id is required") elif job_id in seen_job_ids: errors.append(f"{prefix}.matrix_evidence.jobs id {job_id!r} is duplicated") seen_job_ids.add(job_id) if _text(job.get("decision")) != "seedable_candidate": errors.append(f"{job_prefix}.decision must be seedable_candidate") try: job_sampler_seed = _int_seed(job.get("sampler_seed"), field=f"{job_prefix}.sampler_seed") job_sampler_seeds.append(job_sampler_seed) if job_sampler_seed in seen_job_sampler_seeds: errors.append(f"{prefix}.matrix_evidence.jobs sampler_seed {job_sampler_seed} is duplicated") else: jobs_by_sampler_seed[job_sampler_seed] = job seen_job_sampler_seeds.add(job_sampler_seed) if sampler_seeds and job_sampler_seed not in sampler_seeds: errors.append(f"{job_prefix}.sampler_seed must be listed in matrix_evidence.sampler_seeds") except ValueError as exc: errors.append(str(exc)) try: job_selection_seed = _int_seed(job.get("selection_seed"), field=f"{job_prefix}.selection_seed") if selection_seed is not None and job_selection_seed != selection_seed: errors.append(f"{job_prefix}.selection_seed must match matrix_evidence.selection_seed") except ValueError as exc: errors.append(str(exc)) try: _image_path(job.get("image_path"), field=f"{job_prefix}.image_path") except ValueError as exc: errors.append(str(exc)) turn = job.get("turn") if not isinstance(turn, int) or isinstance(turn, bool): errors.append(f"{job_prefix}.turn must be an integer") job_score = _merge_known_values(_score_template(), job.get("score")) decision, blockers = _promotion_blockers(job_score) if decision != "seedable_candidate": for blocker in blockers: errors.append(f"{job_prefix}.score failed promotion gate: {blocker}") if sampler_seeds and sorted(set(job_sampler_seeds)) != sorted(set(sampler_seeds)): errors.append(f"{prefix}.matrix_evidence.jobs must cover every sampler seed") if evidence_seed is not None and sampler_seeds and evidence_seed not in sampler_seeds: errors.append(f"{prefix}.evidence.seed must be one of matrix_evidence.sampler_seeds") if evidence_seed is not None: representative_job = jobs_by_sampler_seed.get(evidence_seed) if representative_job is None: errors.append(f"{prefix}.evidence.seed must match a matrix_evidence.jobs sampler_seed") else: representative_prefix = f"{prefix}.matrix_evidence.jobs entry for evidence.seed {evidence_seed}" try: representative_image_path = _image_path( representative_job.get("image_path"), field=f"{representative_prefix}.image_path", ) except ValueError: representative_image_path = "" if evidence_image_path and representative_image_path and evidence_image_path != representative_image_path: errors.append(f"{prefix}.evidence.image_path must match {representative_prefix}.image_path") if evidence_turn != representative_job.get("turn"): errors.append(f"{prefix}.evidence.turn must match {representative_prefix}.turn") representative_score = _merge_known_values(_score_template(), representative_job.get("score")) if evidence_score is not None and evidence_score != representative_score: errors.append(f"{prefix}.evidence.score must match {representative_prefix}.score") if not _text(variant.get("notes")): warnings.append(f"{prefix}.notes is empty") return { "schema": MATRIX_SIDECAR_UPDATE_VALIDATION_SCHEMA, "valid": not errors, "error_count": len(errors), "warning_count": len(warnings), "update_count": len(updates_raw), "validated_variant_count": validated_variant_count, "errors": errors, "warnings": warnings, } def _read_json_object_if_present(path: Path) -> dict[str, Any]: if not path.is_file(): return {} with path.open("r", encoding="utf-8") as handle: data = json.load(handle) if not isinstance(data, dict): raise ValueError(f"{path} must contain one JSON object") return data def _validate_prompt_source_identity(variant: dict[str, Any], *, variant_id: str, prefix: str, errors: list[str]) -> None: prompt_source = variant.get("prompt_source") if prompt_source is None: return if not isinstance(prompt_source, dict): errors.append(f"{prefix}.prompt_source must be an object") return source_variant_id = _text(prompt_source.get("prompt_variant_id")) if source_variant_id and source_variant_id != variant_id: errors.append(f"{prefix}.prompt_source.prompt_variant_id {source_variant_id!r} must match id {variant_id!r}") def apply_baseline_score_update_draft(draft: dict[str, Any], folder: str | Path) -> dict[str, Any]: validation = validate_baseline_score_update_draft(draft) if not validation["valid"]: return { "schema": BASELINE_SCORE_APPLY_REPORT_SCHEMA, "applied": False, "root": str(Path(folder).resolve()), "updated_file_count": 0, "updated_files": [], "validation": validation, } root = Path(folder).resolve() if not root.is_dir(): raise FileNotFoundError(f"sidecar folder does not exist: {root}") updated_files: list[dict[str, Any]] = [] for update in draft.get("updates", []): sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename or Path(sidecar_filename).name != sidecar_filename: raise ValueError(f"sidecar filename must be a plain filename: {sidecar_filename!r}") sidecar_path = root / sidecar_filename sidecar = _read_json_object_if_present(sidecar_path) score = _merge_known_values(_score_template(), update.get("score")) sidecar["seed_metadata"] = _merge_known_values(_seed_metadata(), update.get("seed_metadata")) sidecar["cue_axes"] = _merge_known_values(_cue_axes(), update.get("cue_axes")) sidecar["score"] = score sidecar["baseline_score_state"] = _score_state(score) sidecar["baseline_source_prompt_sha256"] = _text(update.get("source_prompt_sha256")) sidecar["baseline_analysis_notes"] = _text(update.get("analysis_notes")) sidecar_path.write_text(json.dumps(sidecar, ensure_ascii=True, indent=2, sort_keys=True) + "\n", encoding="utf-8") updated_files.append( { "sidecar_filename": sidecar_filename, "sidecar_path": str(sidecar_path), "score_state": sidecar["baseline_score_state"], } ) return { "schema": BASELINE_SCORE_APPLY_REPORT_SCHEMA, "applied": True, "root": str(root), "updated_file_count": len(updated_files), "updated_files": updated_files, "validation": validation, } def _upsert_prompt_variants(existing: Any, incoming: list[dict[str, Any]]) -> list[dict[str, Any]]: if existing is None: variants: list[dict[str, Any]] = [] elif not isinstance(existing, list): raise ValueError("existing sidecar prompt_variants must be a list") else: variants = [] seen_existing_ids: set[str] = set() for index, item in enumerate(existing): if not isinstance(item, dict): raise ValueError(f"existing sidecar prompt_variants[{index}] must be an object") variant_id = _text(item.get("id")) if not variant_id: raise ValueError(f"existing sidecar prompt_variants[{index}].id is required") if variant_id in seen_existing_ids: raise ValueError(f"existing sidecar prompt_variants[{index}].id {variant_id!r} is duplicated") seen_existing_ids.add(variant_id) variants.append(dict(item)) index_by_id = {_text(variant.get("id")): index for index, variant in enumerate(variants)} for variant in incoming: variant_copy = dict(variant) variant_id = _text(variant_copy.get("id")) if variant_id in index_by_id: variants[index_by_id[variant_id]] = variant_copy else: index_by_id[variant_id] = len(variants) variants.append(variant_copy) return variants def _prompt_path_for_source_stem(root: Path, source_stem: str) -> Path: for suffix in (".txt", ".prompt"): path = root / f"{source_stem}{suffix}" if path.is_file(): return path raise FileNotFoundError(f"prompt file for source stem {source_stem!r} does not exist in {root}") def apply_reference_cue_sidecar_author_draft(draft: dict[str, Any], folder: str | Path) -> dict[str, Any]: validation = validate_reference_cue_sidecar_author_draft(draft) if not validation["valid"]: return { "schema": REFERENCE_CUE_SIDECAR_AUTHOR_APPLY_REPORT_SCHEMA, "applied": False, "root": str(Path(folder).resolve()), "updated_file_count": 0, "updated_files": [], "validation": validation, } root = Path(folder).resolve() if not root.is_dir(): raise FileNotFoundError(f"sidecar folder does not exist: {root}") updated_files: list[dict[str, Any]] = [] for update in draft.get("updates", []): source_stem = _text(update.get("source_stem")) source_prompt_sha256 = _text(update.get("source_prompt_sha256")) prompt_path = _prompt_path_for_source_stem(root, source_stem) actual_prompt_sha256 = _sha256_text(prompt_path.read_text(encoding="utf-8").strip()) if source_prompt_sha256 and actual_prompt_sha256 != source_prompt_sha256: raise ValueError(f"prompt file has drifted for {source_stem}: {prompt_path}") sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename or Path(sidecar_filename).name != sidecar_filename: raise ValueError(f"sidecar filename must be a plain filename: {sidecar_filename!r}") sidecar_path = root / sidecar_filename sidecar = _read_json_object_if_present(sidecar_path) incoming_variants = [dict(variant) for variant in update.get("prompt_variants", []) if isinstance(variant, dict)] sidecar["prompt_variants"] = _upsert_prompt_variants(sidecar.get("prompt_variants"), incoming_variants) sidecar["reference_cue_author_source_prompt_sha256"] = source_prompt_sha256 sidecar["reference_cue_author_notes"] = _text(update.get("notes")) sidecar_path.write_text(json.dumps(sidecar, ensure_ascii=True, indent=2, sort_keys=True) + "\n", encoding="utf-8") updated_files.append( { "sidecar_filename": sidecar_filename, "sidecar_path": str(sidecar_path), "prompt_variant_count": len(incoming_variants), } ) return { "schema": REFERENCE_CUE_SIDECAR_AUTHOR_APPLY_REPORT_SCHEMA, "applied": True, "root": str(root), "updated_file_count": len(updated_files), "updated_files": updated_files, "validation": validation, } def apply_sidecar_update_draft(draft: dict[str, Any], folder: str | Path) -> dict[str, Any]: validation = validate_sidecar_update_draft(draft) if not validation["valid"]: return { "schema": SIDECAR_APPLY_REPORT_SCHEMA, "applied": False, "root": str(Path(folder).resolve()), "updated_file_count": 0, "updated_files": [], "validation": validation, } root = Path(folder).resolve() if not root.is_dir(): raise FileNotFoundError(f"sidecar folder does not exist: {root}") updated_files: list[dict[str, Any]] = [] for update in draft.get("updates", []): sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename or Path(sidecar_filename).name != sidecar_filename: raise ValueError(f"sidecar filename must be a plain filename: {sidecar_filename!r}") sidecar_path = root / sidecar_filename sidecar = _read_json_object_if_present(sidecar_path) incoming_variants = [dict(variant) for variant in update.get("prompt_variants", []) if isinstance(variant, dict)] sidecar["prompt_variants"] = _upsert_prompt_variants(sidecar.get("prompt_variants"), incoming_variants) sidecar_path.write_text(json.dumps(sidecar, ensure_ascii=True, indent=2, sort_keys=True) + "\n", encoding="utf-8") updated_files.append( { "sidecar_filename": sidecar_filename, "sidecar_path": str(sidecar_path), "prompt_variant_count": len(incoming_variants), } ) return { "schema": SIDECAR_APPLY_REPORT_SCHEMA, "applied": True, "root": str(root), "updated_file_count": len(updated_files), "updated_files": updated_files, "validation": validation, } def apply_matrix_sidecar_update_draft(draft: dict[str, Any], folder: str | Path) -> dict[str, Any]: validation = validate_matrix_sidecar_update_draft(draft) if not validation["valid"]: return { "schema": MATRIX_SIDECAR_APPLY_REPORT_SCHEMA, "applied": False, "root": str(Path(folder).resolve()), "updated_file_count": 0, "updated_files": [], "validation": validation, } root = Path(folder).resolve() if not root.is_dir(): raise FileNotFoundError(f"sidecar folder does not exist: {root}") updated_files: list[dict[str, Any]] = [] for update in draft.get("updates", []): sidecar_filename = _text(update.get("sidecar_filename")) if not sidecar_filename or Path(sidecar_filename).name != sidecar_filename: raise ValueError(f"sidecar filename must be a plain filename: {sidecar_filename!r}") sidecar_path = root / sidecar_filename sidecar = _read_json_object_if_present(sidecar_path) incoming_variants = [dict(variant) for variant in update.get("prompt_variants", []) if isinstance(variant, dict)] sidecar["prompt_variants"] = _upsert_prompt_variants(sidecar.get("prompt_variants"), incoming_variants) sidecar_path.write_text(json.dumps(sidecar, ensure_ascii=True, indent=2, sort_keys=True) + "\n", encoding="utf-8") updated_files.append( { "sidecar_filename": sidecar_filename, "sidecar_path": str(sidecar_path), "prompt_variant_count": len(incoming_variants), } ) return { "schema": MATRIX_SIDECAR_APPLY_REPORT_SCHEMA, "applied": True, "root": str(root), "updated_file_count": len(updated_files), "updated_files": updated_files, "validation": validation, } def build_result_sheet(batch: dict[str, Any], results: dict[str, Any], *, notes: str = "") -> dict[str, Any]: seed = _int_seed(batch.get("seed"), field="batch seed") result_seed = _int_seed(results.get("seed"), field="result seed") if result_seed != seed: raise ValueError(f"result seed {result_seed} does not match batch seed {seed}") channel_in = _text(batch.get("channel_in") or DEFAULT_IN_CHANNEL) result_channel_in = _text(results.get("channel_in") or DEFAULT_IN_CHANNEL) _validate_no_negative_channel(channel_in, field="batch channel_in") _validate_no_negative_channel(result_channel_in, field="result channel_in") if result_channel_in != channel_in: raise ValueError(f"result channel_in {result_channel_in!r} does not match batch channel_in {channel_in!r}") batch_probes = _probe_list(batch.get("probes"), field="batch probes") result_probes = _probe_list(results.get("probes"), field="result probes") if len(result_probes) != len(batch_probes): raise ValueError("result probe count must match batch probe count") sheet_probes: list[dict[str, Any]] = [] for index, (batch_probe, result_probe) in enumerate(zip(batch_probes, result_probes)): probe_id = _text(batch_probe.get("id")) if not probe_id: raise ValueError(f"batch probes[{index}].id is required") result_probe_id = _text(result_probe.get("id")) if result_probe_id != probe_id: raise ValueError(f"result probes[{index}].id {result_probe_id!r} does not match batch probe id {probe_id!r}") prompt_order = _text(batch_probe.get("prompt_order") or "subject_first") result_prompt_order = _text(result_probe.get("prompt_order") or "subject_first") if prompt_order not in PROMPT_ORDERS: raise ValueError(f"batch probes[{index}].prompt_order must be one of {sorted(PROMPT_ORDERS)}") if result_prompt_order != prompt_order: raise ValueError(f"result probes[{index}].prompt_order does not match batch prompt_order {prompt_order!r}") text = _text(batch_probe.get("text")) if not text: raise ValueError(f"batch probes[{index}].text is required") _validate_no_negative_channel(text, field=f"batch probes[{index}].text") turn = result_probe.get("turn") if not isinstance(turn, int) or isinstance(turn, bool): raise ValueError(f"result probes[{index}].turn must be an integer") returned_seed = _int_seed(result_probe.get("returned_seed"), field=f"result probes[{index}].returned_seed") if returned_seed != seed: raise ValueError(f"result probes[{index}].returned_seed {returned_seed} does not match batch seed {seed}") sheet_probe = { "id": probe_id, "variant_key": _text(batch_probe.get("variant_key") or batch.get("variant_key")), "source_entry_id": _text(batch_probe.get("source_entry_id") or batch.get("source_entry_id")), "source_stem": _text(batch_probe.get("source_stem") or batch.get("source_stem") or batch_probe.get("source_entry_id")), "prompt_order": prompt_order, "text": text, "turn": turn, "image_path": _image_path(result_probe.get("image_path"), field=f"result probes[{index}].image_path"), "returned_seed": returned_seed, "cue_axes": _merge_known_values(_cue_axes(), batch_probe.get("cue_axes")), "seed_metadata": _merge_known_values(_seed_metadata(), batch_probe.get("seed_metadata")), "prompt_source": _prompt_source(batch_probe.get("prompt_source"), field=f"batch probes[{index}].prompt_source"), "selection": dict(batch_probe.get("selection")) if isinstance(batch_probe.get("selection"), dict) else {}, "score": _score_template(), "analysis_notes": "", } reference_images = _reference_images(batch_probe.get("reference_images"), field=f"batch probes[{index}].reference_images") if reference_images: sheet_probe["reference_images"] = reference_images matrix_evidence = _stable_matrix_evidence_for_variant(batch_probe, field=f"batch probes[{index}]") if matrix_evidence: sheet_probe["matrix_evidence"] = matrix_evidence sheet_probes.append(sheet_probe) return { "schema": RESULT_SHEET_SCHEMA, "seed": seed, "channel_in": channel_in, "subject_id": _text(batch.get("subject_id")), "variant_key": _text(batch.get("variant_key")), "source_entry_id": _text(batch.get("source_entry_id")), "source_stem": _text(batch.get("source_stem") or batch.get("source_entry_id")), "source_prompt_sha256": _text(batch.get("source_prompt_sha256")), "selection": dict(batch.get("selection")) if isinstance(batch.get("selection"), dict) else {}, "baseline_probe_id": sheet_probes[0]["id"], "probe_count": len(sheet_probes), "score_keys": list(SCORE_KEYS), "notes": _text(notes), "probes": sheet_probes, } def _matrix_result_jobs(results: dict[str, Any]) -> dict[str, dict[str, Any]]: jobs_raw = results.get("jobs") if not isinstance(jobs_raw, list): raise ValueError("seed matrix results jobs must be a list") jobs: dict[str, dict[str, Any]] = {} for index, job in enumerate(jobs_raw): if not isinstance(job, dict): raise ValueError(f"seed matrix results jobs[{index}] must be an object") job_id = _text(job.get("id")) if not job_id: raise ValueError(f"seed matrix results jobs[{index}].id is required") if job_id in jobs: raise ValueError(f"seed matrix results job id {job_id!r} is duplicated") job_results = job.get("results") if not isinstance(job_results, dict): raise ValueError(f"seed matrix results jobs[{index}].results must be an object") jobs[job_id] = job_results return jobs def build_seed_matrix_result_sheet(seed_matrix: dict[str, Any], results: dict[str, Any], *, notes: str = "") -> dict[str, Any]: schema = _text(seed_matrix.get("schema")) if schema and schema != SEED_MATRIX_SCHEMA: raise ValueError(f"seed matrix schema must be {SEED_MATRIX_SCHEMA}") matrix_jobs_raw = seed_matrix.get("jobs") if not isinstance(matrix_jobs_raw, list) or not matrix_jobs_raw: raise ValueError("seed matrix jobs must be a non-empty list") result_jobs_by_id = _matrix_result_jobs(results) sheet_jobs: list[dict[str, Any]] = [] seen_matrix_ids: set[str] = set() for index, job in enumerate(matrix_jobs_raw): if not isinstance(job, dict): raise ValueError(f"seed matrix jobs[{index}] must be an object") job_id = _text(job.get("id")) if not job_id: raise ValueError(f"seed matrix jobs[{index}].id is required") if job_id in seen_matrix_ids: raise ValueError(f"seed matrix jobs[{index}].id {job_id!r} is duplicated") seen_matrix_ids.add(job_id) batch = job.get("batch") if not isinstance(batch, dict): raise ValueError(f"seed matrix jobs[{index}].batch must be an object") job_results = result_jobs_by_id.get(job_id) if not isinstance(job_results, dict): raise ValueError(f"seed matrix results missing job {job_id!r}") result_sheet = build_result_sheet(batch, job_results, notes=notes) sheet_jobs.append( { "id": job_id, "variant_key": _text(job.get("variant_key") or seed_matrix.get("variant_key")), "sampler_seed": _int_seed(job.get("sampler_seed"), field=f"seed matrix jobs[{index}].sampler_seed"), "selection_seed": _int_seed(job.get("selection_seed"), field=f"seed matrix jobs[{index}].selection_seed"), "seed_slot": _text(job.get("seed_slot") or seed_matrix.get("seed_slot")), "selected": dict(job.get("selected")) if isinstance(job.get("selected"), dict) else {}, "candidate_probe": dict(job.get("candidate_probe")) if isinstance(job.get("candidate_probe"), dict) else {}, "result_sheet": result_sheet, } ) extra_ids = sorted(set(result_jobs_by_id) - seen_matrix_ids) if extra_ids: raise ValueError(f"seed matrix results contain unknown job ids: {', '.join(extra_ids)}") return { "schema": SEED_MATRIX_RESULT_SHEET_SCHEMA, "subject_id": _text(seed_matrix.get("subject_id")), "variant_key": _text(seed_matrix.get("variant_key")), "seed_slot": _text(seed_matrix.get("seed_slot")), "sampler_seeds": list(seed_matrix.get("sampler_seeds") or []), "selection_seeds": list(seed_matrix.get("selection_seeds") or []), "job_count": len(sheet_jobs), "score_keys": list(SCORE_KEYS), "notes": _text(notes), "jobs": sheet_jobs, } def build_seed_matrix_promotion_report(matrix_result_sheet: dict[str, Any]) -> dict[str, Any]: schema = _text(matrix_result_sheet.get("schema")) if schema and schema != SEED_MATRIX_RESULT_SHEET_SCHEMA: raise ValueError(f"seed matrix result sheet schema must be {SEED_MATRIX_RESULT_SHEET_SCHEMA}") jobs_raw = matrix_result_sheet.get("jobs") if not isinstance(jobs_raw, list) or not jobs_raw: raise ValueError("seed matrix result sheet jobs must be a non-empty list") expected_seed_slot = _text(matrix_result_sheet.get("seed_slot")) if expected_seed_slot and expected_seed_slot not in SEED_SELECTION_SLOT_KEYS: raise ValueError(f"seed matrix result sheet seed_slot must be one of {list(SEED_SELECTION_SLOT_KEYS)}") expected_sampler_seeds_raw = matrix_result_sheet.get("sampler_seeds") expected_sampler_seeds: list[int] = [] if isinstance(expected_sampler_seeds_raw, list): expected_sampler_seeds = [ _int_seed(seed, field=f"seed matrix result sheet sampler_seeds[{index}]") for index, seed in enumerate(expected_sampler_seeds_raw) ] if len(set(expected_sampler_seeds)) != len(expected_sampler_seeds): raise ValueError("seed matrix result sheet sampler_seeds must not contain duplicate sampler seeds") expected_selection_seeds_raw = matrix_result_sheet.get("selection_seeds") expected_selection_seeds: list[int] = [] if isinstance(expected_selection_seeds_raw, list): expected_selection_seeds = [ _int_seed(seed, field=f"seed matrix result sheet selection_seeds[{index}]") for index, seed in enumerate(expected_selection_seeds_raw) ] if len(set(expected_selection_seeds)) != len(expected_selection_seeds): raise ValueError("seed matrix result sheet selection_seeds must not contain duplicate cue seeds") report_jobs: list[dict[str, Any]] = [] groups_by_key: dict[tuple[str, int], dict[str, Any]] = {} seen_job_ids: set[str] = set() for index, job in enumerate(jobs_raw): if not isinstance(job, dict): raise ValueError(f"seed matrix result sheet jobs[{index}] must be an object") job_id = _text(job.get("id")) if not job_id: raise ValueError(f"seed matrix result sheet jobs[{index}].id is required") if job_id in seen_job_ids: raise ValueError(f"seed matrix result sheet jobs[{index}].id {job_id!r} is duplicated") seen_job_ids.add(job_id) result_sheet = job.get("result_sheet") if not isinstance(result_sheet, dict): raise ValueError(f"seed matrix result sheet jobs[{index}].result_sheet must be an object") promotion_report = build_promotion_report(result_sheet) candidates = promotion_report.get("candidates") or [] if len(candidates) != 1 or not isinstance(candidates[0], dict): raise ValueError(f"seed matrix result sheet jobs[{index}] must contain exactly one candidate") candidate = candidates[0] sampler_seed = _int_seed(job.get("sampler_seed"), field=f"seed matrix result sheet jobs[{index}].sampler_seed") if expected_sampler_seeds and sampler_seed not in expected_sampler_seeds: raise ValueError( f"seed matrix result sheet jobs[{index}].sampler_seed {sampler_seed} must be listed in sampler_seeds" ) selection_seed = _int_seed(job.get("selection_seed"), field=f"seed matrix result sheet jobs[{index}].selection_seed") if expected_selection_seeds and selection_seed not in expected_selection_seeds: raise ValueError( f"seed matrix result sheet jobs[{index}].selection_seed {selection_seed} must be listed in selection_seeds" ) seed_slot = _text(job.get("seed_slot") or expected_seed_slot) if seed_slot not in SEED_SELECTION_SLOT_KEYS: raise ValueError(f"seed matrix result sheet jobs[{index}].seed_slot must be one of {list(SEED_SELECTION_SLOT_KEYS)}") if expected_seed_slot and seed_slot != expected_seed_slot: raise ValueError( f"seed matrix result sheet jobs[{index}].seed_slot {seed_slot!r} does not match matrix seed_slot {expected_seed_slot!r}" ) selected = job.get("selected") if isinstance(job.get("selected"), dict) else {} selected_prompt_variant_id = _text(selected.get("prompt_variant_id")) candidate_prompt_variant_id = _text(candidate.get("prompt_variant_id")) if selected_prompt_variant_id and candidate_prompt_variant_id and selected_prompt_variant_id != candidate_prompt_variant_id: raise ValueError( f"seed matrix result sheet jobs[{index}].selected.prompt_variant_id {selected_prompt_variant_id!r} " f"does not match candidate prompt_variant_id {candidate_prompt_variant_id!r}" ) prompt_variant_id = _text( candidate_prompt_variant_id or selected_prompt_variant_id ) if not prompt_variant_id: raise ValueError(f"seed matrix result sheet jobs[{index}] selected prompt_variant_id is required") source_entry_id = _text(candidate.get("source_entry_id")) source_stem = _text(candidate.get("source_stem") or source_entry_id) job_variant_key = _text(job.get("variant_key") or matrix_result_sheet.get("variant_key")) candidate_variant_key = _text(candidate.get("variant_key")) if job_variant_key and candidate_variant_key and candidate_variant_key != job_variant_key: raise ValueError( f"seed matrix result sheet jobs[{index}].candidate.variant_key {candidate_variant_key!r} " f"does not match job variant_key {job_variant_key!r}" ) candidate_text = _text(candidate.get("text")) candidate_text_sha256 = _sha256_text(candidate_text) if candidate_text else "" decision = _text(candidate.get("decision")) blockers = [_text(blocker) for blocker in candidate.get("blockers") or [] if _text(blocker)] report_job = { "id": job_id, "variant_key": job_variant_key or candidate_variant_key, "source_entry_id": source_entry_id, "source_stem": source_stem, "sampler_seed": sampler_seed, "selection_seed": selection_seed, "seed_slot": seed_slot, "prompt_variant_id": prompt_variant_id, "prompt_text_sha256": candidate_text_sha256, "decision": decision, "blockers": blockers, "candidate": candidate, } report_jobs.append(report_job) group_key = (prompt_variant_id, selection_seed) group = groups_by_key.get(group_key) if group is None: group = { "variant_key": report_job["variant_key"], "source_entry_id": source_entry_id, "source_stem": source_stem, "prompt_variant_id": prompt_variant_id, "prompt_text_sha256": candidate_text_sha256, "selection_seed": selection_seed, "seed_slot": report_job["seed_slot"], "sampler_seeds": [], "job_ids": [], "job_count": 0, "promotion_ready_count": 0, "blocked_count": 0, "blockers": [], } groups_by_key[group_key] = group else: for field, value in ( ("variant_key", report_job["variant_key"]), ("source_stem", source_stem), ("source_entry_id", source_entry_id), ("prompt_text_sha256", candidate_text_sha256), ): expected_value = _text(group.get(field)) if expected_value and value and value != expected_value: label = "prompt text" if field == "prompt_text_sha256" else field raise ValueError( f"seed matrix result sheet jobs[{index}].candidate.{label} {value!r} " f"does not match group {label} {expected_value!r}" ) if sampler_seed in group["sampler_seeds"]: raise ValueError( f"seed matrix result sheet jobs[{index}].sampler_seed {sampler_seed} is duplicated in this cue group" ) group["sampler_seeds"].append(sampler_seed) group["job_ids"].append(report_job["id"]) group["job_count"] += 1 if decision == "seedable_candidate": group["promotion_ready_count"] += 1 else: group["blocked_count"] += 1 for blocker in blockers: if blocker not in group["blockers"]: group["blockers"].append(blocker) groups = [] for key in sorted(groups_by_key, key=lambda item: (item[1], item[0])): group = groups_by_key[key] group["sampler_seeds"] = sorted(group["sampler_seeds"]) group["sampler_seed_count"] = len(set(group["sampler_seeds"])) missing_sampler_seeds = sorted(set(expected_sampler_seeds) - set(group["sampler_seeds"])) if missing_sampler_seeds: group["missing_sampler_seeds"] = missing_sampler_seeds if "missing_sampler_coverage" not in group["blockers"]: group["blockers"].append("missing_sampler_coverage") insufficient_sampler_coverage = group["sampler_seed_count"] < MIN_STABLE_MATRIX_SAMPLER_SEEDS if insufficient_sampler_coverage and "insufficient_sampler_coverage" not in group["blockers"]: group["blockers"].append("insufficient_sampler_coverage") group["stable"] = ( group["job_count"] > 0 and group["blocked_count"] == 0 and not missing_sampler_seeds and not insufficient_sampler_coverage ) groups.append(group) return { "schema": SEED_MATRIX_PROMOTION_REPORT_SCHEMA, "subject_id": _text(matrix_result_sheet.get("subject_id")), "variant_key": _text(matrix_result_sheet.get("variant_key")), "seed_slot": _text(matrix_result_sheet.get("seed_slot")), "job_count": len(report_jobs), "promotion_ready_job_count": sum(1 for job in report_jobs if job["decision"] == "seedable_candidate"), "blocked_job_count": sum(1 for job in report_jobs if job["decision"] != "seedable_candidate"), "stable_group_count": sum(1 for group in groups if group.get("stable") is True), "unstable_group_count": sum(1 for group in groups if group.get("stable") is False), "required_pass_keys": list(PROMOTION_REQUIRED_PASS_KEYS), "required_progress_keys": list(PROMOTION_REQUIRED_PROGRESS_KEYS), "minimum_stable_sampler_seed_count": MIN_STABLE_MATRIX_SAMPLER_SEEDS, "jobs": report_jobs, "groups": groups, } def _load_json_object(path: str | Path, *, field: str) -> dict[str, Any]: json_path = Path(path) with json_path.open("r", encoding="utf-8") as handle: data = json.load(handle) if not isinstance(data, dict): raise ValueError(f"{field} must contain one JSON object") return data def _parse_int_csv(value: str, *, field: str) -> list[int]: text = _text(value) if not text: raise ValueError(f"{field} must contain at least one integer") items: list[int] = [] for index, part in enumerate(text.split(",")): item = part.strip() if not item: raise ValueError(f"{field}[{index}] is empty") try: parsed = int(item) except ValueError as exc: raise ValueError(f"{field}[{index}] must be an integer") from exc items.append(_int_seed(parsed, field=f"{field}[{index}]")) return items def main(argv: list[str] | None = None) -> int: parser = argparse.ArgumentParser(description="Build a manifest for a same-subject Krea2 atlas-refine prompt/image deck.") parser.add_argument("--folder", help="Folder containing paired .txt/.png atlas refine artifacts.") parser.add_argument("--subject-id", default="", help="Stable subject id for this reference deck.") parser.add_argument("--indent", type=int, default=2, help="JSON indentation level.") parser.add_argument("--print-manifest", action="store_true", help="Print the atlas refine manifest explicitly.") parser.add_argument("--print-batch", action="store_true", help="Print an sxcp_prompt_batch-compatible probe batch instead of the manifest.") parser.add_argument("--print-seed-selection", action="store_true", help="Print a deterministic seed-selected prompt variant from a manifest.") parser.add_argument("--print-seed-selected-batch", action="store_true", help="Print an sxcp prompt batch containing baseline and a deterministic seed-selected candidate.") parser.add_argument("--print-seed-matrix", action="store_true", help="Print seed-selected batches for every sampler/cue seed pair.") parser.add_argument("--print-seed-matrix-result-sheet", action="store_true", help="Print visual scoring sheets for completed seed-matrix jobs.") parser.add_argument("--print-seed-matrix-promotion-report", action="store_true", help="Print stability/promotion gates from a scored seed-matrix result sheet.") parser.add_argument("--print-matrix-sidecar-update-draft", action="store_true", help="Print sidecar prompt-variant updates from stable seed-matrix groups.") parser.add_argument("--print-catalog-cue-draft", action="store_true", help="Print review-only catalog prompt_variant_cues candidates from seedable append-cue sidecars.") parser.add_argument("--print-reference-pool-report", action="store_true", help="Print canonical/supplemental atlas reference-pool coverage for cue expansion.") parser.add_argument("--print-reference-cue-review-sheet", action="store_true", help="Print blank atlas reference cue-labeling slots for prompt-variant review.") parser.add_argument("--print-reference-cue-candidate-draft", action="store_true", help="Print sidecar-ready prompt-variant candidates from a filled reference cue-review sheet.") parser.add_argument("--print-reference-cue-sidecar-author-draft", action="store_true", help="Print same-stem sidecar authoring updates from reviewed reference cue candidates.") parser.add_argument("--validate-reference-cue-sidecar-author-draft", action="store_true", help="Validate pre-test reference cue sidecar authoring updates without writing sidecars.") parser.add_argument("--apply-reference-cue-sidecar-author-draft", action="store_true", help="Apply pre-test reference cue sidecar authoring updates to a folder.") parser.add_argument("--print-coverage-report", action="store_true", help="Print atlas refine readiness coverage by variant.") parser.add_argument("--print-sidecar-scaffold", action="store_true", help="Print review-only same-stem sidecar JSON scaffolds for known baseline-only entries.") parser.add_argument("--print-baseline-score-sheet", action="store_true", help="Print baseline image/prompt scoring slots for manifest entries.") parser.add_argument("--print-prompt-noise-report", action="store_true", help="Print read-only option/meta/negative prompt-noise findings for atlas prompts.") parser.add_argument("--print-prompt-cleanup-sheet", action="store_true", help="Print manual cleanup slots for prompt-noise findings.") parser.add_argument("--validate-prompt-cleanup-sheet", action="store_true", help="Validate manually filled prompt cleanup replacements without writing files.") parser.add_argument("--apply-prompt-cleanup-sheet", action="store_true", help="Apply validated prompt cleanup replacements to prompt files or sidecars.") parser.add_argument("--print-baseline-score-update-draft", action="store_true", help="Print sidecar baseline score updates from a manually scored baseline sheet.") parser.add_argument("--validate-baseline-score-update-draft", action="store_true", help="Validate baseline score sidecar updates without writing files.") parser.add_argument("--apply-baseline-score-update-draft", action="store_true", help="Apply baseline score sidecar updates to a folder.") parser.add_argument("--variant-key", default="", help="Variant key to export when --print-batch is set.") parser.add_argument("--reference-pool-folder", action="append", default=[], help="Supplemental atlas-root-relative folder for --print-reference-pool-report. Can be repeated.") parser.add_argument("--sampler-seed", type=int, default=None, help="Override sampler seed for --print-batch.") parser.add_argument("--selection-seed", type=int, default=None, help="Cue seed for --print-seed-selection.") parser.add_argument("--sampler-seeds", default="", help="Comma-separated sampler seeds for --print-seed-matrix.") parser.add_argument("--selection-seeds", default="", help="Comma-separated cue seeds for --print-seed-matrix.") parser.add_argument("--seed-slot", default="atlas_cue_seed", help="Seed slot label for --print-seed-selection.") parser.add_argument("--print-result-sheet", action="store_true", help="Print a visual scoring sheet from a batch JSON and result JSON.") parser.add_argument("--print-promotion-report", action="store_true", help="Print conservative seedable-candidate gates from a scored result sheet.") parser.add_argument("--print-sidecar-update-draft", action="store_true", help="Print reviewable sidecar prompt_variants from a promotion report.") parser.add_argument("--validate-sidecar-update-draft", action="store_true", help="Validate a sidecar update draft without writing sidecar files.") parser.add_argument("--apply-sidecar-update-draft", action="store_true", help="Apply a validated sidecar update draft to a folder.") parser.add_argument("--validate-matrix-sidecar-update-draft", action="store_true", help="Validate a matrix sidecar update draft without writing sidecar files.") parser.add_argument("--apply-matrix-sidecar-update-draft", action="store_true", help="Apply a validated matrix sidecar update draft to a folder.") parser.add_argument("--batch-json", default="", help="Prompt batch JSON path for --print-result-sheet.") parser.add_argument("--result-json", default="", help="Result JSON path for --print-result-sheet.") parser.add_argument("--seed-matrix-json", default="", help="Seed matrix JSON path for --print-seed-matrix-result-sheet.") parser.add_argument("--seed-matrix-results-json", default="", help="Seed matrix results JSON path for --print-seed-matrix-result-sheet.") parser.add_argument("--seed-matrix-result-sheet-json", default="", help="Scored seed matrix result sheet JSON path for --print-seed-matrix-promotion-report.") parser.add_argument("--seed-matrix-promotion-report-json", default="", help="Seed matrix promotion report JSON path for --print-matrix-sidecar-update-draft.") parser.add_argument("--result-sheet-json", default="", help="Scored result sheet JSON path for --print-promotion-report.") parser.add_argument("--promotion-report-json", default="", help="Promotion report JSON path for --print-sidecar-update-draft.") parser.add_argument("--sidecar-update-draft-json", default="", help="Sidecar update draft JSON path for --validate-sidecar-update-draft.") parser.add_argument("--matrix-sidecar-update-draft-json", default="", help="Matrix sidecar update draft JSON path for validation or apply.") parser.add_argument("--baseline-score-sheet-json", default="", help="Baseline score sheet JSON path for --print-baseline-score-update-draft.") parser.add_argument("--baseline-score-update-draft-json", default="", help="Baseline score update draft JSON path for validation or apply.") parser.add_argument("--prompt-cleanup-sheet-json", default="", help="Prompt cleanup sheet JSON path for validation or apply.") parser.add_argument("--reference-cue-review-sheet-json", default="", help="Filled reference cue-review sheet JSON path for --print-reference-cue-candidate-draft.") parser.add_argument("--reference-cue-candidate-draft-json", default="", help="Reference cue candidate draft JSON path for --print-reference-cue-sidecar-author-draft.") parser.add_argument("--reference-cue-sidecar-author-draft-json", default="", help="Reference cue sidecar author draft JSON path for validation or apply.") parser.add_argument("--notes", default="", help="Notes to include in --print-result-sheet output.") args = parser.parse_args(argv) if args.apply_reference_cue_sidecar_author_draft: if not args.reference_cue_sidecar_author_draft_json or not args.folder: parser.error("--reference-cue-sidecar-author-draft-json and --folder are required with --apply-reference-cue-sidecar-author-draft") reference_cue_sidecar_author_draft = _load_json_object( args.reference_cue_sidecar_author_draft_json, field="reference-cue-sidecar-author-draft-json", ) payload = apply_reference_cue_sidecar_author_draft(reference_cue_sidecar_author_draft, args.folder) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["applied"] else 1 if args.validate_reference_cue_sidecar_author_draft: if not args.reference_cue_sidecar_author_draft_json: parser.error("--reference-cue-sidecar-author-draft-json is required with --validate-reference-cue-sidecar-author-draft") reference_cue_sidecar_author_draft = _load_json_object( args.reference_cue_sidecar_author_draft_json, field="reference-cue-sidecar-author-draft-json", ) payload = validate_reference_cue_sidecar_author_draft(reference_cue_sidecar_author_draft) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["valid"] else 1 if args.apply_prompt_cleanup_sheet: if not args.prompt_cleanup_sheet_json or not args.folder: parser.error("--prompt-cleanup-sheet-json and --folder are required with --apply-prompt-cleanup-sheet") prompt_cleanup_sheet = _load_json_object(args.prompt_cleanup_sheet_json, field="prompt-cleanup-sheet-json") payload = apply_prompt_cleanup_sheet(prompt_cleanup_sheet, args.folder) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["applied"] else 1 if args.validate_prompt_cleanup_sheet: if not args.prompt_cleanup_sheet_json: parser.error("--prompt-cleanup-sheet-json is required with --validate-prompt-cleanup-sheet") prompt_cleanup_sheet = _load_json_object(args.prompt_cleanup_sheet_json, field="prompt-cleanup-sheet-json") payload = validate_prompt_cleanup_sheet(prompt_cleanup_sheet) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["valid"] else 1 if args.apply_baseline_score_update_draft: if not args.baseline_score_update_draft_json or not args.folder: parser.error("--baseline-score-update-draft-json and --folder are required with --apply-baseline-score-update-draft") baseline_score_update_draft = _load_json_object(args.baseline_score_update_draft_json, field="baseline-score-update-draft-json") payload = apply_baseline_score_update_draft(baseline_score_update_draft, args.folder) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["applied"] else 1 if args.validate_baseline_score_update_draft: if not args.baseline_score_update_draft_json: parser.error("--baseline-score-update-draft-json is required with --validate-baseline-score-update-draft") baseline_score_update_draft = _load_json_object(args.baseline_score_update_draft_json, field="baseline-score-update-draft-json") payload = validate_baseline_score_update_draft(baseline_score_update_draft) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["valid"] else 1 if args.print_baseline_score_update_draft: if not args.baseline_score_sheet_json: parser.error("--baseline-score-sheet-json is required with --print-baseline-score-update-draft") baseline_score_sheet = _load_json_object(args.baseline_score_sheet_json, field="baseline-score-sheet-json") payload = build_baseline_score_update_draft(baseline_score_sheet) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.apply_matrix_sidecar_update_draft: if not args.matrix_sidecar_update_draft_json or not args.folder: parser.error("--matrix-sidecar-update-draft-json and --folder are required with --apply-matrix-sidecar-update-draft") matrix_sidecar_update_draft = _load_json_object( args.matrix_sidecar_update_draft_json, field="matrix-sidecar-update-draft-json", ) payload = apply_matrix_sidecar_update_draft(matrix_sidecar_update_draft, args.folder) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["applied"] else 1 if args.validate_matrix_sidecar_update_draft: if not args.matrix_sidecar_update_draft_json: parser.error("--matrix-sidecar-update-draft-json is required with --validate-matrix-sidecar-update-draft") matrix_sidecar_update_draft = _load_json_object( args.matrix_sidecar_update_draft_json, field="matrix-sidecar-update-draft-json", ) payload = validate_matrix_sidecar_update_draft(matrix_sidecar_update_draft) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["valid"] else 1 if args.apply_sidecar_update_draft: if not args.sidecar_update_draft_json or not args.folder: parser.error("--sidecar-update-draft-json and --folder are required with --apply-sidecar-update-draft") sidecar_update_draft = _load_json_object(args.sidecar_update_draft_json, field="sidecar-update-draft-json") payload = apply_sidecar_update_draft(sidecar_update_draft, args.folder) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["applied"] else 1 if args.validate_sidecar_update_draft: if not args.sidecar_update_draft_json: parser.error("--sidecar-update-draft-json is required with --validate-sidecar-update-draft") sidecar_update_draft = _load_json_object(args.sidecar_update_draft_json, field="sidecar-update-draft-json") payload = validate_sidecar_update_draft(sidecar_update_draft) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if payload["valid"] else 1 if args.print_sidecar_update_draft: if not args.promotion_report_json: parser.error("--promotion-report-json is required with --print-sidecar-update-draft") promotion_report = _load_json_object(args.promotion_report_json, field="promotion-report-json") payload = build_sidecar_update_draft(promotion_report) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_promotion_report: if not args.result_sheet_json: parser.error("--result-sheet-json is required with --print-promotion-report") result_sheet = _load_json_object(args.result_sheet_json, field="result-sheet-json") payload = build_promotion_report(result_sheet) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_result_sheet: if not args.batch_json or not args.result_json: parser.error("--batch-json and --result-json are required with --print-result-sheet") batch = _load_json_object(args.batch_json, field="batch-json") results = _load_json_object(args.result_json, field="result-json") payload = build_result_sheet(batch, results, notes=args.notes) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_seed_matrix_result_sheet: if not args.seed_matrix_json or not args.seed_matrix_results_json: parser.error("--seed-matrix-json and --seed-matrix-results-json are required with --print-seed-matrix-result-sheet") seed_matrix = _load_json_object(args.seed_matrix_json, field="seed-matrix-json") seed_matrix_results = _load_json_object(args.seed_matrix_results_json, field="seed-matrix-results-json") payload = build_seed_matrix_result_sheet(seed_matrix, seed_matrix_results, notes=args.notes) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_seed_matrix_promotion_report: if not args.seed_matrix_result_sheet_json: parser.error("--seed-matrix-result-sheet-json is required with --print-seed-matrix-promotion-report") seed_matrix_result_sheet = _load_json_object(args.seed_matrix_result_sheet_json, field="seed-matrix-result-sheet-json") payload = build_seed_matrix_promotion_report(seed_matrix_result_sheet) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_matrix_sidecar_update_draft: if not args.seed_matrix_promotion_report_json: parser.error("--seed-matrix-promotion-report-json is required with --print-matrix-sidecar-update-draft") seed_matrix_promotion_report = _load_json_object(args.seed_matrix_promotion_report_json, field="seed-matrix-promotion-report-json") payload = build_matrix_sidecar_update_draft(seed_matrix_promotion_report) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_reference_pool_report: if not args.variant_key: parser.error("--variant-key is required with --print-reference-pool-report") payload = build_reference_pool_report( args.variant_key, supplemental_folders=list(args.reference_pool_folder or []), ) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_reference_cue_review_sheet: if not args.variant_key: parser.error("--variant-key is required with --print-reference-cue-review-sheet") payload = build_reference_cue_review_sheet( args.variant_key, supplemental_folders=list(args.reference_pool_folder or []), ) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if args.print_reference_cue_candidate_draft: if not args.reference_cue_review_sheet_json: parser.error("--reference-cue-review-sheet-json is required with --print-reference-cue-candidate-draft") reference_cue_review_sheet = _load_json_object( args.reference_cue_review_sheet_json, field="reference-cue-review-sheet-json", ) payload = build_reference_cue_candidate_draft(reference_cue_review_sheet) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if not args.folder: parser.error("--folder is required unless a JSON-only output mode is set") manifest = build_manifest(args.folder, subject_id=args.subject_id) payload = manifest if args.print_seed_selection: if not args.variant_key: parser.error("--variant-key is required with --print-seed-selection") if args.selection_seed is None: parser.error("--selection-seed is required with --print-seed-selection") payload = select_seeded_prompt_variant( manifest, args.variant_key, selection_seed=args.selection_seed, seed_slot=args.seed_slot, ) elif args.print_seed_selected_batch: if not args.variant_key: parser.error("--variant-key is required with --print-seed-selected-batch") if args.selection_seed is None or args.sampler_seed is None: parser.error("--selection-seed and --sampler-seed are required with --print-seed-selected-batch") payload = build_seed_selected_prompt_batch( manifest, args.variant_key, selection_seed=args.selection_seed, sampler_seed=args.sampler_seed, seed_slot=args.seed_slot, ) elif args.print_seed_matrix: if not args.variant_key: parser.error("--variant-key is required with --print-seed-matrix") if not args.selection_seeds or not args.sampler_seeds: parser.error("--selection-seeds and --sampler-seeds are required with --print-seed-matrix") payload = build_seed_matrix( manifest, args.variant_key, selection_seeds=_parse_int_csv(args.selection_seeds, field="selection-seeds"), sampler_seeds=_parse_int_csv(args.sampler_seeds, field="sampler-seeds"), seed_slot=args.seed_slot, ) elif args.print_reference_cue_sidecar_author_draft: if not args.reference_cue_candidate_draft_json: parser.error("--reference-cue-candidate-draft-json is required with --print-reference-cue-sidecar-author-draft") reference_cue_candidate_draft = _load_json_object( args.reference_cue_candidate_draft_json, field="reference-cue-candidate-draft-json", ) payload = build_reference_cue_sidecar_author_draft( manifest, reference_cue_candidate_draft, variant_key=args.variant_key, ) elif args.print_catalog_cue_draft: payload = build_catalog_cue_draft(manifest, variant_key=args.variant_key) elif args.print_coverage_report: payload = build_coverage_report(manifest) elif args.print_sidecar_scaffold: payload = build_sidecar_scaffold(manifest, variant_key=args.variant_key) elif args.print_baseline_score_sheet: payload = build_baseline_score_sheet(manifest, variant_key=args.variant_key) elif args.print_prompt_noise_report: payload = build_prompt_noise_report(manifest, variant_key=args.variant_key) elif args.print_prompt_cleanup_sheet: payload = build_prompt_cleanup_sheet(manifest, variant_key=args.variant_key) elif args.print_batch: if not args.variant_key: parser.error("--variant-key is required with --print-batch") payload = build_prompt_batch(manifest, args.variant_key, sampler_seed=args.sampler_seed) print(json.dumps(payload, ensure_ascii=True, indent=args.indent, sort_keys=True)) return 0 if __name__ == "__main__": raise SystemExit(main())