feat: audio area length — remove the upper cap + step by 1s

The audio extract length is meant for visualizing/grabbing sequences that can run minutes long, but the control capped it and stepped in fiddly 0.10s increments. Raise the range to effectively unlimited (24h; ffmpeg stops cleanly at end-of-file if the source is shorter) and make the arrows step 1s — typing still allows sub-second precision. Widen the field for the larger values. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
fix: audio extract honored a silent length clamp — 30s near the end became 3s
2026-07-02 01:11:57 +02:00 · 2026-07-02 00:07:35 +02:00 · 2026-07-01 23:48:24 +02:00 · 2026-06-19 14:19:20 +02:00 · 2026-06-19 13:54:32 +02:00 · 2026-06-18 15:35:46 +02:00
36 changed files with 9226 additions and 799 deletions
@@ -3,6 +3,7 @@
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 ENV_NAME="8cut"
 CONDA_PREFIX_BASE="/media/p5/miniforge3"
+export LD_PRELOAD=/usr/lib/libstdc++.so.6

 # 1. Try .venv in project dir
 if [ -f "$SCRIPT_DIR/.venv/bin/activate" ]; then
@@ -30,6 +30,11 @@ mpv_dir = Path(os.environ.get("MPV_DIR", base))

 datas = []

+# Bundled assets (icons, logo) — must exist at runtime under sys._MEIPASS/assets
+assets_dir = base / "assets"
+if assets_dir.exists():
+    datas.append((str(assets_dir), "assets"))
+
 # YOLOv8 model (optional — large, skip if missing)
 yolo = base / "yolov8n.pt"
 if yolo.exists():
@@ -61,6 +61,16 @@ All clips are exactly 8 seconds — the standard length for foley sound datasets
 - **Subprofiles** — lightweight export folder variants for multiple output targets
 - **Review mode** — clean timeline view for navigating scan results without export clutter

+### Interface
+
+- **Menu bar** — File / Edit / Scan / View / Help hold the occasional actions (open files, train, scan all, profiles); the profile selector and shortcuts (`?`) sit in the top-right corner
+- **Control deck** — a compact tabbed panel under the video groups the settings into **Export** (label, name, folder, format, resize, duration/clips/spread, workers), **Crop & Track**, and **Scan** (model, threshold, fuse, scan/auto/speech/review)
+- **Side-by-side panels** — pin deck panels to view them as resizable columns: right-click a deck tab → *Show side-by-side*, or toggle them under *View ▸ Side-by-side panels*; drag the dividers to reallocate space, and the layout persists between sessions
+- **Per-tab export folder** — each file-list tab remembers its own output folder; switching tabs follows that tab's folder, and a guardrail warns when the loaded video doesn't match the destination
+- **Duplicate tab** — right-click a file-list tab → *Duplicate tab* to clone its files into a new tab with its own export folder
+- **LTX-2 export mode** — per-tab **Foley | LTX-2** toggle (right-click a tab, shown with an `[LTX2]` badge): LTX-2 clips are frame-exact (`frames % 8 == 1`), forced to 25 fps, and center-cropped so width & height are divisible by 32 — for LTX-2 video-to-audio datasets; applies to manual, re-export, and auto-export
+- **Status bar** — export/scan progress and messages, with the current file · profile · worker count always shown
+
 ## Keyboard shortcuts

 | Key | Action |
@@ -0,0 +1,14 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64">
+  <defs>
+    <linearGradient id="g8" x1="0" y1="0" x2="0" y2="1">
+      <stop offset="0%" stop-color="#ffd230"/>
+      <stop offset="100%" stop-color="#e6a800"/>
+    </linearGradient>
+  </defs>
+  <rect width="64" height="64" rx="13" fill="#161616"/>
+  <rect x="8" y="42" width="48" height="11" rx="2" fill="#2a2a2a" stroke="#333" stroke-width="1"/>
+  <rect x="26" y="42" width="16" height="11" fill="#3c82dc" fill-opacity="0.45"/>
+  <line x1="26" y1="38" x2="26" y2="55" stroke="#ffd230" stroke-width="2"/>
+  <polygon points="22,38 30,38 26,44" fill="#ffd230"/>
+  <text x="32" y="33" font-family="'Helvetica Neue',Helvetica,Arial,sans-serif" font-size="34" font-weight="bold" fill="url(#g8)" text-anchor="middle">8</text>
+</svg>
@@ -0,0 +1,6 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none">
+  <path d="M7.5 10 V7.5 a4.5 4.5 0 0 1 9 0 V10" stroke="#ffd230" stroke-width="2"/>
+  <rect x="5" y="10" width="14" height="10" rx="2" fill="#ffd230"/>
+  <circle cx="12" cy="14.3" r="1.4" fill="#161616"/>
+  <rect x="11.2" y="14.3" width="1.6" height="3.4" rx="0.8" fill="#161616"/>
+</svg>
@@ -0,0 +1,6 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none">
+  <path d="M7.5 10 V7.5 a4.5 4.5 0 0 1 8.6 -1.8" stroke="#8a8a8a" stroke-width="2"/>
+  <rect x="5" y="10" width="14" height="10" rx="2" fill="#8a8a8a"/>
+  <circle cx="12" cy="14.3" r="1.4" fill="#1e1e1e"/>
+  <rect x="11.2" y="14.3" width="1.6" height="3.4" rx="0.8" fill="#1e1e1e"/>
+</svg>
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+  <rect x="6.5" y="5" width="4" height="14" rx="1.2" fill="#ffd230"/>
+  <rect x="13.5" y="5" width="4" height="14" rx="1.2" fill="#ffd230"/>
+</svg>
@@ -0,0 +1,3 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
+  <path d="M7 5 L19 12 L7 19 Z" fill="#ffd230" stroke="#ffd230" stroke-width="1.5" stroke-linejoin="round"/>
+</svg>
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="#aad4ff" stroke-width="2" stroke-linecap="round">
+  <circle cx="10.5" cy="10.5" r="6"/>
+  <line x1="15" y1="15" x2="20" y2="20"/>
+</svg>
@@ -0,0 +1,6 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="#ffd230" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+  <circle cx="6.5" cy="6.5" r="2.6"/>
+  <circle cx="6.5" cy="17.5" r="2.6"/>
+  <line x1="8.8" y1="8" x2="20" y2="17"/>
+  <line x1="8.8" y1="16" x2="20" y2="7"/>
+</svg>
@@ -0,0 +1,4 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="none" stroke="#ffd230" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+  <polyline points="4,17 10,11 14,14 20,6"/>
+  <polyline points="15,6 20,6 20,11"/>
+</svg>
@@ -1,2 +1,6 @@
 import sys, os
 sys.path.insert(0, os.path.dirname(__file__))
+
+
+def pytest_configure(config):
+    config.addinivalue_line("markers", "gui: constructs Qt widgets; needs a display")
@@ -45,6 +45,7 @@ os.environ.setdefault("HF_HOME", os.path.join(_DL_CACHE_DIR, "huggingface"))
 _w2v_model = None
 _w2v_device = None
 _w2v_model_name = None
+_ast_feature_extractor = None

 # Supported embedding models — name → embed_dim
 _EMBED_MODELS = {
@@ -55,8 +56,18 @@ _EMBED_MODELS = {
    "HUBERT_LARGE":        1024,
    "HUBERT_XLARGE":       1280,
    "BEATS":               768,
+    # Multi-layer variants (4 quartile layers concatenated)
+    "WAV2VEC2_BASE_ML":   3072,   # 768 * 4
+    "HUBERT_BASE_ML":     3072,   # 768 * 4
+    "HUBERT_LARGE_ML":    4096,   # 1024 * 4
+    "HUBERT_XLARGE_ML":   5120,   # 1280 * 4
+    # Transformers-based models
+    "AST":                 768,
+    "AST_ML":             3072,   # 768 * 4
+    "EAT":                 768,
+    "EAT_LARGE":          1024,
 }
-_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
+_DEFAULT_EMBED_MODEL = "EAT_LARGE"

 _BEATS_CHECKPOINT = os.path.join(
    _DL_CACHE_DIR, "huggingface", "hub",
@@ -70,11 +81,14 @@ def _get_w2v_model(model_name: str | None = None):
    global _w2v_model, _w2v_device, _w2v_model_name
    if model_name is None:
        model_name = _DEFAULT_EMBED_MODEL
-    if _w2v_model is None or _w2v_model_name != model_name:
+    # Multi-layer variants use the same base model weights
+    ml = _ml_config(model_name)
+    load_name = ml[0] if ml else model_name
+    if _w2v_model is None or _w2v_model_name != load_name:
        import torch
        _w2v_device = "cuda" if torch.cuda.is_available() else "cpu"

-        if model_name == "BEATS":
+        if load_name == "BEATS":
            from .beats_model import BEATs, BEATsConfig
            checkpoint = torch.load(_BEATS_CHECKPOINT, map_location=_w2v_device,
                                    weights_only=False)
@@ -82,17 +96,63 @@ def _get_w2v_model(model_name: str | None = None):
            _w2v_model = BEATs(cfg)
            _w2v_model.load_state_dict(checkpoint['model'])
            _w2v_model.to(_w2v_device)
+        elif load_name == "AST":
+            from transformers import ASTModel, ASTFeatureExtractor
+            _w2v_model = ASTModel.from_pretrained(
+                "MIT/ast-finetuned-audioset-10-10-0.4593"
+            ).to(_w2v_device)
+            global _ast_feature_extractor
+            _ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
+                "MIT/ast-finetuned-audioset-10-10-0.4593"
+            )
+        elif load_name in ("EAT", "EAT_LARGE"):
+            from transformers import AutoModel
+            eat_repo = ("worstchan/EAT-large_epoch20_finetune_AS2M"
+                        if load_name == "EAT_LARGE"
+                        else "worstchan/EAT-base_epoch30_finetune_AS2M")
+            _w2v_model = AutoModel.from_pretrained(
+                eat_repo, trust_remote_code=True,
+            ).to(_w2v_device)
        else:
            import torchaudio
-            bundle = getattr(torchaudio.pipelines, model_name)
+            bundle = getattr(torchaudio.pipelines, load_name)
            _w2v_model = bundle.get_model().to(_w2v_device)

        _w2v_model.eval()
-        _w2v_model_name = model_name
-        _log(f"audio_scan: {model_name} loaded on {_w2v_device}")
+        _w2v_model_name = load_name
+        _log(f"audio_scan: {load_name} loaded on {_w2v_device}")
    return _w2v_model, _w2v_device


+def _eat_preprocess(chunks: list[np.ndarray], sr: int, device: str):
+    """Convert raw audio chunks to EAT mel spectrogram input.
+
+    Returns tensor of shape [B, 1, T, 128].
+    8s audio at 10ms frame shift produces ~798 frames, zero-padded to 1024.
+    """
+    import torch
+    import torchaudio.compliance.kaldi as kaldi
+
+    TARGET_LEN = 1024
+    MEAN, STD = -4.268, 4.569
+
+    mels = []
+    for chunk in chunks:
+        wav = torch.from_numpy(np.array(chunk)).unsqueeze(0).float()
+        fbank = kaldi.fbank(
+            wav, htk_compat=True, sample_frequency=sr, use_energy=False,
+            window_type='hanning', num_mel_bins=128, dither=0.0, frame_shift=10,
+        )
+        # Pad or truncate to TARGET_LEN
+        if fbank.shape[0] < TARGET_LEN:
+            fbank = torch.nn.functional.pad(fbank, (0, 0, 0, TARGET_LEN - fbank.shape[0]))
+        else:
+            fbank = fbank[:TARGET_LEN]
+        fbank = (fbank - MEAN) / (STD * 2)
+        mels.append(fbank)
+    return torch.stack(mels).unsqueeze(1).to(device)  # [B, 1, T, 128]
+
+
 def _embed_dim(model_name: str | None = None) -> int:
    """Return embedding dimension for a model name."""
    if model_name is None:
@@ -100,6 +160,31 @@ def _embed_dim(model_name: str | None = None) -> int:
    return _EMBED_MODELS.get(model_name, 768)


+def _ml_config(model_name: str) -> tuple[str, list[int]] | None:
+    """If model_name is a multi-layer variant, return (base_model, layer_indices).
+
+    Returns None for single-layer models.
+    Layer indices are 0-based into the list returned by extract_features().
+    """
+    if not model_name.endswith("_ML"):
+        return None
+    base = model_name[:-3]  # strip "_ML"
+    if base not in _EMBED_MODELS:
+        return None
+    # Layer counts per model family
+    layer_counts = {
+        "WAV2VEC2_BASE": 12, "WAV2VEC2_LARGE": 24, "WAV2VEC2_LARGE_LV60K": 24,
+        "HUBERT_BASE": 12, "HUBERT_LARGE": 24, "HUBERT_XLARGE": 48,
+        "AST": 12,
+    }
+    n = layer_counts.get(base)
+    if n is None:
+        return None
+    # Select 4 layers at quartile boundaries (0-indexed)
+    indices = [n // 4 - 1, n // 2 - 1, 3 * n // 4 - 1, n - 1]
+    return base, indices
+
+
 def _w2v_cache_path(video_path: str, hop: float, window: float,
                    model_name: str | None = None) -> str:
    """Return cache file path for a video's embeddings (includes model name)."""
@@ -171,6 +256,9 @@ def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
    import torch
    model, device = _get_w2v_model(model_name)
    is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
+    is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
+    is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
+    ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
    # Auto-size batches based on available GPU memory
    batch_size = 16
    if device == "cuda":
@@ -195,10 +283,33 @@ def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
            start = i * hop_samples
            chunks.append(y[start:start + win_samples])
        with torch.no_grad():
+            if is_ast:
+                inputs = _ast_feature_extractor(
+                    list(chunks), sampling_rate=sr, return_tensors="pt",
+                    padding=True,
+                )
+                input_values = inputs.input_values.to(device)
+                if ml_cfg is not None:
+                    out = model(input_values, output_hidden_states=True)
+                    selected = [out.hidden_states[i].mean(dim=1) for i in ml_cfg[1]]
+                    batch_emb = torch.cat(selected, dim=1).cpu().numpy()
+                else:
+                    out = model(input_values)
+                    batch_emb = out.last_hidden_state.mean(dim=1).cpu().numpy()
+            elif is_eat:
+                mel_input = _eat_preprocess(chunks, sr, device)
+                features = model.extract_features(mel_input)
+                batch_emb = features[:, 1:, :].mean(dim=1).cpu().numpy()
+            else:
                waveforms = torch.from_numpy(np.stack(chunks)).float().to(device)
                if is_beats:
                    padding_mask = torch.zeros_like(waveforms, dtype=torch.bool)
                    features, _ = model.extract_features(waveforms, padding_mask=padding_mask)
+                    batch_emb = features.mean(dim=1).cpu().numpy()
+                elif ml_cfg is not None:
+                    all_layers, _ = model.extract_features(waveforms)
+                    selected = [all_layers[i].mean(dim=1) for i in ml_cfg[1]]
+                    batch_emb = torch.cat(selected, dim=1).cpu().numpy()
                else:
                    features, _ = model(waveforms)
                    batch_emb = features.mean(dim=1).cpu().numpy()
@@ -274,6 +385,9 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
    embeddings_list: list[np.ndarray] = []

    is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
+    is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
+    is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
+    ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)

    for batch_start in range(0, len(valid_times), batch_size):
        batch_end = min(batch_start + batch_size, len(valid_times))
@@ -283,10 +397,33 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
            chunks.append(y[start:start + win_samples])
            timestamps_list.append(float(t))
        with torch.no_grad():
+            if is_ast:
+                inputs = _ast_feature_extractor(
+                    list(chunks), sampling_rate=sr, return_tensors="pt",
+                    padding=True,
+                )
+                input_values = inputs.input_values.to(device)
+                if ml_cfg is not None:
+                    out = model(input_values, output_hidden_states=True)
+                    selected = [out.hidden_states[i].mean(dim=1) for i in ml_cfg[1]]
+                    batch_emb = torch.cat(selected, dim=1).cpu().numpy()
+                else:
+                    out = model(input_values)
+                    batch_emb = out.last_hidden_state.mean(dim=1).cpu().numpy()
+            elif is_eat:
+                mel_input = _eat_preprocess(chunks, sr, device)
+                features = model.extract_features(mel_input)
+                batch_emb = features[:, 1:, :].mean(dim=1).cpu().numpy()
+            else:
                waveforms = torch.from_numpy(np.stack(chunks)).float().to(device)
                if is_beats:
                    padding_mask = torch.zeros_like(waveforms, dtype=torch.bool)
                    features, _ = model.extract_features(waveforms, padding_mask=padding_mask)
+                    batch_emb = features.mean(dim=1).cpu().numpy()
+                elif ml_cfg is not None:
+                    all_layers, _ = model.extract_features(waveforms)
+                    selected = [all_layers[i].mean(dim=1) for i in ml_cfg[1]]
+                    batch_emb = torch.cat(selected, dim=1).cpu().numpy()
                else:
                    features, _ = model(waveforms)
                    batch_emb = features.mean(dim=1).cpu().numpy()
@@ -428,6 +565,17 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
    clf.fit(X[train_idx], y_arr[train_idx])
    _log("audio_scan: classifier trained")

+    # Calibrate probabilities for better threshold behavior
+    from sklearn.calibration import CalibratedClassifierCV
+    min_class = min(int(n_pos), int(n_neg_sample))
+    if min_class >= 6:
+        cal_clf = CalibratedClassifierCV(clf, cv=3, method='isotonic')
+        cal_clf.fit(X[train_idx], y_arr[train_idx])
+        clf = cal_clf
+        _log("audio_scan: classifier calibrated (isotonic, 3-fold)")
+    else:
+        _log(f"audio_scan: skipping calibration (min class size {min_class} < 6)")
+
    model = {"classifier": clf, "n_features": X.shape[1],
             "embed_model": embed_model or _DEFAULT_EMBED_MODEL}

@@ -526,9 +674,11 @@ def restore_model_version(version_path: str, profile_name: str = "default",


 def list_trained_models(profile_name: str = "default") -> list[str]:
-    """Return embedding model names that have a trained .joblib for *profile_name*.
+    """Return embedding model keys that have a trained .joblib for *profile_name*.

-    Looks for files matching ``{profile}_{MODEL}.joblib`` in the models dir.
+    Looks for files matching ``{profile}_{KEY}.joblib`` in the models dir.
+    KEY is either a bare embed model name (e.g. ``EAT_LARGE``) or
+    ``{MODEL}_{name}`` for user-named variants.
    """
    prefix = f"{profile_name}_"
    suffix = ".joblib"
@@ -537,13 +687,17 @@ def list_trained_models(profile_name: str = "default") -> list[str]:
        return result
    for fname in os.listdir(_MODEL_DIR):
        if fname.startswith(prefix) and fname.endswith(suffix):
-            model_name = fname[len(prefix):-len(suffix)]
-            if model_name in _EMBED_MODELS:
-                result.append(model_name)
+            key = fname[len(prefix):-len(suffix)]
+            if key in _EMBED_MODELS:
+                result.append(key)
+            else:
+                for m in _EMBED_MODELS:
+                    if key.startswith(m + "_"):
+                        result.append(key)
+                        break
    # Also check legacy {profile}.joblib
    legacy = os.path.join(_MODEL_DIR, f"{profile_name}.joblib")
    if os.path.exists(legacy) and not result:
-        # Legacy model — we don't know the embed model, but it's usable
        result.append("")
    return sorted(result)

@@ -589,7 +743,7 @@ def prefetch_audio(video_path: str, embed_model: str | None = None,
 def scan_video(
    video_path: str,
    model: dict = None,
-    threshold: float = 0.30,
+    threshold: float = 0.50,
    hop: float = 1.0,
    window: float = _WINDOW,
    cancel_flag: object = None,
@@ -78,6 +78,10 @@ def build_ffmpeg_command(
    crop_center: float = 0.5,
    image_sequence: bool = False,
    encoder: str = "libx264",
+    duration: float = 8.0,
+    target_fps: float | None = None,
+    snap32: bool = False,
+    frames: int | None = None,
 ) -> list[str]:
    # -ss before -i: fast input-seeking. Safe here because we always re-encode,
    # so there is no keyframe-alignment issue from pre-input seek.
@@ -96,7 +100,7 @@ def build_ffmpeg_command(
        "-threads", "0",
        "-ss", str(start),
        "-i", input_path,
-        "-t", "8",
+        "-t", str(duration),
    ]

    filters: list[str] = []
@@ -108,6 +112,13 @@ def build_ffmpeg_command(
            f"scale='if(lt(iw,ih),{short_side},-2)':'if(lt(iw,ih),-2,{short_side})':flags=lanczos"
        )

+    # LTX-2: centered crop to ÷32 (no rescale → no aspect distortion) then fps.
+    # Placed among CPU filters, after scale and before the VAAPI hwupload block.
+    if snap32:
+        filters.append("crop=trunc(iw/32)*32:trunc(ih/32)*32")
+    if target_fps is not None:
+        filters.append(f"fps={target_fps:g}")
+
    # VAAPI: decoded frames are GPU surfaces. CPU filters need hwdownload first.
    if use_hw_vaapi:
        if filters:
@@ -119,6 +130,12 @@ def build_ffmpeg_command(
    if filters:
        cmd += ["-vf", ",".join(filters)]

+    # LTX-2 output rate + exact frame cap (apply to both clip and webp-seq paths).
+    if target_fps is not None:
+        cmd += ["-r", f"{target_fps:g}"]
+    if frames is not None:
+        cmd += ["-frames:v", str(frames)]
+
    if image_sequence:
        cmd += [
            "-an",
@@ -128,24 +145,79 @@ def build_ffmpeg_command(
            os.path.join(output_path, "frame_%04d.webp"),
        ]
    else:
-        cmd += ["-c:v", encoder, "-c:a", "pcm_s16le", output_path]
+        cmd += ["-c:v", encoder]
+        if "nvenc" in encoder:
+            cmd += ["-preset", "p4", "-cq", "28"]
+        elif "vaapi" in encoder:
+            cmd += ["-qp", "28"]
+        elif "qsv" in encoder:
+            cmd += ["-global_quality", "28"]
+        elif "amf" in encoder:
+            cmd += ["-qp_i", "28", "-qp_p", "28"]
+        cmd += ["-c:a", "pcm_s16le", output_path]
    return cmd


-def build_audio_extract_command(input_path: str, start: float, sequence_dir: str) -> list[str]:
+def build_audio_extract_command(input_path: str, start: float, sequence_dir: str,
+                                duration: float = 8.0) -> list[str]:
    """Return an ffmpeg command that extracts audio to <sequence_dir>.wav."""
    audio_path = sequence_dir + ".wav"
    return [
        _bin("ffmpeg"), "-y",
        "-ss", str(start),
        "-i", input_path,
-        "-t", "8",
+        "-t", str(duration),
        "-vn",
        "-c:a", "pcm_s16le",
        audio_path,
    ]


+# Audio codec chosen per output extension for the manual "Extract audio area"
+# tool. Empty list -> let ffmpeg pick a default encoder from the extension.
+_AUDIO_CODEC_BY_EXT: dict[str, list[str]] = {
+    ".wav":  ["-c:a", "pcm_s16le"],
+    ".flac": ["-c:a", "flac"],
+    ".mp3":  ["-c:a", "libmp3lame", "-q:a", "2"],
+    ".m4a":  ["-c:a", "aac", "-b:a", "256k"],
+    ".aac":  ["-c:a", "aac", "-b:a", "256k"],
+    ".ogg":  ["-c:a", "libvorbis", "-q:a", "5"],
+    ".opus": ["-c:a", "libopus", "-b:a", "192k"],
+}
+
+
+def probe_duration(path: str) -> float | None:
+    """Return the media duration in seconds via ffprobe, or None on failure."""
+    try:
+        r = subprocess.run(
+            [_bin("ffprobe"), "-v", "error", "-show_entries", "format=duration",
+             "-of", "default=nw=1:nk=1", path],
+            capture_output=True, text=True, timeout=30,
+        )
+        if r.returncode == 0 and r.stdout.strip():
+            return float(r.stdout.strip())
+    except Exception:
+        pass
+    return None
+
+
+def build_audio_clip_command(input_path: str, start: float, duration: float,
+                             out_path: str) -> list[str]:
+    """ffmpeg command to extract exactly *duration* seconds of audio starting
+    at *start*, re-encoded per *out_path*'s extension (wav/mp3/flac/…)."""
+    ext = os.path.splitext(out_path)[1].lower()
+    codec = _AUDIO_CODEC_BY_EXT.get(ext, [])
+    return [
+        _bin("ffmpeg"), "-y",
+        "-ss", str(start),
+        "-i", input_path,
+        "-t", str(duration),
+        "-vn",
+        *codec,
+        out_path,
+    ]
+
+
 def detect_hw_encoders() -> list[str]:
    """Probe ffmpeg for available H.264 hardware encoders.

@@ -0,0 +1,26 @@
+"""LTX-2 frame-count math. Legal F satisfy F % 8 == 1 (8x temporal + 1)."""
+
+
+def is_legal_frames(f: int) -> bool:
+    return f >= 9 and f % 8 == 1
+
+
+def legal_frames(min_f: int = 9, max_f: int = 1000) -> list[int]:
+    start = max(9, min_f + ((1 - min_f) % 8))   # first 8k+1 >= min_f
+    return list(range(start, max_f + 1, 8))
+
+
+def nearest_legal_frames(f: int) -> int:
+    if f <= 9:
+        return 9
+    low = ((f - 1) // 8) * 8 + 1
+    high = low + 8
+    return low if (f - low) <= (high - f) else high
+
+
+def duration_for_frames(frames: int, fps: float) -> float:
+    return frames / fps
+
+
+def frames_for_duration(duration: float, fps: float) -> int:
+    return nearest_legal_frames(round(duration * fps))
@@ -24,16 +24,26 @@ def _log(*args) -> None:
    print(f"[8-cut {ts}]", *args, file=sys.stderr)


-def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
-    group = f"{basename}_{counter:03d}"
-    name = f"{group}_{sub}" if sub is not None else group
-    return os.path.join(folder, group, name + ".mp4")
+def build_export_path(folder: str, basename: str, counter: int,
+                      sub: int | None = None, tag: str | None = None) -> str:
+    """Build clip output path.  *folder* should be the vid folder (e.g. .../mp4/vid_001)."""
+    name = f"{basename}_{counter:03d}"
+    if tag is not None:
+        name = f"{name}_{tag}"
+    if sub is not None:
+        name = f"{name}_{sub}"
+    return os.path.join(folder, name + ".mp4")


-def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
-    group = f"{basename}_{counter:03d}"
-    name = f"{group}_{sub}" if sub is not None else group
-    return os.path.join(folder, group, name)
+def build_sequence_dir(folder: str, basename: str, counter: int,
+                       sub: int | None = None, tag: str | None = None) -> str:
+    """Build WebP sequence output dir.  *folder* should be the vid folder."""
+    name = f"{basename}_{counter:03d}"
+    if tag is not None:
+        name = f"{name}_{tag}"
+    if sub is not None:
+        name = f"{name}_{sub}"
+    return os.path.join(folder, name)


 def format_time(seconds: float) -> str:
@@ -0,0 +1,98 @@
+# Audio Pipeline Improvements Design
+
+Date: 2026-04-19
+
+## Goal
+
+Improve audio scan classification accuracy, especially for non-speech sounds (suction, gagging, impacts), through three changes:
+
+1. Multi-layer feature extraction from existing HuBERT/Wav2Vec2 models
+2. Two new embedding models: AST (AudioSet-supervised) and EAT (self-supervised + AudioSet finetuned)
+3. Calibrated classifier for better threshold behavior
+
+## 1. Multi-Layer Feature Extraction
+
+### Current behavior
+
+`model(waveforms)` extracts embeddings from the **last transformer layer only**.
+
+### Change
+
+Use `model.extract_features(waveforms)` (torchaudio API) to get all layer outputs. Select layers at quartile boundaries, mean-pool each over time, concatenate.
+
+| Model | Layers | Single-layer dim | Multi-layer dim (4 quartiles) |
+|-------|--------|-------------------|-------------------------------|
+| HUBERT_XLARGE | 48 | 1280 | 5120 |
+| HUBERT_LARGE | 24 | 1024 | 4096 |
+| HUBERT_BASE | 12 | 768 | 3072 |
+| WAV2VEC2_BASE | 12 | 768 | 3072 |
+
+### Implementation
+
+- New entries in `_EMBED_MODELS`: `"HUBERT_XLARGE_ML"` -> 5120, etc.
+- `_extract_w2v_windows`: when model name ends with `_ML`, call `extract_features()` instead of `model()`, select quartile layers, concat
+- Cache key: model name includes `_ML` suffix -> separate cache files
+- No change to the classifier or training pipeline (`HistGradientBoostingClassifier` handles high-dimensional input fine)
+
+## 2. AST (Audio Spectrogram Transformer)
+
+### What
+
+`MIT/ast-finetuned-audioset-10-10-0.4593` via HuggingFace `transformers`. 86M params, 768-dim, supervised on AudioSet 527 sound classes.
+
+### Integration
+
+- Load: `ASTModel.from_pretrained()` + `ASTFeatureExtractor`
+- Preprocessing: `ASTFeatureExtractor` handles mel spectrogram from 16kHz raw audio
+- Batching: prepare `input_values` per window, stack into batch, forward through model
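+- Multi-layer: `output_hidden_states=True` returns 13 hidden states (the embedding output at index 0, plus one per transformer layer for the 12 layers); the `AST_ML` variant mean-pools four quartile layers and concatenates them -> 3072-dim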
+- Model cached via `_get_w2v_model()` same lazy-load pattern
+
+### Entries
+
+- `"AST"` -> 768
+- `"AST_ML"` -> 3072
+
+## 3. EAT (Efficient Audio Transformer)
+
+### What
+
+`worstchan/EAT-base_epoch30_finetune_AS2M` via HuggingFace with `trust_remote_code=True`. 88M params, 768-dim, self-supervised + AudioSet finetuned.
+
+### Integration
+
+- Load: `AutoModel.from_pretrained(..., trust_remote_code=True)`
+- Preprocessing: manual 128-bin Kaldi fbank mel spectrogram via torchaudio, normalize with EAT constants `(mel - (-4.268)) / (4.569 * 2)`, reshape to `[B, 1, T, 128]`
+- Feature extraction: `model.extract_features(mel)` returns `[B, seq, 768]`. Two pooling options: take the CLS token `[:, 0, :]` as an utterance-level embedding, or mean-pool the remaining frame-level tokens `[:, 1:, :]`. Use the mean-pool, for consistency with the other models.
+- Multi-layer: not natively supported, skip for now
+
+### Entry
+
+- `"EAT"` -> 768
+
+## 4. Calibrated Classifier
+
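+Wrap `HistGradientBoostingClassifier` in `CalibratedClassifierCV(clf, cv=3, method='isotonic')` after the initial fit. Note that with an integer `cv` the wrapper clones the estimator and refits it on each fold rather than reusing the already-fitted instance. This gives better-calibrated probabilities, so the threshold slider maps more linearly to precision/recall.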
+
+One change in `train_classifier()`, no UI changes needed.
+
+## 5. Requirements
+
+Add to `requirements.txt`:
+```
+transformers>=4.30
+timm>=0.9
+```
+
+Both AST and EAT need `transformers`. EAT additionally needs `timm` (used internally by its custom model code). Both setup scripts (`setup_env.sh`, `setup-windows.ps1`) install from `requirements.txt` so no changes needed there.
+
+## Cache Compatibility
+
+- All new model variants get distinct cache keys via model name in the hash
+- Existing caches for HUBERT_XLARGE, BEATs, etc. remain valid and untouched
+- New models create new `.npz` files in the same `cache/w2v/` directory
+
+## UI Changes
+
+- `_EMBED_MODELS` dict additions appear automatically in Train dialog model dropdown and scan model dropdown
+- No other UI changes needed
@@ -0,0 +1,588 @@
+# Audio Pipeline Improvements Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Improve audio scan accuracy with multi-layer extraction, AST/EAT models, and calibrated classifier.
+
+**Architecture:** All changes are in `core/audio_scan.py`. The embedding extraction functions gain new model-type branches (AST, EAT, multi-layer). The classifier gets a calibration wrapper. `_EMBED_MODELS` dict and `_get_w2v_model()` are extended. No UI changes needed — new models appear automatically in dropdowns.
+
+**Tech Stack:** torchaudio (existing), transformers (new dep), timm (new dep), sklearn.calibration (existing dep)
+
+**Key design notes:**
+- `_get_w2v_model()` resolves `_ML` suffixed names to their base model for loading (e.g. `HUBERT_XLARGE_ML` loads `HUBERT_XLARGE`). Both share the same GPU model — only the extraction path differs (last-layer vs multi-layer). The global `_w2v_model_name` stores the **base** name so switching between `HUBERT_XLARGE` and `HUBERT_XLARGE_ML` does NOT trigger a reload.
+- Cache keys use the **full** model name (including `_ML`), so single-layer and multi-layer caches coexist as separate `.npz` files.
+- AST and EAT are separate model types that do NOT share the torchaudio loading path — they get their own `elif` branches in `_get_w2v_model()`.
+- Both `_extract_w2v_windows` and `_extract_w2v_targeted` need identical changes to their batch inference blocks. Keep them in sync.
+
+---
+
+### Task 1: Add transformers and timm to requirements
+
+**Files:**
+- Modify: `requirements.txt`
+
+**Step 1: Add dependencies**
+
+Add after the `torchaudio` line in `requirements.txt`:
+
+```
+transformers>=4.30
+timm>=0.9
+```
+
+**Step 2: Verify install**
+
+Run: `pip install transformers timm`
+
+**Step 3: Commit**
+
+```bash
+git add requirements.txt
+git commit -m "deps: add transformers and timm for AST/EAT models"
+```
+
+---
+
+### Task 2: Multi-layer extraction for torchaudio models
+
+**Files:**
+- Modify: `core/audio_scan.py:50-58` (_EMBED_MODELS dict)
+- Modify: `core/audio_scan.py:96-100` (_embed_dim)
+- Modify: `core/audio_scan.py:68-93` (_get_w2v_model)
+- Modify: `core/audio_scan.py:189-205` (_extract_w2v_windows batch loop)
+- Modify: `core/audio_scan.py:278-293` (_extract_w2v_targeted batch loop)
+- Test: `tests/test_audio_scan.py`
+
+**Step 1: Write failing test**
+
+Add to `tests/test_audio_scan.py`:
+
+```python
+def test_embed_dim_multi_layer():
+    from core.audio_scan import _embed_dim
+    # Multi-layer models should report concatenated dimension
+    assert _embed_dim("HUBERT_XLARGE_ML") == 5120
+    assert _embed_dim("HUBERT_LARGE_ML") == 4096
+    assert _embed_dim("HUBERT_BASE_ML") == 3072
+    # Single-layer unchanged
+    assert _embed_dim("HUBERT_XLARGE") == 1280
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_audio_scan.py::test_embed_dim_multi_layer -v`
+Expected: FAIL — `_embed_dim("HUBERT_XLARGE_ML")` returns 768 (default fallback)
+
+**Step 3: Add multi-layer entries to _EMBED_MODELS**
+
+In `core/audio_scan.py:50-58`, add after existing entries:
+
+```python
+_EMBED_MODELS = {
+    "WAV2VEC2_BASE":           768,
+    "WAV2VEC2_LARGE":         1024,
+    "WAV2VEC2_LARGE_LV60K":  1024,
+    "HUBERT_BASE":             768,
+    "HUBERT_LARGE":           1024,
+    "HUBERT_XLARGE":          1280,
+    "BEATS":                   768,
+    # Multi-layer variants (4 quartile layers concatenated)
+    "WAV2VEC2_BASE_ML":       3072,   # 768 * 4
+    "HUBERT_BASE_ML":         3072,   # 768 * 4
+    "HUBERT_LARGE_ML":        4096,   # 1024 * 4
+    "HUBERT_XLARGE_ML":       5120,   # 1280 * 4
+}
+```
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_audio_scan.py::test_embed_dim_multi_layer -v`
+Expected: PASS
+
+**Step 5: Add helper to resolve base model and layer indices**
+
+Add after `_embed_dim()` (around line 101):
+
+```python
+def _ml_config(model_name: str) -> tuple[str, list[int]] | None:
+    """If model_name is a multi-layer variant, return (base_model, layer_indices).
+
+    Returns None for single-layer models.
+    Layer indices are 0-based into the list returned by extract_features().
+    """
+    if not model_name.endswith("_ML"):
+        return None
+    base = model_name[:-3]  # strip "_ML"
+    if base not in _EMBED_MODELS:
+        return None
+    # Layer counts per model family
+    layer_counts = {
+        "WAV2VEC2_BASE": 12, "WAV2VEC2_LARGE": 24, "WAV2VEC2_LARGE_LV60K": 24,
+        "HUBERT_BASE": 12, "HUBERT_LARGE": 24, "HUBERT_XLARGE": 48,
+        "AST": 12,
+    }
+    n = layer_counts.get(base)
+    if n is None:
+        return None
+    # Select 4 layers at quartile boundaries (0-indexed)
+    indices = [n // 4 - 1, n // 2 - 1, 3 * n // 4 - 1, n - 1]
+    return base, indices
+```
+
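+Note: AST is included in the layer_counts dict here already so Task 3 doesn't need to modify it again. Caveat: these indices are 0-based into torchaudio's `extract_features()` list, which has exactly n entries. HuggingFace's `hidden_states` tuple has n + 1 entries (index 0 is the embedding output), so the indices must be shifted by +1 when indexing `out.hidden_states`; otherwise `AST_ML` selects each layer one position early and never includes the final layer.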
+
+**Step 6: Write test for _ml_config**
+
+```python
+def test_ml_config():
+    from core.audio_scan import _ml_config
+    assert _ml_config("HUBERT_XLARGE") is None
+    assert _ml_config("BEATS_ML") is None  # BEATS has no ML variant
+    base, layers = _ml_config("HUBERT_XLARGE_ML")
+    assert base == "HUBERT_XLARGE"
+    assert layers == [11, 23, 35, 47]
+    base, layers = _ml_config("HUBERT_BASE_ML")
+    assert base == "HUBERT_BASE"
+    assert layers == [2, 5, 8, 11]
+```
+
+Run: `pytest tests/test_audio_scan.py::test_ml_config -v`
+Expected: PASS
+
+**Step 7: Modify _get_w2v_model to resolve ML base names**
+
+In `_get_w2v_model()` (line 68), the comparison key must use the resolved base name so that `HUBERT_XLARGE` and `HUBERT_XLARGE_ML` share the same loaded model without reloading:
+
+```python
+def _get_w2v_model(model_name: str | None = None):
+    """Lazy-load an embedding model. Reloads if model_name differs from cached."""
+    global _w2v_model, _w2v_device, _w2v_model_name
+    if model_name is None:
+        model_name = _DEFAULT_EMBED_MODEL
+    # Multi-layer variants use the same base model weights
+    ml = _ml_config(model_name)
+    load_name = ml[0] if ml else model_name
+    if _w2v_model is None or _w2v_model_name != load_name:
+        import torch
+        _w2v_device = "cuda" if torch.cuda.is_available() else "cpu"
+        if load_name == "BEATS":
+            ...  # existing BEATs code unchanged
+        else:
+            import torchaudio
+            bundle = getattr(torchaudio.pipelines, load_name)
+            _w2v_model = bundle.get_model().to(_w2v_device)
+        _w2v_model.eval()
+        _w2v_model_name = load_name
+        _log(f"audio_scan: {load_name} loaded on {_w2v_device}")
+    return _w2v_model, _w2v_device
+```
+
+**Step 8: Modify _extract_w2v_windows batch inference**
+
+In `_extract_w2v_windows`, compute `ml_cfg` **once** before the batch loop (after line 173 `is_beats = ...`):
+
+```python
+    ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
+```
+
+Then replace the batch inference block (lines 197-204):
+
+```python
+        with torch.no_grad():
+            waveforms = torch.from_numpy(np.stack(chunks)).float().to(device)
+            if is_beats:
+                padding_mask = torch.zeros_like(waveforms, dtype=torch.bool)
+                features, _ = model.extract_features(waveforms, padding_mask=padding_mask)
+                batch_emb = features.mean(dim=1).cpu().numpy()
+            elif ml_cfg is not None:
+                all_layers, _ = model.extract_features(waveforms)
+                selected = [all_layers[i].mean(dim=1) for i in ml_cfg[1]]
+                batch_emb = torch.cat(selected, dim=1).cpu().numpy()
+            else:
+                features, _ = model(waveforms)
+                batch_emb = features.mean(dim=1).cpu().numpy()
+        embeddings.append(batch_emb)
+```
+
+**Step 9: Modify _extract_w2v_targeted batch inference (keep in sync)**
+
+In `_extract_w2v_targeted`, add `ml_cfg` computation after line 276 `is_beats = ...`:
+
+```python
+    ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
+```
+
+Then replace the batch inference block (lines 285-292) with the same branching logic as Step 8:
+
+```python
+        with torch.no_grad():
+            waveforms = torch.from_numpy(np.stack(chunks)).float().to(device)
+            if is_beats:
+                padding_mask = torch.zeros_like(waveforms, dtype=torch.bool)
+                features, _ = model.extract_features(waveforms, padding_mask=padding_mask)
+                batch_emb = features.mean(dim=1).cpu().numpy()
+            elif ml_cfg is not None:
+                all_layers, _ = model.extract_features(waveforms)
+                selected = [all_layers[i].mean(dim=1) for i in ml_cfg[1]]
+                batch_emb = torch.cat(selected, dim=1).cpu().numpy()
+            else:
+                features, _ = model(waveforms)
+                batch_emb = features.mean(dim=1).cpu().numpy()
+        embeddings_list.append(batch_emb)
+```
+
+Note: `_extract_w2v_targeted` appends to `embeddings_list` (not `embeddings`).
+
+**Step 10: Run all tests**
+
+Run: `pytest tests/ -v`
+Expected: All pass
+
+**Step 11: Commit**
+
+```bash
+git add core/audio_scan.py tests/test_audio_scan.py
+git commit -m "feat: multi-layer extraction for HuBERT/Wav2Vec2 models"
+```
+
+---
+
+### Task 3: AST model integration
+
+**Files:**
+- Modify: `core/audio_scan.py:50-65` (_EMBED_MODELS, add AST entries)
+- Modify: `core/audio_scan.py:45-47` (add _ast_feature_extractor global)
+- Modify: `core/audio_scan.py:68-93` (_get_w2v_model, add AST loading branch)
+- Modify: `core/audio_scan.py` (_extract_w2v_windows and _extract_w2v_targeted, add AST inference branch)
+- Test: `tests/test_audio_scan.py`
+
+**Step 1: Write failing test**
+
+```python
+def test_embed_dim_ast():
+    from core.audio_scan import _embed_dim
+    assert _embed_dim("AST") == 768
+    assert _embed_dim("AST_ML") == 3072
+```
+
+Run: `pytest tests/test_audio_scan.py::test_embed_dim_ast -v`
+Expected: FAIL
+
+**Step 2: Add AST entries to _EMBED_MODELS**
+
+Add to the dict (after the ML entries):
+
+```python
+    # Transformers-based models
+    "AST":                     768,
+    "AST_ML":                 3072,   # 768 * 4
+```
+
+Run test again — should PASS now.
+
+**Step 3: Add module-level global for AST feature extractor**
+
+Near line 47 (after `_w2v_model_name = None`):
+
+```python
+_ast_feature_extractor = None
+```
+
+**Step 4: Add AST loading branch in _get_w2v_model**
+
+In `_get_w2v_model()`, add an `elif` branch **before** the torchaudio fallback `else`:
+
+```python
+        elif load_name == "AST":
+            from transformers import ASTModel, ASTFeatureExtractor
+            _w2v_model = ASTModel.from_pretrained(
+                "MIT/ast-finetuned-audioset-10-10-0.4593"
+            ).to(_w2v_device)
+            global _ast_feature_extractor
+            _ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
+                "MIT/ast-finetuned-audioset-10-10-0.4593"
+            )
+```
+
+Note: `_ast_feature_extractor` is recreated on every model load (not cached separately) — simple and correct since the feature extractor is lightweight and model reloads are rare.
+
+**Step 5: Add AST inference branch in both extraction functions**
+
+In both `_extract_w2v_windows` and `_extract_w2v_targeted`, compute `is_ast` once before the loop:
+
+```python
+    is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
+```
+
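+Then in the batch inference block, add this **before** the `elif ml_cfg is not None` branch. `AST_ML` has a non-None `ml_cfg`, so if the generic multi-layer branch came first it would intercept `AST_ML` and call the torchaudio-only `model.extract_features(waveforms)` on the AST model: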
+
+```python
+            elif is_ast:
+                # AST uses its own feature extractor for mel spectrogram
+                inputs = _ast_feature_extractor(
+                    list(chunks), sampling_rate=sr, return_tensors="pt",
+                    padding=True,
+                )
+                input_values = inputs.input_values.to(device)
+                if ml_cfg is not None:
+                    out = model(input_values, output_hidden_states=True)
+                    selected = [out.hidden_states[i].mean(dim=1) for i in ml_cfg[1]]
+                    batch_emb = torch.cat(selected, dim=1).cpu().numpy()
+                else:
+                    out = model(input_values)
+                    batch_emb = out.last_hidden_state.mean(dim=1).cpu().numpy()
+```
+
+Important: `chunks` is already a list of numpy arrays (built in the loop at lines 194-196). Pass it directly as `list(chunks)` — the `ASTFeatureExtractor` accepts a list of numpy arrays and handles batching/padding internally. Verified: `ASTFeatureExtractor([np.array, np.array, ...], sampling_rate=16000, return_tensors="pt", padding=True)` returns `input_values` of shape `[B, 1024, 128]`.
+
+**Step 6: Run all tests**
+
+Run: `pytest tests/ -v`
+Expected: All pass
+
+**Step 7: Commit**
+
+```bash
+git add core/audio_scan.py tests/test_audio_scan.py
+git commit -m "feat: add AST (Audio Spectrogram Transformer) embedding model"
+```
+
+---
+
+### Task 4: EAT model integration
+
+**Files:**
+- Modify: `core/audio_scan.py:50-65` (_EMBED_MODELS, add EAT entry)
+- Modify: `core/audio_scan.py:68-93` (_get_w2v_model, add EAT loading branch)
+- Add: `core/audio_scan.py` (_eat_preprocess helper function)
+- Modify: `core/audio_scan.py` (_extract_w2v_windows and _extract_w2v_targeted, add EAT inference branch)
+- Test: `tests/test_audio_scan.py`
+
+**Step 1: Write failing test**
+
+```python
+def test_embed_dim_eat():
+    from core.audio_scan import _embed_dim
+    assert _embed_dim("EAT") == 768
+```
+
+**Step 2: Add EAT entry to _EMBED_MODELS**
+
+```python
+    "EAT":                     768,
+```
+
+Note: No `EAT_ML` variant — EAT's `extract_features()` does not natively support multi-layer output. Can be added later if needed by monkey-patching.
+
+**Step 3: Add EAT loading branch in _get_w2v_model**
+
+Add after the AST branch, before the torchaudio `else`:
+
+```python
+        elif load_name == "EAT":
+            from transformers import AutoModel
+            _w2v_model = AutoModel.from_pretrained(
+                "worstchan/EAT-base_epoch30_finetune_AS2M",
+                trust_remote_code=True,
+            ).to(_w2v_device)
+```
+
+**Step 4: Add EAT preprocessing helper**
+
+Add as a module-level function near `_get_w2v_model`:
+
+```python
+def _eat_preprocess(chunks: list[np.ndarray], sr: int, device: str):
+    """Convert raw audio chunks to EAT mel spectrogram input.
+
+    Returns tensor of shape [B, 1, T, 128].
+    8s audio at 10ms frame shift produces ~798 frames, zero-padded to 1024.
+    """
+    import torch
+    import torchaudio.compliance.kaldi as kaldi
+
+    TARGET_LEN = 1024
+    MEAN, STD = -4.268, 4.569
+
+    mels = []
+    for chunk in chunks:
+        wav = torch.from_numpy(chunk).unsqueeze(0).float()
+        fbank = kaldi.fbank(
+            wav, htk_compat=True, sample_frequency=sr, use_energy=False,
+            window_type='hanning', num_mel_bins=128, dither=0.0, frame_shift=10,
+        )
+        # Pad or truncate to TARGET_LEN
+        if fbank.shape[0] < TARGET_LEN:
+            fbank = torch.nn.functional.pad(fbank, (0, 0, 0, TARGET_LEN - fbank.shape[0]))
+        else:
+            fbank = fbank[:TARGET_LEN]
+        fbank = (fbank - MEAN) / (STD * 2)
+        mels.append(fbank)
+    return torch.stack(mels).unsqueeze(1).to(device)  # [B, 1, T, 128]
+```
+
+**Step 5: Add EAT inference branch in both extraction functions**
+
+Compute `is_eat` once before the loop:
+
+```python
+    is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
+```
+
+Then in the batch inference block, add after the `elif is_ast` branch and before `else`:
+
+```python
+            elif is_eat:
+                mel_input = _eat_preprocess(chunks, sr, device)
+                features = model.extract_features(mel_input)
+                # Mean-pool frame-level tokens (skip CLS at index 0)
+                batch_emb = features[:, 1:, :].mean(dim=1).cpu().numpy()
+```
+
+Important: `model.extract_features()` returns a plain `torch.Tensor` of shape `[B, 513, 768]` (not a tuple). Index 0 is the CLS token, indices 1-512 are frame-level patch embeddings. We mean-pool the frame tokens for consistency with how other models are pooled.
+
+**Step 6: Run all tests**
+
+Run: `pytest tests/ -v`
+Expected: All pass
+
+**Step 7: Commit**
+
+```bash
+git add core/audio_scan.py tests/test_audio_scan.py
+git commit -m "feat: add EAT (Efficient Audio Transformer) embedding model"
+```
+
+---
+
+### Task 5: Calibrated classifier
+
+**Files:**
+- Modify: `core/audio_scan.py:424-429` (train_classifier, wrap clf)
+- Test: `tests/test_audio_scan.py`
+
+**Step 1: Modify train_classifier**
+
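+After the existing `clf.fit()` call (line 428), add calibration behind a sample-count safeguard. Note that `CalibratedClassifierCV` with an integer `cv` clones and refits the estimator on each fold, so the initial fit is only the one actually used when calibration is skipped: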
+
+```python
+    clf.fit(X[train_idx], y_arr[train_idx])
+    _log("audio_scan: classifier trained")
+
+    # Calibrate probabilities for better threshold behavior
+    # Requires at least 6 samples per class for stable 3-fold isotonic calibration
+    from sklearn.calibration import CalibratedClassifierCV
+    min_class = min(int(n_pos), int(n_neg_sample))
+    if min_class >= 6:
+        cal_clf = CalibratedClassifierCV(clf, cv=3, method='isotonic')
+        cal_clf.fit(X[train_idx], y_arr[train_idx])
+        clf = cal_clf
+        _log("audio_scan: classifier calibrated (isotonic, 3-fold)")
+    else:
+        _log(f"audio_scan: skipping calibration (min class size {min_class} < 6)")
+```
+
+Why `min_class >= 6`: `CalibratedClassifierCV` uses stratified k-fold internally. With `cv=3`, each fold needs at least 2 samples per class. `min_class >= 6` guarantees this. With fewer samples, the uncalibrated HistGBT probabilities are still reasonable — calibration is an enhancement, not a requirement.
+
+Previous plan bug: `cv=min(3, n_pos, n_neg_sample)` could produce `cv=1` when `n_pos=1`, which raises `ValueError` (minimum is 2). Even `cv=2` with 2 positives causes one fold to have only 1 positive, making isotonic regression unstable. The `>= 6` guard avoids all these edge cases.
+
+**Step 2: Run all tests**
+
+Run: `pytest tests/ -v`
+Expected: All pass
+
+**Step 3: Commit**
+
+```bash
+git add core/audio_scan.py
+git commit -m "feat: calibrate classifier probabilities with isotonic regression"
+```
+
+---
+
+### Task 6: Integration test with real model (manual)
+
+This task is manual — it requires GPU and a real video file.
+
+**Step 1: Test multi-layer extraction**
+
+```bash
+python -c "
+from core.audio_scan import _extract_w2v_windows, _embed_dim
+import numpy as np
+y = np.random.randn(16000 * 20).astype(np.float32) * 0.01
+ts, emb = _extract_w2v_windows(y, model_name='HUBERT_XLARGE_ML')
+print(f'HUBERT_XLARGE_ML: {emb.shape}')  # expect (13, 5120)
+assert emb.shape[1] == _embed_dim('HUBERT_XLARGE_ML')
+print('PASS')
+"
+```
+
+**Step 2: Test AST extraction**
+
+```bash
+python -c "
+from core.audio_scan import _extract_w2v_windows, _embed_dim
+import numpy as np
+y = np.random.randn(16000 * 20).astype(np.float32) * 0.01
+ts, emb = _extract_w2v_windows(y, model_name='AST')
+print(f'AST: {emb.shape}')  # expect (13, 768)
+assert emb.shape[1] == _embed_dim('AST')
+print('PASS')
+"
+```
+
+**Step 3: Test AST multi-layer**
+
+```bash
+python -c "
+from core.audio_scan import _extract_w2v_windows, _embed_dim
+import numpy as np
+y = np.random.randn(16000 * 20).astype(np.float32) * 0.01
+ts, emb = _extract_w2v_windows(y, model_name='AST_ML')
+print(f'AST_ML: {emb.shape}')  # expect (13, 3072)
+assert emb.shape[1] == _embed_dim('AST_ML')
+print('PASS')
+"
+```
+
+**Step 4: Test EAT extraction**
+
+```bash
+python -c "
+from core.audio_scan import _extract_w2v_windows, _embed_dim
+import numpy as np
+y = np.random.randn(16000 * 20).astype(np.float32) * 0.01
+ts, emb = _extract_w2v_windows(y, model_name='EAT')
+print(f'EAT: {emb.shape}')  # expect (13, 768)
+assert emb.shape[1] == _embed_dim('EAT')
+print('PASS')
+"
+```
+
+**Step 5: Test model switching doesn't reload unnecessarily**
+
+```bash
+python -c "
+from core.audio_scan import _get_w2v_model
+import core.audio_scan as m
+# Load HUBERT_XLARGE
+_get_w2v_model('HUBERT_XLARGE')
+name1 = m._w2v_model_name
+# Switch to ML variant — should NOT reload
+_get_w2v_model('HUBERT_XLARGE_ML')
+name2 = m._w2v_model_name
+assert name1 == name2 == 'HUBERT_XLARGE', f'Expected no reload, got {name1} -> {name2}'
+print('PASS: no reload on ML switch')
+"
+```
+
+**Step 6: Test full train+scan cycle in app**
+
+Load app, select each new model from scan model dropdown, scan a video, train, verify results display correctly.
+
+**Step 7: Final commit and push**
+
+```bash
+git push
+```
@@ -0,0 +1,226 @@
+# ComfyUI-8cut Node Pack Design
+
+Date: 2026-04-19
+
+## Goal
+
+Port 8-cut's video scanning, training, review, and export workflow to a ComfyUI node pack. The primary motivation is **remote access** — ComfyUI's web UI allows browser-based operation over the network, and HTML5 `<video>` handles streaming compression natively. No tensor-based image pipeline; videos stay as file paths throughout.
+
+## Architecture
+
+### Approach
+
+Monolithic Review Node + simple pipeline nodes. One central **VideoReview** node embeds the full interactive player/timeline/region table as a large DOM widget. Other nodes (Scan, Train, Export) are headless pipeline nodes that pass lightweight metadata.
+
+### Core reuse
+
+The entire `8-cut/core/` package is Qt-free and reusable as-is:
+- `core/audio_scan.py` — `scan_video()`, `train_classifier()`, `load_classifier()`
+- `core/db.py` — `ProcessedDB` (SQLite, all scan/training/export persistence)
+- `core/ffmpeg.py` — `build_ffmpeg_command()` (clip export)
+- `core/tracking.py` — YOLO-based subject tracking
+- `core/paths.py` — path helpers, `format_time()`
+
+No porting required — these are imported directly.
+
+---
+
+## Node Pack Structure
+
+```
+ComfyUI-8cut/
+  __init__.py                    # NODE_CLASS_MAPPINGS, WEB_DIRECTORY
+  core/                          # symlink or copy of 8-cut/core/
+  data/
+    8cut.db                      # separate SQLite DB (can copy from ~/.8cut.db)
+  models/                        # trained classifiers (.joblib)
+  nodes/
+    load_video.py
+    audio_scan.py
+    video_review.py
+    train_model.py
+    export_clips.py
+  server_routes.py               # custom API routes
+  web/
+    js/
+      video_review.js            # timeline + player + scan panel widget
+```
+
+---
+
+## Custom Types
+
+No tensors anywhere in the pipeline. All data flows as lightweight metadata:
+
+| Type | Python value | Purpose |
+|------|-------------|---------|
+| `VIDEO_PATH` | `str` (absolute path) | Video file reference |
+| `SCAN_REGIONS` | `list[dict]` with start/end/score/model/disabled | Scan output / review edits |
+| `SCAN_MODEL` | `str` (path to .joblib) | Trained classifier |
+
+---
+
+## Nodes
+
+### LoadVideo
+
+| | |
+|---|---|
+| **Input** | `video_path` (STRING, file browser), `profile` (STRING combo from DB profiles) |
+| **Output** | `VIDEO_PATH`, `filename` (STRING) |
+| **Logic** | Validates path exists, returns it. Populates profile combo via API route. |
+
+### AudioScan
+
+| | |
+|---|---|
+| **Input** | `VIDEO_PATH`, `SCAN_MODEL`, `threshold` (FLOAT 0-1), `hop` (FLOAT) |
+| **Output** | `SCAN_REGIONS` |
+| **Logic** | Calls `core.audio_scan.scan_video()` directly. Progress via `PromptServer.send_sync("progress", ...)`. |
+
+### VideoReview (interactive, blocking)
+
+| | |
+|---|---|
+| **Input** | `VIDEO_PATH`, `SCAN_REGIONS` (optional) |
+| **Output** | `SCAN_REGIONS` (edited) |
+| **OUTPUT_NODE** | `True` |
+| **Logic** | Execution pauses here. User interacts via the widget. Clicks "Continue" to pass edited regions downstream. |
+
+The widget layout:
+
+```
++-------------------------------------+
+|  [video player (HTML5 <video>)]     |
+|  +- timeline with scan regions ----+|
+|  |  cursor + region drag/resize    ||
+|  +---------------------------------+|
+|  +- model tabs [EAT_LARGE][HuBERT]-+|
+|  | Time   | End    | Score         ||
+|  | 1:23   | 1:31   | 0.92          ||
+|  | 3:45   | 3:53   | 0.87          ||
+|  | [Add Negative] [Export] [Continue]|
+|  +---------------------------------+|
++-------------------------------------+
+```
+
+Widget size: ~640x500px minimum, resizable via LiteGraph.
+
+**Blocking mechanism**: The node's `run()` method blocks on a server-side event/queue. The frontend signals completion via `POST /8cut/review_done/{node_id}`, which unblocks `run()` and returns the edited `SCAN_REGIONS`.
+
+### TrainModel
+
+| | |
+|---|---|
+| **Input** | `profile` (STRING combo), `positive_folder` (STRING combo), `negative_folder` (STRING combo, optional), `embed_model` (STRING combo from `_EMBED_MODELS`), `use_hard_negatives` (BOOL) |
+| **Output** | `SCAN_MODEL` |
+| **Logic** | Queries `db.get_training_data()` to assemble `video_infos`, calls `core.audio_scan.train_classifier()`. Saves to `models/{profile}_{embed_model}.joblib` with version rotation. Progress via ComfyUI progress bar. |
+
+### ExportClips
+
+| | |
+|---|---|
+| **Input** | `VIDEO_PATH`, `SCAN_REGIONS`, `output_folder` (STRING), `short_side` (INT), `format` (combo MP4/WEBM), `spread` (FLOAT), `clip_count` (INT), `fuse_gap` (FLOAT) |
+| **Output** | exported file paths (list) |
+| **Logic** | Region fusion via `_build_export_spans()`, then `core.ffmpeg.build_ffmpeg_command()` per clip. Records each clip in DB via `db.add()`. |
+
+### Typical workflow
+
+```
+[LoadVideo] --> [AudioScan] --> [VideoReview] --> [ExportClips]
+                    ^
+              [TrainModel]
+```
+
+### Training loop (hard negatives round-trip)
+
+1. Scan with existing model -> regions in VideoReview
+2. Review -> mark false positives as negatives (DB)
+3. Train -> new model uses hard negatives
+4. Rescan -> better results
+5. Repeat
+
+---
+
+## API Routes
+
+### Video serving
+
+| Route | Method | Purpose |
+|-------|--------|---------|
+| `/8cut/video` | GET | Serve raw video file via `web.FileResponse`. Query param: `path`. Browser decodes mp4/h264 natively — key for remote streaming. |
+| `/8cut/video_transcode` | GET | Fallback: transcode to webm on-the-fly via ffmpeg `StreamResponse` for browser-incompatible formats (some MKV, odd codecs). |
+
+### Region editing (from VideoReview widget)
+
+| Route | Method | Purpose |
+|-------|--------|---------|
+| `/8cut/toggle_region` | POST | `toggle_scan_result_disabled()` |
+| `/8cut/resize_region` | POST | `update_scan_result()` |
+| `/8cut/delete_region` | POST | `delete_scan_result()` |
+| `/8cut/add_negatives` | POST | `add_hard_negatives()` |
+| `/8cut/scan_versions` | GET | `get_scan_versions()` |
+| `/8cut/review_done/{node_id}` | POST | Unblock the VideoReview node's `run()`, pass final regions |
+
+### Data queries (for combo widget population)
+
+| Route | Method | Purpose |
+|-------|--------|---------|
+| `/8cut/profiles` | GET | `db.get_profiles()` |
+| `/8cut/export_folders` | GET | `db.get_export_folders()` |
+| `/8cut/models` | GET | List available `.joblib` models |
+
+---
+
+## Frontend JS Widget (`web/js/video_review.js`)
+
+Registered via `app.registerExtension()`. Hooks into the VideoReview node's `onNodeCreated` and `onExecuted` callbacks.
+
+### Components
+
+1. **Video player** — HTML5 `<video>` element, src pointed at `/8cut/video?path=...`
+2. **Timeline** — `<canvas>` overlay below the video. Renders:
+   - Scan region rectangles (color-coded by score, red for negatives, gray for disabled)
+   - Cursor line (click to seek)
+   - Drag handles on region edges (resize)
+   - Waveform (optional, fetched via separate route)
+3. **Region table** — HTML table with model tabs. Click row to seek. Columns: Time, End, Score.
+4. **Action buttons** — Add Negative, Export, Continue
+5. **Version combo** — dropdown to switch scan history versions
+
+### Interaction flow
+
+- Widget activates when `onExecuted` fires with scan regions
+- User clicks/drags timeline, edits regions, marks negatives
+- Each edit hits an API route (immediate DB persistence)
+- "Continue" sends `POST /8cut/review_done/{node_id}` with final region state
+- Node's `run()` unblocks, passes `SCAN_REGIONS` downstream
+
+---
+
+## DB
+
+Separate SQLite DB at `ComfyUI-8cut/data/8cut.db`. Uses the existing `ProcessedDB` class unchanged — same schema, same migration code. Users can copy their existing `~/.8cut.db` to carry over scan history, training data, and hard negatives.
+
+---
+
+## Dependencies
+
+Same as 8-cut's `requirements.txt` minus PyQt6/python-mpv:
+- `torch`, `torchaudio`, `torchvision` (from CUDA index)
+- `transformers>=4.30,<5.0`, `timm>=0.9`
+- `librosa`, `scikit-learn`, `joblib`, `soundfile`, `numpy`
+- `ultralytics` (YOLO tracking)
+
+ComfyUI already provides torch. The node pack's install script just needs the audio/ML extras.
+
+---
+
+## Implementation Priority
+
+1. **Node pack skeleton** — structure, `__init__.py`, custom types, API routes for video serving
+2. **LoadVideo + AudioScan** — headless nodes, no widget needed yet
+3. **VideoReview widget (minimal)** — video player + static region display + Continue button
+4. **VideoReview interactivity** — timeline click/drag, region editing, negative marking
+5. **TrainModel + ExportClips** — complete the pipeline
+6. **Polish** — version history, waveform overlay, transcode fallback
@@ -0,0 +1,205 @@
+# Scan History & Hard Negative Management — Final Design
+
+Date: 2026-04-19 (implemented on `feat/training-ui`)
+
+## Goal
+
+1. Keep scan result history per `(file, model)` so users can track classifier improvement across training iterations
+2. Make hard negatives manageable — viewable, removable, and optionally disabled per training run
+3. Fix latent bug: `get_export_folders()` doesn't filter by `scan_export`
+
+---
+
+## 1. Ghost Folder Fix
+
+### Bug
+
+`get_export_folders()` queried all `output_path` rows without filtering `scan_export`. Folders that only contained scan-exported clips appeared in training dropdowns with 0 clips.
+
+### Implementation (`core/db.py`)
+
+**`get_export_folders(profile, include_scan_exports=False)`** — new parameter. When `False` (default), the SQL query adds `AND scan_export = 0` to exclude scan-only folders. The `get_training_stats()` method passes this through and also filters its return dict to remove folders with 0 clips:
+
+```python
+return {k: v for k, v in stats.items() if v["clips"] > 0}
+```
+
+### Test
+
+`tests/test_db.py::test_export_folders_excludes_scan_exports` — verifies scan-only folders are excluded by default and included when `include_scan_exports=True`.
+
+---
+
+## 2. Scan Result History
+
+### Schema
+
+Added column to `scan_results`:
+
+```sql
+scan_timestamp TEXT NOT NULL DEFAULT ''
+```
+
+All rows from the same scan share one timestamp string with **microsecond precision** (`%Y%m%d_%H%M%S_%f`, e.g. `"20260419_143022_123456"`). Microsecond precision prevents version collisions on fast successive scans.
+
+Migration adds the column via `ALTER TABLE` for existing databases. Legacy rows keep `scan_timestamp = ''`.
+
+### DB methods (`core/db.py`)
+
+**`save_scan_results(filename, profile, model, regions, max_versions=5)`**
+1. Inserts new rows with current microsecond-precision timestamp
+2. Counts distinct timestamps for this `(filename, profile, model)`
+3. Prunes oldest timestamps beyond `max_versions`
+
+No more DELETE-then-INSERT — all versions coexist in the table.
+
+**`get_scan_versions(filename, profile, model)`**
+Returns `[{timestamp, count, max_score}, ...]` ordered newest first. Filters `scan_timestamp != ''` so legacy rows don't appear as named versions.
+
+**`get_scan_results(filename, profile, scan_timestamp=None)`**
+- With `scan_timestamp`: returns rows matching that exact version
+- Without (default): uses `INNER JOIN` subquery with `MAX(scan_timestamp)` per model to return only the latest version. Legacy rows (empty timestamp) sort before any real timestamp, so they're returned when no versioned scans exist.
+
+### UI (`main.py` — `ScanResultsPanel`)
+
+Each model tab wraps its `QTableWidget` in a container `QWidget` with a `QComboBox` for version selection:
+
+```
+container (QWidget)
+├── cmb_version (QComboBox) — hidden when ≤ 1 version
+└── table (QTableWidget)
+```
+
+**Helper methods** unwrap this container:
+- `_current_table()` — returns `QTableWidget` from active tab (handles both raw table and container)
+- `_tab_table(index)` — same by tab index
+
+**Version combo** is populated by `_populate_version_combos()` after every `load_for_file()` and `add_scan_results()` call. Labels use `datetime.strptime` parsing with try/except fallback for robustness:
+
+```
+2026-04-19 14:30 (12 regions, best: 0.95)
+```
+
+**Version switching** via `_on_version_changed(model, idx)`:
+1. Reads `scan_timestamp` from combo's `userData`
+2. Calls `get_scan_results(filename, profile, scan_timestamp=ts)`
+3. Repopulates the table in-place
+4. **Clears the undo stack** — stale undo entries from a different version would corrupt data
+5. Emits `regions_edited` to refresh the timeline
+
+**Tab switch** connects `tab_changed` signal to `_on_scan_regions_edited` (not just `_update_scan_export_count`), so the timeline updates scan regions when switching model tabs.
+
+### Cache interaction
+
+Embedding cache is per `(file, model)` and doesn't change across scans. History stores classified regions (start, end, score), not embeddings.
+
+### Test
+
+`tests/test_db.py::test_scan_result_history` — saves 3 versions, verifies counts, ordering, and latest-by-default behavior.
+
+---
+
+## 3. Hard Negative Management
+
+### Schema
+
+Added column to `hard_negatives`:
+
+```sql
+source_model TEXT NOT NULL DEFAULT ''
+```
+
+Migration adds the column via `ALTER TABLE` for existing databases.
+
+### DB methods (`core/db.py`)
+
+**`add_hard_negatives(filename, profile, times, source_path="", source_model="")`** — now stores which embedding model produced the scan that led to the negative marking.
+
+**`get_hard_negatives(profile)`** — returns all rows as `[{id, filename, start_time, source_path, source_model}, ...]` for the management dialog.
+
+**`delete_hard_negatives_by_ids(ids)`** — bulk delete by row IDs.
+
+**`get_training_data(..., use_hard_negatives=True)`** — new parameter. When `False`, the hard negatives query is skipped entirely. Non-destructive — negatives remain in DB.
+
+### Source model tracking (`main.py`)
+
+`_on_scan_negatives()` now passes `source_model=self._scan_panel.current_model_name()` when marking negatives from scan results. `current_model_name()` extracts the model name from the active tab text (stripping the count suffix).
+
+### Training toggle (`main.py` — `TrainDialog`)
+
+Checkbox **"Use hard negatives in training"** (default checked) with "Manage..." button in an HBox layout. The toggle:
+- Updates live training stats preview via debounced `_update_stats()`
+- Passes `use_hard_negatives` through `_open_train_dialog()` to `get_training_data()`
+
+### Management dialog (`main.py` — `HardNegativesDialog`)
+
+Accessible from TrainDialog's "Manage..." button. Features:
+
+| Component | Details |
+|-----------|---------|
+| **Filter combo** | `(all)` + each distinct `source_model` found in data |
+| **Summary label** | `<b>N</b> hard negatives` |
+| **Table** | File, Time (`{:.1f}s`), Source Model, hidden ID column |
+| **Delete Selected** | Multi-select aware, skips hidden (filtered) rows |
+| **Clear All** | **Filter-aware**: if a model filter is active, only deletes negatives for that model with an appropriate confirmation message. If `(all)`, deletes everything. |
+| **Close** | Closes dialog, triggers stats refresh in parent TrainDialog |
+
+`blockSignals(True)` guards prevent spurious filter callbacks during `_load()` repopulation.
+
+### Tests
+
+- `test_hard_negatives_source_model` — verifies source_model stored and retrieved
+- `test_training_data_skips_hard_negatives` — verifies `use_hard_negatives=False` excludes them
+- `test_delete_hard_negatives_by_ids` — verifies bulk deletion by ID
+
+---
+
+## 4. Runtime Fixes (discovered during testing)
+
+### EAT/torchvision ABI mismatch
+
+**Problem:** `torchvision` installed from PyPI (CPU build) was incompatible with `torch` from CUDA wheel index, causing `operator torchvision::nms does not exist`.
+
+**Fix:** Added `torchvision` to the explicit torch install line in both setup scripts:
+```bash
+pip install torch torchaudio torchvision --index-url "$TORCH_INDEX"
+```
+
+Also added `--extra-index-url "$TORCH_INDEX"` to the `pip install -r requirements.txt` line to prevent transitive dependencies (timm, ultralytics) from pulling CPU-only torch packages.
+
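+Applied to: `setup_env.sh` (both conda and venv paths), `setup-windows.ps1`. In `setup-windows.ps1` the "skip if torch exists" guard was also removed, so re-running setup repairs an already-broken environment.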
+
+### EAT / transformers 5.x incompatibility
+
+**Problem:** transformers 5.x broke EAT's remote model code (`'EATModel' object has no attribute 'all_tied_weights_keys'`).
+
+**Fix:** Pinned `transformers>=4.30,<5.0` in `requirements.txt`.
+
+### NumPy non-writable array warning
+
+**Problem:** Cached HuBERT/EAT embeddings loaded from disk are read-only numpy arrays. `torch.from_numpy()` on a non-writable array emits a `UserWarning` ("The given NumPy array is not writable…"), because PyTorch does not support non-writable tensors.
+
+**Fix:** In `core/audio_scan.py`, changed EAT preprocessing to copy the array:
+```python
+wav = torch.from_numpy(np.array(chunk)).unsqueeze(0).float()
+```
+
+### Timeline not updating on tab switch
+
+**Problem:** Switching model tabs in the scan results panel didn't refresh the timeline's highlighted regions because `tab_changed` was only connected to `_update_scan_export_count`.
+
+**Fix:** Connected `tab_changed` to `_on_scan_regions_edited` instead, which handles both timeline refresh and export count update.
+
+---
+
+## File Summary
+
+| File | Changes |
+|------|---------|
+| `core/db.py` | Schema migrations, `get_export_folders` filter, versioned `save_scan_results`, `get_scan_versions`, version-aware `get_scan_results`, `add_hard_negatives` with `source_model`, `get_hard_negatives`, `delete_hard_negatives_by_ids`, `get_training_data` with `use_hard_negatives` |
+| `main.py` | `HardNegativesDialog` class, `TrainDialog` hard neg toggle + manage button, `ScanResultsPanel` container/combo architecture, version combo population and switching, `current_model_name()`, tab-switch timeline fix |
+| `core/audio_scan.py` | `np.array(chunk)` copy for read-only numpy arrays in EAT preprocessing |
+| `requirements.txt` | `transformers>=4.30,<5.0` pin |
+| `setup_env.sh` | `torchvision` in torch install, `--extra-index-url` on requirements install |
+| `setup-windows.ps1` | `torchvision` in torch install, `--extra-index-url` on requirements install, removed skip-if-exists guard |
+| `tests/test_db.py` | 5 tests covering all DB-layer changes |
@@ -0,0 +1,94 @@
+# Scan History & Hard Negative Management — Implementation Log
+
+> All tasks complete. See the design doc for the final specification.
+
+**Branch:** `feat/training-ui`
+
+---
+
+### Task 1: Fix ghost folder bug in get_export_folders -- DONE
+
+**Commit:** `2614a76 fix: get_export_folders respects scan_export filter`
+
+- `core/db.py` — `get_export_folders(profile, include_scan_exports=False)`: filters `scan_export = 0` by default
+- `core/db.py` — `get_training_stats()`: passes `include_scan_exports` through, filters out 0-clip folders
+- `tests/test_db.py` — `test_export_folders_excludes_scan_exports`
+
+---
+
+### Task 2: Scan result history — schema and DB methods -- DONE
+
+**Commit:** `4fb2ae1 feat: scan result history — keep N versions per (file, model)`
+
+- `core/db.py` — added `scan_timestamp TEXT NOT NULL DEFAULT ''` column with migration
+- `core/db.py` — `save_scan_results()`: versioned insert with microsecond-precision timestamp (`%Y%m%d_%H%M%S_%f`), auto-prunes beyond `max_versions=5`
+- `core/db.py` — `get_scan_versions()`: returns `[{timestamp, count, max_score}, ...]` newest first
+- `core/db.py` — `get_scan_results(scan_timestamp=None)`: `INNER JOIN` subquery with `MAX(scan_timestamp)` for latest-by-default
+- `tests/test_db.py` — `test_scan_result_history`
+
+---
+
+### Task 3: Scan history UI — version selector in ScanResultsPanel -- DONE
+
+**Commit:** `8ed9fbf feat: scan version selector in results panel`
+
+- `main.py` — `_add_tab()`: wraps table in container `QWidget` with version `QComboBox` (hidden when ≤ 1 version)
+- `main.py` — `_current_table()` / `_tab_table(idx)`: unwrap container to get `QTableWidget`
+- `main.py` — `_populate_version_combos()`: queries `get_scan_versions()`, formats labels with `datetime.strptime` + try/except fallback
+- `main.py` — `_on_version_changed()`: reloads table from specific version, clears undo stack, emits `regions_edited`
+- `main.py` — `current_model_name()`: extracts model name from tab text
+
+---
+
+### Task 4: Hard negatives — schema and training toggle -- DONE
+
+**Commit:** `edc5784 feat: hard negative source_model tracking, training toggle`
+
+- `core/db.py` — added `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives` with migration
+- `core/db.py` — `add_hard_negatives(source_model="")`: stores originating model
+- `core/db.py` — `get_hard_negatives(profile)`: returns full rows as list of dicts
+- `core/db.py` — `delete_hard_negatives_by_ids(ids)`: bulk delete by row IDs
+- `core/db.py` — `get_training_data(use_hard_negatives=True)`: conditionally skips hard negatives query
+- `main.py` — `TrainDialog`: "Use hard negatives" checkbox + "Manage..." button in HBox layout
+- `main.py` — `_on_scan_negatives()`: passes `source_model=self._scan_panel.current_model_name()`
+- `tests/test_db.py` — `test_hard_negatives_source_model`, `test_training_data_skips_hard_negatives`, `test_delete_hard_negatives_by_ids`
+
+---
+
+### Task 5: Hard negatives management dialog -- DONE
+
+**Commit:** `e6db83f feat: hard negatives management dialog with filter and bulk delete`
+
+- `main.py` — `HardNegativesDialog`: table with File/Time/Source Model/hidden ID columns, model filter combo, delete selected, filter-aware clear all, close button
+- Filter-aware "Clear All": respects active model filter, shows appropriate confirmation message
+
+---
+
+### Task 6: Code review fixes -- DONE
+
+**Commit:** `5d45b8d fix: timestamp collision, undo stack invalidation, label parsing, filter-aware clear`
+
+Four issues found during code review:
+1. **Timestamp collision** — second-precision timestamps could merge versions on sub-second calls. Fixed with microsecond precision `%f`
+2. **Undo stack invalidation** — switching scan versions left stale undo entries. Fixed by clearing undo stack in `_on_version_changed()`
+3. **Timestamp label fragile parsing** — hard-coded string slicing. Fixed with `datetime.strptime` + try/except fallback
+4. **Clear All ignoring filter** — deleted all negatives regardless of model filter. Fixed to respect active filter
+
+---
+
+### Runtime fixes (discovered during manual testing)
+
+| Commit | Fix |
+|--------|-----|
+| `a3c657c` | Install `torchvision` from CUDA wheel index (was pulling CPU build from PyPI) |
+| `3c3b1d7` | Remove "skip if torch exists" guard in Windows setup so re-runs fix broken envs |
+| `fd043f4` | Pin `transformers>=4.30,<5.0` — EAT remote model code incompatible with transformers 5.x |
+| `7d6fee9` | Copy read-only numpy array before `torch.from_numpy()` in EAT preprocessing |
+| `bd345ab` | Connect `tab_changed` to `_on_scan_regions_edited` so timeline refreshes on tab switch |
+| `d8b3972` | Add `--extra-index-url` to `pip install -r requirements.txt` in both setup scripts |
+
+---
+
+### Test results
+
+All 68 tests pass (5 new DB tests + 63 existing).
@@ -0,0 +1,130 @@
+# Main Window UI Restructure — Design
+
+**Goal:** Reorganize the `MainWindow` UI in `main.py` from a flat wall of ~50 always-visible controls into a legible, grouped layout — a menu bar for rare actions, a tabbed control deck for settings, an always-visible transport bar, and a real status bar — plus a visual polish pass. Keep every existing behavior, shortcut, and mouse interaction working.
+
+**Scope:** Reorganization **and** visual polish. **Not** an interaction-model change — single-key shortcuts, timeline mouse overloading, and the export/scan logic are untouched.
+
+**Audience:** Single power user. Optimize for density and speed. The goal is *order, not hiding*: keep everything fast to reach; push only genuinely rare actions into menus.
+
+**Runs in:** Python/Qt client (`main.py`), `MainWindow` class only. No `core/` changes.
+
+---
+
+## Problem (from audit)
+
+- **No information architecture.** No menu bar, no toolbar; status bar explicitly disabled (`setStatusBar(None)`, main.py:4440). Every function is a permanently-visible widget at equal weight.
+- **`settings_row` overloaded** (main.py:4334–4370): 24 widgets in one non-wrapping `QHBoxLayout` spanning three unrelated domains (encode/clip params, export variants, audio-scan ML). Needs >1500px; window opens at 1100px.
+- **Stranded controls** — e.g. the workers spinbox sits between Cancel and Delete in the transport row (main.py:4316).
+- **Weak feedback** — only an 11px `#888` status label at the far-right end of the overflowing settings row (main.py:4364).
+- **Flat visual hierarchy** — single Fusion stylesheet, scattered inline `setStyleSheet` state swaps, no primary/secondary distinction, no grouping.
+
+---
+
+## Chosen approach: Tabbed control deck
+
+The 3-pane horizontal splitter (Queue · Center · Scan results) is unchanged. The center column is restructured:
+
+```
+╔═ File   Edit   Scan   View   Help ═══════════════════ Profile:[default▾]  [?] ╗  menu bar (+ corner widgets)
+║ ┌Queue──┐ │ current_file.mp4                          │ ┌ Scan results ─────┐ ║
+║ │+Open  │ │ ┌──────────────────────────────────────┐ │ │ [model tabs]      │ ║
+║ │filter │ │ │             VIDEO (mpv)               │ │ │ version▾          │ ║
+║ │┌List┬+┐│ │ │                                      │ │ │ start  end  score │ ║
+║ ││f1  ││ │ │ └──────────────────────────────────────┘ │ │ ...               │ ║
+║ ││f2  ││ │ │ [════════════ timeline ════════════════] │ │                   │ ║
+║ │└────┘ ││ │ [════════════ crop bar ════════════════] │ │ [Neg] [Export]    │ ║
+║ └───────┘ │ ┌─ transport (always visible) ──────────┐ │ └───────────────────┘ ║
+║           │ │▶ ⏸ x2 x4 🔒  --/--   ···  [Export] +₁+₂ Cancel  Delete│         ║
+║           │ ├─[ Export ]─[ Crop & Track ]─[ Scan ]──┤  ← control deck (tabs)  ║
+║           │ │  (controls for the active tab here)   │                         ║
+║           │ └───────────────────────────────────────┘                         ║
+╠═══════════════════════════════════════════════════════════════════════════════╣
+║ Ready.                                  current file · profile: default · 8 wk ║  status bar
+╚═══════════════════════════════════════════════════════════════════════════════╝
+```
+
+**Why tabbed deck:** Replaces the three stacked rows with a compact tab strip. The transport bar (most-used controls) stays always visible above the tabs; settings group by concern behind tabs. Trade-off accepted: viewing Scan + Export controls simultaneously costs a tab switch.
+
+---
+
+## Control mapping
+
+Every current control has an explicit home; nothing is removed.
+
+### Menu bar (rare / batch / management)
+
+| Menu | Items |
+|------|-------|
+| **File** | Open Files… · Set export folder… · Quit |
+| **Edit** | Undo *(Ctrl+Z → `_scan_panel.undo`)* · Subprofiles ▸ (Add… / Remove…) |
+| **Scan** | Scan current · Auto-export · Scan All… · Train classifier… |
+| **View** | Review mode ✓ · Subcategory markers ▸ · Hide exported ✓ · Show hidden ✓ |
+| **Help** | Keyboard shortcuts *(? / F1)* · What's new · About |
+| *corner (right)* | Profile ▾ · `?` |
+
+*Hard Negatives and Dataset Stats remain inside the Train dialog (main.py:682, 762) — not surfaced separately. Profile new/delete remains driven by the profile combo's `activated` handler.*
+
+### Transport bar (always visible — playback + one-press export actions)
+
+`▶ Play · ⏸ Pause · x2 · x4 · 🔒 Lock · --/-- time · ⟨stretch⟩ · next-preview · **Export** · subprofile buttons ₁₂… · Cancel · Delete`
+
+### Control deck — Export tab
+`Label · Category · Name · Folder + browse · Format · HW encode · Resize · Duration · Clips · Spread · Workers · Re-export`
+
+### Control deck — Crop & Track tab
+`Portrait ratio · 1 random portrait · 1 random square · Track subject`
+
+### Control deck — Scan tab
+`Scan model ▾ · ⏲ history · Scan · Auto · Speech · Review · Fuse · Threshold`
+
+### Left pane (Queue) — unchanged
+`+ Open · filter · Hide exported · Show hidden · list tabs (tabbed / side-by-side)`
+
+### Right pane (Scan results) — unchanged structurally
+
+### Decisions
+- **Train** → Scan menu only (no deck button).
+- **Subcategory markers ("Sub")** → View menu submenu (off the deck).
+- Items appearing in both a menu and a visible control (Hide exported, Show hidden, Review, Scan, Auto) share one handler and stay synced.
+
+---
+
+## Status bar
+
+Restores `QStatusBar` (removes `setStatusBar(None)`):
+- **Left**: transient feedback — `Exporting 2/3…`, `Scan complete · 14 regions`, `Ready.` — with an optional inline `QProgressBar` for export/scan runs. Replaces `_lbl_status` and the `_status_timer` clear logic.
+- **Right (permanent widget)**: `current file · profile: <name> · <n> workers`.
+
+---
+
+## Visual polish
+
+Extends the existing dark Fusion theme — no theme change.
+
+1. **Aligned tab layouts** — each deck tab uses `QFormLayout`/grid so `label : control` pairs align in columns (biggest legibility win vs. today's ragged horizontal runs).
+2. **Primary/secondary button weight** — **Export** gets an accent style (blue, reusing `#3a6ea8`); Cancel/Delete read as secondary/destructive. The existing **red Export = "armed to overwrite"** state (main.py:5403) is preserved as a distinct state layered on top.
+3. **Consistent toggle states** — x2 / x4 / 🔒 Lock / Review are checkable; one global `:checked` style replaces Lock's ad-hoc inline `#4a3000` swap (main.py:5705).
+4. **Spacing rhythm** — uniform margins/spacing; **fixed deck height** (= tallest tab) so the video never resizes on tab switch.
+5. **Label cleanup** — de-abbreviate where cheap (`Thr→Threshold`, `Dur→Duration`); replace cryptic `⏲` with a clearer history affordance.
+6. **One stylesheet block** — fold scattered inline `setStyleSheet` calls into the central sheet (tabs, separators, status bar, toggles, primary button); keep per-widget overrides only for genuine state changes (overwrite-armed Export).
+
+---
+
+## Implementation notes & risks
+
+- **Preserve all signal wiring.** Controls are re-parented into new layouts, but every existing `connect()` and the controls' object identities are kept — this is a layout move, not a rewrite of handlers.
+- **Preserve all shortcuts.** The `QShortcut` block (main.py:4450–4483) and `_KeyFilter` focus suppression are untouched. Menu items reuse the same handler methods and may display the matching shortcut text.
+- **Fixed deck height** prevents video-area jump when switching tabs.
+- **Synced menu/button state** — checkable menu items (Review, Hide exported) and their visible toggles must reflect each other; route both through the existing handler and update both widgets.
+- **Profile combo** moves to a menu-bar corner widget but keeps its existing `activated` → new/delete/switch logic intact.
+- Risk: re-parenting a large `__init__` is error-prone. Mitigate by moving controls in small, independently-runnable stages (menu bar → status bar → deck tabs → transport bar → polish), launching the app after each.
+
+---
+
+## What this does NOT do
+
+- No change to export, scan, tracking, or DB logic — `core/` untouched.
+- No change to keyboard shortcuts or timeline mouse interactions.
+- No theme change — stays dark Fusion.
+- No new features — every control already exists; this is rehousing + polish.
+- No change to the Queue or Scan-results panes' internal structure.
@@ -0,0 +1,547 @@
+# Main Window UI Restructure — Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Re-house `MainWindow`'s ~50 flat controls into a menu bar (rare actions), an always-visible transport bar, a 3-tab control deck (Export / Crop & Track / Scan), and a real status bar — then a visual-polish pass — without changing any behavior, shortcut, or `core/` logic.
+
+**Architecture:** Pure layout reorganization inside `main.py`'s `MainWindow`. Existing widget objects and every `connect()` are **preserved and re-parented**, not recreated. The monster `__init__` is incrementally broken into `_build_*` helper methods (stays single-file — matches the project's architecture). Companion design doc: `docs/plans/2026-06-13-ui-restructure-design.md`.
+
+**Tech Stack:** Python 3.11+, PyQt6, pytest. App entry: `main.py`; launch via `./8cut.sh`.
+
+---
+
+## Conventions for every task
+
+- **Line references drift** as edits land. Always locate by the named symbol (method/variable), not the line number alone. Numbers are the *starting* anchors as of this plan.
+- **Authoritative verification is a manual launch.** After each task, run `./8cut.sh`, load a video, and confirm the task's controls work AND prior behavior is intact (play, scrub, export, scan). Use the `verify` skill for structured manual checks.
+- **Structure test is the safety net.** `tests/test_ui_structure.py` (built in Task 0.2) constructs `MainWindow` and asserts containment invariants. It **skips gracefully** if construction fails (e.g. no GL for `MpvWidget` in headless CI), so it never blocks `core/` tests. Run with a display: `pytest tests/test_ui_structure.py -v`.
+- **Commit after every task.** Small, reversible commits. Commit message convention matches the repo (`feat:`/`fix:`/`refactor:`/`change:`).
+- **Do not touch** `core/`, export/scan/tracking logic, the `QShortcut` block (around main.py:4450–4483), `_KeyFilter`, or `TimelineWidget` mouse handling.
+
+---
+
+## Stage 0 — Branch & safety net
+
+### Task 0.1: Create a working branch
+
+**Step 1:** Make sure `master` is checked out (`git switch -c` branches from the current `HEAD`), then create the working branch:
+```bash
+git switch -c ui-restructure
+```
+**Step 2:** Verify: `git branch --show-current` → `ui-restructure`.
+(The repo has pre-existing untracked/modified files; leave them alone — they are not part of this work.)
+
+### Task 0.2: Add the structure-test safety net
+
+**Files:**
+- Create: `tests/test_ui_structure.py`
+
+**Step 1: Write the test harness + baseline invariant**
+
+```python
+import os
+import pytest
+
+# A real platform is needed because MpvWidget creates a GL context.
+# If construction fails for any environment reason, skip — this test is a
+# best-effort structural net, not a gate on core/ tests.
+pytestmark = pytest.mark.gui
+
+
+@pytest.fixture(scope="module")
+def app():
+    from PyQt6.QtWidgets import QApplication
+    inst = QApplication.instance() or QApplication([])
+    yield inst
+
+
+@pytest.fixture
+def win(app):
+    try:
+        from main import MainWindow
+        w = MainWindow()
+    except Exception as e:  # GL/mpv/display unavailable, etc.
+        pytest.skip(f"MainWindow could not be constructed here: {e}")
+    yield w
+    w.close()
+    w.deleteLater()
+
+
+def _descendant_object_names(widget):
+    """All objectNames in a widget's child tree (for containment asserts)."""
+    return {c.objectName() for c in widget.findChildren(object) if c.objectName()}
+
+
+def test_window_constructs(win):
+    assert win.windowTitle() == "8-cut"
+```
+
+**Step 2: Run it**
+
+Run: `pytest tests/test_ui_structure.py -v`
+Expected: `test_window_constructs` PASSES (with a display) or SKIPS (headless). Either is acceptable — it must not ERROR.
+
+**Step 3:** Register the `gui` marker to silence warnings.
+
+Modify `conftest.py` — append:
+```python
+def pytest_configure(config):
+    config.addinivalue_line("markers", "gui: constructs Qt widgets; needs a display")
+```
+
+**Step 4: Confirm core tests still pass**
+
+Run: `pytest tests/test_utils.py tests/test_db.py -q`
+Expected: PASS (unchanged).
+
+**Step 5: Commit**
+```bash
+git add tests/test_ui_structure.py conftest.py
+git commit -m "test: add MainWindow structure smoke test (skips headless)"
+```
+
+---
+
+## Stage 1 — Menu bar
+
+Add a `QMenuBar` whose actions reuse existing handler methods. Move the profile combo and `?` button into menu-bar corner widgets. Keep the original buttons that also live elsewhere (Scan, Auto) — menus and buttons share handlers.
+
+### Task 1.1: Extract a `_build_menubar()` and add the five menus
+
+**Files:**
+- Modify: `main.py` `MainWindow.__init__` (call site) and add method `_build_menubar`
+
+**Step 1:** Add the method (place near other `_build`/setup helpers, e.g. after `__init__`). Wire each action to the **existing** handler method:
+
+```python
+def _build_menubar(self) -> None:
+    from PyQt6.QtGui import QAction
+    mb = self.menuBar()
+
+    # File
+    m_file = mb.addMenu("&File")
+    m_file.addAction("Open Files…", self._on_open_files)
+    m_file.addAction("Set export folder…", self._pick_folder)
+    m_file.addSeparator()
+    m_file.addAction("Quit", self.close)
+
+    # Edit
+    m_edit = mb.addMenu("&Edit")
+    self._act_undo = m_edit.addAction("Undo scan edit", self._scan_panel.undo)
+    self._act_undo.setShortcut("Ctrl+Z")
+    m_edit.addSeparator()
+    m_subs = m_edit.addMenu("Subprofiles")
+    m_subs.addAction("Add…", self._new_subprofile)
+    self._menu_subprofiles_remove = m_subs.addMenu("Remove")
+    self._rebuild_remove_subprofile_menu()  # built in Task 4.x
+
+    # Scan
+    m_scan = mb.addMenu("&Scan")
+    m_scan.addAction("Scan current", self._start_scan)
+    m_scan.addAction("Auto-export", self._auto_export)
+    m_scan.addSeparator()
+    m_scan.addAction("Scan All…", self._start_scan_all)
+    m_scan.addAction("Train classifier…", self._open_train_dialog)
+
+    # View
+    m_view = mb.addMenu("&View")
+    self._act_review = m_view.addAction("Review mode")
+    self._act_review.setCheckable(True)
+    self._act_review.toggled.connect(self._btn_scan_mode.setChecked)
+    m_view.addAction("Subcategory markers…", self._show_subcat_menu)
+    m_view.addSeparator()
+    self._act_hide_exported = m_view.addAction("Hide exported")
+    self._act_hide_exported.setCheckable(True)
+    self._act_hide_exported.toggled.connect(self._chk_hide_exported.setChecked)
+    self._chk_hide_exported.toggled.connect(self._act_hide_exported.setChecked)
+    self._act_show_hidden = m_view.addAction("Show hidden")
+    self._act_show_hidden.setCheckable(True)
+    self._act_show_hidden.toggled.connect(self._btn_show_hidden.setChecked)
+    self._btn_show_hidden.toggled.connect(self._act_show_hidden.setChecked)
+
+    # Help
+    m_help = mb.addMenu("&Help")
+    m_help.addAction("Keyboard shortcuts", self._show_shortcuts).setShortcut("F1")
+    m_help.addAction("What's new", self._show_changelog)
+    m_help.addAction("About", self._show_about)  # tiny method, stubbed in Step 2 below
+```
+
+> **Sync note:** `QAction.toggled`/`QAbstractButton.toggled` are emitted only when the checked state actually changes, so the bidirectional `setChecked` connections (Hide exported, Show hidden) cannot loop. Review is wired one-way here (menu → button); the reverse `_btn_scan_mode` → `_act_review` sync is added in Task 3.4 once the button is in the Scan tab.
+
+**Step 2:** Stub the two small new methods referenced above:
+```python
+def _show_about(self) -> None:
+    QMessageBox.about(self, "About 8-cut",
+                      f"<b>8-cut</b> v{self.APP_VERSION}<br>"
+                      "8-second clips for foley datasets.")
+
+def _rebuild_remove_subprofile_menu(self) -> None:
+    self._menu_subprofiles_remove.clear()
+    for name in self._subprofiles:
+        self._menu_subprofiles_remove.addAction(
+            name, lambda _=False, n=name: self._remove_subprofile(n))
+    self._menu_subprofiles_remove.setEnabled(bool(self._subprofiles))
+```
+
+**Step 3:** Call `self._build_menubar()` in `__init__`, **after** `self._scan_panel` and all referenced buttons exist (i.e. just before/after the splitter assembly around main.py:4429). The scan panel is created at main.py:4414, so place the call after that.
+
+**Step 4 (manual verify):** `./8cut.sh` → menu bar shows File/Edit/Scan/View/Help; each item triggers its action; Ctrl+Z still undoes scan edits; F1 shows shortcuts.
+
+**Step 5:** Commit: `feat: add menu bar wired to existing handlers`.
+
+### Task 1.2: Move profile combo + `?` into menu-bar corner
+
+**Files:** Modify `main.py` — `top_bar` assembly (around main.py:4290–4294) and `_build_menubar`.
+
+**Step 1:** Remove `self._cmb_profile` and `self._btn_shortcuts` (and the `"Profile:"` `QLabel`) from `top_bar`. Keep `self._lbl_file` in `top_bar` (it stays as the slim filename header above the video).
+
+**Step 2:** In `_build_menubar`, set a corner widget:
+```python
+from PyQt6.QtWidgets import QWidget, QHBoxLayout, QLabel
+corner = QWidget()
+ch = QHBoxLayout(corner)
+ch.setContentsMargins(0, 0, 6, 0)
+ch.addWidget(QLabel("Profile:"))
+ch.addWidget(self._cmb_profile)
+ch.addWidget(self._btn_shortcuts)
+mb.setCornerWidget(corner, Qt.Corner.TopRightCorner)
+```
+(Build the corner widget at the end of `_build_menubar`, after `self._cmb_profile` exists — it is created at main.py:4272.)
+
+**Step 3 (manual verify):** Profile dropdown works (switch/new/delete); `?` opens shortcuts; filename still shows above the video.
+
+**Step 4:** Commit: `change: move profile selector and help into menu-bar corner`.
+
+---
+
+## Stage 2 — Status bar
+
+### Task 2.1: Restore `QStatusBar` and route `_show_status` to it
+
+**Files:** Modify `main.py` — `__init__` (`setStatusBar(None)` at main.py:4440, `_lbl_status`/`_status_timer` at main.py:4364–4370) and `_show_status` (main.py:5065).
+
+**Step 1:** Replace `self.setStatusBar(None)` with a real status bar built in a helper:
+```python
+def _build_status_bar(self) -> None:
+    sb = self.statusBar()
+    self._status_perm = QLabel("")
+    self._status_perm.setStyleSheet("color: #888;")
+    sb.addPermanentWidget(self._status_perm)
+    self._update_status_perm()
+
+def _update_status_perm(self) -> None:
+    name = os.path.basename(self._file_path) if self._file_path else "—"
+    self._status_perm.setText(
+        f"{name} · profile: {self._profile()} · {self._spn_workers.value()} workers")
+```
+Call `self._build_status_bar()` in `__init__` near the menubar call.
+
+**Step 2:** Rewrite `_show_status` to use the status bar (this subsumes `_status_timer`):
+```python
+def _show_status(self, msg: str, timeout: int = 0) -> None:
+    """Show a transient message in the status bar. timeout in ms (0 = sticky)."""
+    self.statusBar().showMessage(msg, timeout)
+```
+
+**Step 3:** Delete `self._lbl_status`, `self._status_timer`, and `settings_row.addWidget(self._lbl_status)` (main.py:4364–4370). Remove the `_status_timer.timeout` connection.
+
+**Step 4:** Keep `_update_status_perm()` fresh — call it where file/profile/workers change: end of `_after_load`, in `_on_profile_activated`, and in the `_spn_workers.valueChanged` lambda.
+
+**Step 5 (manual verify):** Start an export → status text appears bottom-left and auto-clears; bottom-right shows file · profile · workers and updates on file/profile/worker change.
+
+**Step 6:** Commit: `feat: real status bar replaces inline status label`.
+
+---
+
+## Stage 3 — Control deck (the core move)
+
+Build a fixed-height `QTabWidget` with three tab pages, then **re-parent** the existing controls from `path_row` and `settings_row` into them. Give each page an `objectName` for the structure test. Do tabs one at a time so the app stays runnable.
+
+### Task 3.1: Build the empty deck and mount it
+
+**Files:** Modify `main.py` — `right_layout` assembly (main.py:4372–4382).
+
+**Step 1:** Add a helper that creates the deck and three empty pages:
+```python
+def _build_control_deck(self) -> "QTabWidget":
+    from PyQt6.QtWidgets import QTabWidget, QWidget
+    deck = QTabWidget()
+    deck.setObjectName("control_deck")
+    deck.setDocumentMode(True)
+    self._tab_export = QWidget(); self._tab_export.setObjectName("export_tab")
+    self._tab_crop = QWidget();   self._tab_crop.setObjectName("crop_tab")
+    self._tab_scan = QWidget();   self._tab_scan.setObjectName("scan_tab")
+    deck.addTab(self._tab_export, "Export")
+    deck.addTab(self._tab_crop, "Crop && Track")
+    deck.addTab(self._tab_scan, "Scan")
+    self._control_deck = deck
+    return deck
+```
+
+**Step 2:** In `right_layout`, **keep** `transport_row` for now, but replace the `path_row` and `settings_row` additions with the deck:
+- Remove `right_layout.addLayout(path_row)` and `right_layout.addLayout(settings_row)`.
+- Add `right_layout.addWidget(self._build_control_deck())`.
+- Leave the `path_row`/`settings_row` *construction* in place for this task (the widgets are still parented to nothing visible) — they get moved into tabs in 3.2–3.4. **App is briefly missing those controls between 3.1 and 3.4; that's expected mid-stage.**
+
+**Step 3 (manual verify):** App launches; three empty tabs appear under the transport bar; switching tabs doesn't resize the video (height fixed in Task 3.5).
+
+**Step 4:** Commit: `refactor: add empty 3-tab control deck under transport`.
+
+### Task 3.2: Populate the Export tab
+
+**Files:** Modify `main.py` — move widgets from `path_row` (main.py:4322–4331) and the encode/clip parts of `settings_row` (main.py:4334–4348) plus `_spn_workers` (main.py:4213).
+
+**Step 1:** Build the Export tab with an aligned grid:
+```python
+def _build_export_tab(self) -> None:
+    from PyQt6.QtWidgets import QGridLayout, QLabel, QHBoxLayout
+    g = QGridLayout(self._tab_export)
+    g.setContentsMargins(8, 6, 8, 6); g.setHorizontalSpacing(8); g.setVerticalSpacing(6)
+    # Row 0: annotation
+    g.addWidget(QLabel("Label:"), 0, 0); g.addWidget(self._txt_label, 0, 1)
+    g.addWidget(QLabel("Cat:"),   0, 2); g.addWidget(self._cmb_category, 0, 3)
+    g.addWidget(QLabel("Name:"),  0, 4); g.addWidget(self._txt_name, 0, 5)
+    # Row 1: output path
+    folder_row = QHBoxLayout()
+    folder_row.addWidget(self._txt_folder, 1); folder_row.addWidget(self._btn_folder)
+    g.addWidget(QLabel("Folder:"), 1, 0); g.addLayout(folder_row, 1, 1, 1, 5)
+    # Row 2: encode / clip params
+    g.addWidget(QLabel("Format:"), 2, 0); g.addWidget(self._cmb_format, 2, 1)
+    g.addWidget(self._chk_hw, 2, 2)
+    g.addWidget(QLabel("Resize:"), 2, 3); g.addWidget(self._spn_resize, 2, 4)
+    # Rows 3–4: batch params + actions (Workers and Re-export sit on row 4)
+    g.addWidget(QLabel("Duration:"), 3, 0); g.addWidget(self._spn_clip_dur, 3, 1)
+    g.addWidget(QLabel("Clips:"),    3, 2); g.addWidget(self._spn_clips, 3, 3)
+    g.addWidget(QLabel("Spread:"),   3, 4); g.addWidget(self._spn_spread, 3, 5)
+    g.addWidget(QLabel("Workers:"),  4, 0); g.addWidget(self._spn_workers, 4, 1)
+    g.addWidget(self._btn_reexport, 4, 5)
+```
+Call it from `_build_control_deck` (or right after, in `__init__`).
+
+**Step 2:** Delete the now-duplicate `addWidget` calls for these widgets from `path_row` and `settings_row` construction. (Re-parenting via `addWidget` into the grid auto-removes them from the old layout, but remove the dead lines to keep `__init__` honest.)
+
+**Step 3 (manual verify):** Export tab shows aligned Label/Cat/Name, Folder+browse, Format/HW/Resize, Duration/Clips/Spread/Workers/Re-export. Change each → still persists to `QSettings` and updates the timeline span / next-label as before. Export still works (E).
+
+**Step 4:** Commit: `refactor: move export & encode controls into Export tab`.
+
+### Task 3.3: Populate the Crop & Track tab
+
+**Files:** Modify `main.py` — move `_cmb_portrait`, `_chk_rand_portrait`, `_chk_rand_square`, `_chk_track` from `settings_row` (main.py:4337, 4349–4351).
+
+**Step 1:**
+```python
+def _build_crop_tab(self) -> None:
+    from PyQt6.QtWidgets import QGridLayout, QLabel
+    g = QGridLayout(self._tab_crop)
+    g.setContentsMargins(8, 6, 8, 6); g.setHorizontalSpacing(8); g.setVerticalSpacing(6)
+    g.addWidget(QLabel("Portrait:"), 0, 0); g.addWidget(self._cmb_portrait, 0, 1)
+    g.addWidget(self._chk_rand_portrait, 1, 0, 1, 2)
+    g.addWidget(self._chk_rand_square,   2, 0, 1, 2)
+    g.addWidget(self._chk_track,         3, 0, 1, 2)
+    g.setRowStretch(4, 1); g.setColumnStretch(2, 1)
+```
+
+**Step 2:** Remove those four widgets' old `settings_row.addWidget` lines.
+
+**Step 3 (manual verify):** Crop & Track tab shows the four controls; portrait ratio still toggles the crop overlay/crop-bar; random/track checkboxes persist.
+
+**Step 4:** Commit: `refactor: move crop & track controls into their tab`.
+
+### Task 3.4: Populate the Scan tab (and drop menu-only buttons)
+
+**Files:** Modify `main.py` — move scan widgets from `settings_row` (main.py:4352–4362). Buttons that became **menu-only** (Train, Scan All, Sub) are NOT added to the tab and are deleted.
+
+**Step 1:**
+```python
+def _build_scan_tab(self) -> None:
+    from PyQt6.QtWidgets import QGridLayout, QLabel, QHBoxLayout
+    g = QGridLayout(self._tab_scan)
+    g.setContentsMargins(8, 6, 8, 6); g.setHorizontalSpacing(8); g.setVerticalSpacing(6)
+    model_row = QHBoxLayout()
+    model_row.addWidget(self._cmb_scan_model, 1); model_row.addWidget(self._btn_model_history)
+    g.addWidget(QLabel("Model:"), 0, 0); g.addLayout(model_row, 0, 1, 1, 3)
+    g.addWidget(self._btn_scan, 1, 0); g.addWidget(self._btn_auto_export, 1, 1)
+    g.addWidget(self._btn_speech, 1, 2); g.addWidget(self._btn_scan_mode, 1, 3)
+    g.addWidget(self._spn_auto_fuse, 2, 0); g.addWidget(self._sld_threshold, 2, 1)
+    g.setColumnStretch(3, 1)
+```
+
+**Step 2:** Reverse-sync Review with the View menu (the forward sync was added in Task 1.1):
+```python
+self._btn_scan_mode.toggled.connect(self._act_review.setChecked)
+```
+Add this right after `_build_scan_tab` runs (both `_btn_scan_mode` and `_act_review` exist by then).
+
+**Step 3:** Delete the menu-only buttons and their `settings_row` lines: `self._btn_train` (main.py:4167–4170), `self._btn_scan_all` (main.py:4172–4174), `self._btn_hide_subcats` (main.py:4154–4157). Their handlers (`_open_train_dialog`, `_start_scan_all`, `_show_subcat_menu`) stay — now reached via menus.
+
+**Step 4:** Re-anchor `_show_subcat_menu` (main.py:5989) so it no longer depends on the deleted `_btn_hide_subcats`:
+```python
+# was: self._btn_hide_subcats.mapToGlobal(self._btn_hide_subcats.rect().bottomLeft())
+from PyQt6.QtGui import QCursor
+menu.exec(QCursor.pos())
+```
+Apply to **both** `exec` call sites in that method.
+
+**Step 5 (manual verify):** Scan tab shows Model+history, Scan/Auto/Speech/Review, Fuse/Threshold. `Scan` runs; `Review` toggles and stays in sync with View ▸ Review mode (both directions); View ▸ Subcategory markers… opens the full popup near the cursor; Scan ▸ Scan All / Train still work.
+
+**Step 6:** Commit: `refactor: move scan controls into Scan tab; Train/ScanAll/Sub to menus`.
+
+### Task 3.5: Fix deck height; remove dead `path_row`/`settings_row`
+
+**Files:** Modify `main.py` — `__init__`.
+
+**Step 1:** The `path_row`/`settings_row` `QHBoxLayout`s should now be empty. Delete their construction blocks entirely (main.py:4321–4370 minus what was already removed). Do **not** delete `transport_row` or the `self._transport_row = transport_row` line — it is still used by `_rebuild_subprofile_buttons`.
+
+**Step 2:** Pin the deck height so tab switches don't move the video:
+```python
+self._control_deck.setFixedHeight(self._control_deck.sizeHint().height())
+```
+Call after all three tabs are built. If the tallest tab (Export, 5 rows) clips, set an explicit value instead (e.g. `setFixedHeight(150)`); confirm visually.
+
+**Step 3 (manual verify):** Switching Export↔Crop↔Scan keeps the video size constant; no clipped controls; all three tabs fully usable.
+
+**Step 4:** Commit: `refactor: fix control-deck height; drop dead settings rows`.
+
+### Task 3.6: Extend the structure test for the deck
+
+**Files:** Modify `tests/test_ui_structure.py`.
+
+**Step 1:** Add invariants:
+```python
+def test_menubar_has_expected_menus(win):
+    from PyQt6.QtWidgets import QMenu
+    titles = [m.title().replace("&", "") for m in win.menuBar().findChildren(QMenu)]
+    for expected in ("File", "Edit", "Scan", "View", "Help"):
+        assert any(expected == t for t in titles)
+
+def test_status_bar_exists(win):
+    assert win.statusBar() is not None
+
+def test_workers_spinbox_in_export_tab(win):
+    from PyQt6.QtWidgets import QSpinBox
+    assert win._spn_workers in win._tab_export.findChildren(QSpinBox)
+
+def test_scan_button_in_scan_tab(win):
+    from PyQt6.QtWidgets import QPushButton
+    assert win._btn_scan in win._tab_scan.findChildren(QPushButton)
+
+def test_portrait_combo_in_crop_tab(win):
+    from PyQt6.QtWidgets import QComboBox
+    assert win._cmb_portrait in win._tab_crop.findChildren(QComboBox)
+```
+(Adjust the menu-title introspection if the helper is awkward; the key invariants are the tab-containment ones.)
+
+**Step 2:** Run: `pytest tests/test_ui_structure.py -v` → PASS with a display (or SKIP headless).
+
+**Step 3:** Commit: `test: assert control-deck containment invariants`.
+
+---
+
+## Stage 4 — Transport bar tidy & subprofile menu sync
+
+### Task 4.1: Confirm transport bar contents; keep subprofile export buttons inline
+
+**Files:** Modify `main.py` — `transport_row` (main.py:4296–4319).
+
+**Step 1:** The workers spinbox was moved in Task 3.2 — confirm `transport_row.addWidget(self._spn_workers)` is gone. Remaining transport order: Play, Pause, x2, x4, Lock, time, stretch, next-label, **Export**, subprofile buttons, `+` (add subprofile), Cancel, Delete. Leave subprofile **export** buttons inline (they carry the 1–9 shortcuts and belong with Export).
+
+**Step 2:** Keep the inline `+` add-subprofile button, but also ensure the Edit ▸ Subprofiles ▸ Remove submenu is rebuilt whenever subprofiles change. In `_rebuild_subprofile_buttons` (main.py:5530-ish) and after add/remove, call `self._rebuild_remove_subprofile_menu()`.
+
+**Step 3 (manual verify):** Transport row reads cleanly; adding/removing a subprofile updates both the inline buttons and Edit ▸ Subprofiles ▸ Remove; number keys 1–9 still export to subprofiles.
+
+**Step 4:** Commit: `change: tidy transport row; sync subprofile remove menu`.
+
+---
+
+## Stage 5 — Visual polish
+
+All Stage 5 verification is **manual** (visual). Take a screenshot before 5.1 for comparison (use the `run`/`verify` skill).
+
+### Task 5.1: Consolidate the stylesheet (tabs, status bar, toggles, primary button)
+
+**Files:** Modify `main.py` — global stylesheet in `main()` (main.py:3811–3827).
+
+**Step 1:** Extend the central sheet (append rules; keep existing ones):
+```css
+QTabWidget::pane { border: 1px solid #444; border-radius: 3px; top: -1px; }
+QTabBar::tab { background: #2a2a2a; color: #bbb; padding: 5px 12px;
+               border: 1px solid #444; border-bottom: none;
+               border-top-left-radius: 3px; border-top-right-radius: 3px; }
+QTabBar::tab:selected { background: #333; color: #fff; }
+QPushButton:checked { background: #4a3000; border-color: #ffd230; color: #fff; }
+QStatusBar { background: #1a1a1a; color: #bbb; }
+QStatusBar::item { border: none; }
+QPushButton#primary { background: #3a6ea8; border-color: #4f86c6; color: #fff; }
+QPushButton#primary:hover { background: #4f86c6; }
+QMenuBar { background: #1e1e1e; } QMenuBar::item:selected { background: #3a6ea8; }
+QMenu { background: #2a2a2a; border: 1px solid #555; }
+QMenu::item:selected { background: #3a6ea8; }
+```
+
+**Step 2:** Mark Export primary: `self._btn_export.setObjectName("primary")`.
+
+**Step 3:** Replace Lock's inline stylesheet swap (main.py:5705) — since `QPushButton:checked` now styles all toggles, delete the two `self._btn_lock.setStyleSheet(...)` lines in `_on_lock_toggled` (keep the rest of the handler).
+
+**Step 4 (manual verify):** Tabs, menus, status bar, and checked toggles (x2/x4/Lock/Review) all read consistently; Export stands out as primary; Lock still highlights when active.
+
+**Step 5:** Commit: `style: unify tab/menu/statusbar/toggle styling; mark Export primary`.
+
+### Task 5.2: Preserve the "armed to overwrite" Export state
+
+**Files:** Inspect `main.py` — the red-Export swaps (main.py:5403, and the resets at 4960/5211/5447/7170/7199/7218).
+
+**Step 1:** These set/clear `self._btn_export.setStyleSheet("QPushButton { background: #6a3030; ... }")` to mean "this export will overwrite". With Export now `objectName("primary")`, an empty `setStyleSheet("")` reset reverts to the **primary** look (good). Confirm the armed (red) state still visually overrides primary — inline stylesheet beats the objectName rule, so it does.
+
+**Step 2 (manual verify):** Select a marker for re-export → Export turns red (armed); deselect → returns to blue primary; export → resets correctly.
+
+**Step 3:** Commit (only if changes were needed): `fix: keep armed-overwrite Export state over primary style`.
+
+### Task 5.3: Label cleanup
+
+**Files:** Modify `main.py` — prefixes/labels.
+
+**Step 1:** De-abbreviate where free: `_sld_threshold.setPrefix("Threshold: ")` (main.py:4207) → keep short if it overflows the tab; `_spn_auto_fuse` prefix stays `"Fuse: "`. Replace the `⏲` history button text with a tooltip-backed `"History"` or a clearer glyph; keep `setFixedWidth` generous enough.
+
+**Step 2 (manual verify):** Labels legible; nothing clipped in the Scan tab.
+
+**Step 3:** Commit: `style: de-abbreviate scan labels`.
+
+---
+
+## Stage 6 — Finalize
+
+### Task 6.1: Full regression pass
+
+**Step 1 (manual, use `verify` skill):** With a real video loaded, confirm end-to-end: scrub/play/pause/speed/lock; export (E) single + batch + subprofile (1–9); re-export; delete; portrait crop + random + track; scan + auto + speech + review + threshold/fuse; scan-all; train dialog opens; profile switch; queue filter/hide/show-hidden; Ctrl+Z undo; F1/`?` shortcuts.
+
+**Step 2:** Run `pytest -q` (all suites). Expected: `core/` PASS; `test_ui_structure` PASS (display) or SKIP.
+
+### Task 6.2: Docs & changelog
+
+**Files:** Modify `README.md` (UI/shortcuts sections if any references moved) and the in-app `CHANGELOG` list (main.py:4500) — bump `APP_VERSION` and add a "UI restructure" entry so the What's-new dialog announces it.
+
+**Step 1:** Add changelog entry summarizing: menu bar, tabbed control deck, status bar, visual polish; note all shortcuts unchanged.
+
+**Step 2:** Commit: `docs: changelog + README for UI restructure`.
+
+### Task 6.3: Hand off the branch
+
+**Step 1:** `git log --oneline master..ui-restructure` — review the commit series.
+**Step 2:** Offer the user: merge to `master`, open a PR, or keep iterating (use `finishing-a-development-branch` skill).
+
+---
+
+## Risk register
+
+| Risk | Mitigation |
+|------|-----------|
+| Re-parenting breaks a `connect()` | Widgets keep identity; only layout membership changes. Manual launch after every task catches breakage immediately. |
+| Headless test can't build `MpvWidget` | Structure test skips on construction failure; manual launch is authoritative. |
+| Menu/button state desync (Review, Hide exported) | Bidirectional `setChecked` (no re-emit on equal value → no loop); verified manually in 3.4. |
+| Subcat popup anchored to deleted button | Re-anchored to `QCursor.pos()` in Task 3.4. |
+| Deck height jump on tab switch | `setFixedHeight` in Task 3.5. |
+| Armed-overwrite red Export lost under primary style | Inline stylesheet overrides objectName rule; verified in 5.2. |
+| Mid-Stage-3 app missing controls | Expected between 3.1–3.4; each sub-task is still committable and launchable. |
+
+## What this plan does NOT change
+
+`core/` logic · export/scan/tracking/DB behavior · keyboard shortcuts · timeline mouse interactions · the Queue and Scan-results panes' internals · the dark Fusion theme.
@@ -0,0 +1,96 @@
+# Multi-pane Control Deck — Design + Plan Addendum
+
+> Addendum to `2026-06-13-ui-restructure-design.md` / `-implementation.md`. Same branch (`ui-restructure`), same constraints (preserve behavior; reorg/feature only; no `core/` changes).
+
+**Goal:** Let the control-deck panels (Export / Crop & Track / Scan) optionally show **side-by-side as resizable columns** instead of one-at-a-time tabs — mirroring the existing playlist pin→side-by-side pattern.
+
+> **Revision (post-use, 2026-06-13):** The first implementation showed unpinned panels as a "leftover" tab-column so nothing was hidden — but in use, pinning 2 panels then displayed 3 columns, which read as "all three pinned" and was confusing (and inconsistent with what persisted). **Revised behavior:** the split view shows **exactly the pinned panels** as columns (pin 2 → 2 columns, pin 3 → 3). Unpinned panels are not shown as columns. Because the right-click-tab "Show side-by-side" gesture only works in tabbed mode, an always-available **View ▸ Side-by-side panels ▸ Export / Crop / Scan** submenu of checkable toggles is the way to pin/unpin any panel (including adding a 3rd while already in split view). The `if leftovers:` block below is removed; the View submenu + its sync in `_refresh_deck_layout` replace it.
+
+**Mirror these existing playlist members** (study them — the deck is a simpler, fixed-3-panel version): `_PlaylistTabBar` (main.py:3284), `_refresh_layout` (~4872), `_on_pin_toggle`/`_on_unpin` (~4942), `_detach_all_pws`/`_clear_split_container` (~4861), and the `_list_stack`/`_split_container` setup (~3916–3923).
+
+---
+
+## Design
+
+### Panel identity
+The deck's three pages (`_tab_export`, `_tab_crop`, `_tab_scan`) each get three attributes (set in `_build_control_deck`):
+- `_pinned: bool = False`
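+- `_label: str` — "Export" / "Crop & Track" / "Scan" (when this is passed to `addTab`, escape the ampersand as `&&`, since a single `&` in a tab label marks a mnemonic)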
+- `_deck_key: str` — "export" / "crop" / "scan" (stable key for persistence)
+
+Keep an ordered list `self._deck_panels = [self._tab_export, self._tab_crop, self._tab_scan]` for deterministic column order.
+
+### Tab bar
+New `class _DeckTabBar(QTabBar)` (minimal version of `_PlaylistTabBar`): on `contextMenuEvent`, show a checkable "Show side-by-side" action reflecting the page's `_pinned`, and emit `pin_toggle_requested(idx)` when chosen. No rename/folder. Install via `self._control_deck.setTabBar(_DeckTabBar())` in `_build_control_deck` and connect `pin_toggle_requested → self._on_deck_pin_toggle`.
+
+### Stacked container (mirrors `_list_stack`)
+Wrap the deck so it can swap between tabbed and split views:
+- `self._deck_split_container = QWidget()` with a `QHBoxLayout` (`_deck_split_layout`, margins 0, spacing 2).
+- `self._deck_stack = QStackedWidget()`; page 0 = `self._control_deck`, page 1 = `self._deck_split_container`.
+- In `right_layout`, mount `self._deck_stack` where `self._control_deck` is currently added (replace that one `addWidget`).
+
+### `_refresh_deck_layout()` (mirrors `_refresh_layout`)
+```
+pinned = [p for p in self._deck_panels if p._pinned]
+guard self._deck_loading = True  (avoid re-entrant signals)
+detach all panels (setParent(None)); self._control_deck.clear(); clear _deck_split_layout
+if len(pinned) >= 2:
+    splitter = QSplitter(Horizontal); splitter.setChildrenCollapsible(False)
+    leftovers = []
+    for panel in self._deck_panels:        # preserve deck order
+        if panel._pinned:
+            col = QWidget(); v = QVBoxLayout(col) (0 margins)
+            header = label(panel._label, bold) + "✕" button (unpin, fixed 18x18,
+                     tooltip "Return to tabs", clicked → self._on_deck_unpin(panel))
+            header fixed height ~22
+            panel.setVisible(True)          # reparented pages start hidden
+            v.addWidget(header); v.addWidget(panel, 1)
+            splitter.addWidget(col)
+        else:
+            leftovers.append(panel)
+    if leftovers:                            # keep unpinned reachable as a tab-column
+        lt = QTabWidget(); lt.setDocumentMode(True)
+        for panel in leftovers:
+            panel.setVisible(True); lt.addTab(panel, panel._label)
+        splitter.addWidget(lt)
+    splitter.setSizes([1000]*splitter.count())
+    _deck_split_layout.addWidget(splitter)
+    self._deck_stack.setCurrentWidget(self._deck_split_container)
+else:
+    for panel in self._deck_panels:          # fixed order
+        self._control_deck.addTab(panel, panel._label)
+    self._deck_stack.setCurrentWidget(self._control_deck)
+restore self._deck_loading
+```
+
+### Toggle handlers (mirror `_on_pin_toggle`/`_on_unpin`)
+- `_on_deck_pin_toggle(idx)`: `panel = self._control_deck.widget(idx)` (only valid in tabbed mode — pin is only offered there); flip `panel._pinned`; if now pinned and `<2` pinned, `_show_status("Pin another panel to show them side-by-side", 3500)`; `_refresh_deck_layout()`; `_save_deck_layout()`.
+- `_on_deck_unpin(panel)`: `panel._pinned = False`; `_refresh_deck_layout()`; `_save_deck_layout()`.
+
+### Persistence
+- `_save_deck_layout()`: `self._settings.setValue("deck_pinned", [p._deck_key for p in self._deck_panels if p._pinned])`.
+- Restore at the end of `__init__` (after the deck + menubar exist): read `deck_pinned` (handle str/list like the subprofiles loader at main.py:3867), set each panel's `_pinned`, then `_refresh_deck_layout()` once.
+
+### Height
+The deck pages now also render with a 22px header in split mode. After building, set the stack's minimum height to fit the tallest **split-mode** column (header + Export content) so split mode never clips: compute once via `self._deck_stack.setMinimumHeight(...)` using `sizeHint`, and keep vertical size policy `Fixed` (as the deck has now). Switching INTO split mode may change the deck height slightly (deliberate user action — acceptable); switching tabs within tabbed mode must still not jump. Reuse the existing height-pin logic — apply it to `_deck_stack` instead of `_control_deck`.
+
+---
+
+## Implementation tasks (bite-sized, commit per task)
+
+**Task M.1 — scaffolding (no behavior change yet).** Add `_DeckTabBar`; in `_build_control_deck` set it on the deck, set `_pinned/_label/_deck_key` on the three pages, build `self._deck_panels`, create `_deck_split_container`/`_deck_split_layout`/`_deck_stack`, and mount `_deck_stack` in `right_layout` instead of `_control_deck`. Connect `pin_toggle_requested` to a stub. App still behaves as plain tabs. Verify: `import main`, structure tests 6/6, and a probe that `_deck_stack.currentWidget() is _control_deck`.
+
+**Task M.2 — split rendering.** Implement `_refresh_deck_layout`, `_detach_deck_panels`, `_clear_deck_split`, `_on_deck_pin_toggle`, `_on_deck_unpin`. Verify with a probe: set two panels `_pinned=True`, call `_refresh_deck_layout()`, assert stack shows `_deck_split_container`, the splitter has 3 columns (2 pinned + 1 leftover QTabWidget), and all three panels are visible/parented; unpin one → back to `_control_deck` with 3 tabs in order. (Per the revision note at the top of this addendum, the leftover tab-column was later removed: under the revised behavior the probe expects exactly 2 columns — the pinned panels only.)
+
+**Task M.3 — persistence.** Add `_save_deck_layout()` + restore block in `__init__`. Verify a probe round-trips a pinned set through QSettings (use an isolated QSettings scope in the test if needed) without error and that restore calls refresh exactly once.
+
+**Task M.4 — height + tests.** Apply the height-pin to `_deck_stack`; confirm split mode doesn't clip the tallest column. Add structure tests: `test_deck_stack_exists`, and `test_pinning_two_panels_switches_to_split` (programmatically pin 2, refresh, assert `_deck_stack.currentWidget() is _deck_split_container`).
+
+## Verification note
+Env quirk (same as the restructure): bare `python -c` constructing `MainWindow` segfaults on mpv GL; run checks under the pytest fixture and `LD_PRELOAD=/usr/lib/libstdc++.so.6 QT_QPA_PLATFORM=offscreen`. Visual confirmation (drag dividers, pin/unpin gestures, persistence across real launches) is the user's, done at the end.
+
+## Risks
+- **Reparenting hidden pages:** QTabWidget hides non-current pages; reparented panels must be `setVisible(True)` in split columns (same gotcha the playlist documents at main.py:4909-4911).
+- **Signal re-entrancy:** guard with `_deck_loading` during refresh.
+- **Pin offered in split mode:** `_on_deck_pin_toggle` reads `_control_deck.widget(idx)`, which is only meaningful in tabbed mode. The ✕ header is the unpin path in split mode — don't rely on the context menu there.
+- **Height jump on mode toggle:** acceptable (deliberate); tab-switch-within-tabs must remain jump-free.
@@ -0,0 +1,66 @@
+# LTX-2 per-tab export mode — Design
+
+**Goal:** Add an export *pipeline mode* to each file-list tab — **Foley** (current behavior) or **LTX-2** — so the same source videos can feed both a Foley dataset (8 s clips) and an LTX-2 V2A dataset (frame-exact, ÷32, 25 fps) without the two ever mixing.
+
+**Depends on:** the per-tab export folder feature (branch `tab-export-folder`) — this design extends that per-tab state. Implementation branch `ltx2-preset` is based on it.
+
+**Scope:** soft preset (no hard enforcement — defaults are LTX-2-legal but every control stays editable). `core/` gains optional pipeline params; Foley path is byte-for-byte unchanged.
+
+---
+
+## LTX-2 constraints (why this exists)
+
+LTX-2 (32× spatial VAE, 8× temporal + 1) requires, for a clip:
+- **W and H each divisible by 32.**
+- **Frame count F such that `F % 8 == 1`** → 9, 17, 25, … 201, … (transformer seq-len ∝ `(W/32)·(H/32)·((F−1)/8+1)`).
+- **fps** only sets real duration `F/fps`; for V2A it fixes the paired-audio length and audio↔motion sync, so it must be **consistent across the dataset and equal to the inference `frame_rate`**. Target: **25 fps**.
+- V2A video is frozen conditioning → low spatial res (384–512) is fine and cheaper.
+
+Note: 8 s @ 25 fps = 200 frames, and `200 % 8 == 0` → **8 s is not legal**. Nearest legal: F=193 (7.72 s) or **F=201 (8.04 s)**.
+
+---
+
+## Model: per-tab mode
+
+Each tab (`PlaylistWidget`) gains `_mode ∈ {"foley","ltx2"}`, persisted alongside `_dest_folder`/`_pinned`/`_tab_folder` in `_save_playlist_tabs`/`_load_playlist_tabs`. Default `"foley"` → existing tabs load unchanged. The **active tab's mode drives the export pipeline and the length control.**
+
+### Tab context menu (`_PlaylistTabBar`)
+- **Duplicate as LTX-2** — headline action: clone the tab's file list + separators into a new tab; set `mode="ltx2"`; derive a separate export folder `"<dest_folder>_ltx2"`; load LTX-2 default geometry. Lets you spin an LTX-2 dataset off a Foley working set.
+- **Duplicate tab** — clone keeping the same mode.
+- **LTX-2 mode** — checkable, flips an existing tab between foley/ltx2.
+- Tab label shows a small **`[LTX2]`** badge when `mode=="ltx2"`.
+
+## What `ltx2` mode changes (soft — still editable)
+
+| Aspect | Foley | LTX-2 |
+|--------|-------|-------|
+| Clip length | Duration spinbox (seconds) | **Frame-count F** control stepping the legal series (9, 17, …, 201, …); shows `= F/25 s` |
+| Output fps | inherits source | **forced 25 fps** (resample; preserves duration/sync) |
+| Output W×H | short-side resize → even long side | **center-cropped to ÷32** on both axes (no aspect distortion; loses ≤31 px/side); resize default **512** |
+| Frame exactness | duration-based | exactly **F** frames (`-frames:v F`) |
+
+Defaults loaded on convert: resize **512**, **F = 201** (≈8.04 s, mirrors the 8 s Foley clips), ratio as set. All editable afterward.
+
+## Pipeline (`core/ffmpeg.build_ffmpeg_command`)
+
+Add optional params; Foley calls pass none → identical output to today:
+- `target_fps: float | None` — when set, append `fps={target_fps}` filter and `-r {target_fps}`.
+- `snap32: bool` — when true, after the scale append a centered crop to the nearest lower multiple of 32 on each axis: `crop=trunc(iw/32)*32:trunc(ih/32)*32`.
+- Frame-exact length: caller computes `duration = F/target_fps` and passes `-frames:v F` on the video output so the clip has exactly F frames; audio extract uses the same `F/target_fps` duration so V2A pairing stays aligned.
+
+Filter order: portrait-crop (aspect) → scale (short side, ÷32 default) → snap32 crop → fps. The snap32 center-crop runs after scaling so the ÷32 trim is on final pixels.
+
+## UI wiring (`MainWindow`)
+
+- The length spinbox area swaps with the active tab's mode: Foley shows *Duration (s)*; LTX-2 shows *Frames (F)* with a live `= s @25fps` readout. Switching tabs (or toggling mode) reconfigures it; uses the existing `_sync_folder_field_to_tab`-style sync hook on tab change.
+- `_on_export` / `_start_export_batch`: when the active tab is `ltx2`, pass `target_fps=25`, `snap32=True`, and frame-exact length to the ffmpeg builder; otherwise unchanged.
+- The mismatch guardrail (just added) and per-tab folder continue to apply.
+
+## Persistence & migration
+`_mode` added to each tab's saved JSON (default `"foley"` when absent). No DB changes. Existing sessions load every tab as Foley → zero behavior change until a tab is converted.
+
+## What this does NOT do
+- No hard enforcement: you can set an illegal F or non-÷32 resize manually; the pipeline still crops to ÷32 and uses whatever F you pick (the *control* defaults/steps keep you legal, but nothing blocks you).
+- No motion interpolation on fps resample (frame drop/dup only); keep sources native 25 fps where possible.
+- No change to Foley exports, the scan pipeline, or the DB schema.
+- No automatic re-export of existing clips into LTX-2 — you cut LTX-2 clips in the converted tab.
@@ -0,0 +1,179 @@
+# LTX-2 per-tab export mode — Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Add a per-tab export pipeline mode (Foley | LTX-2) so the same videos can feed both an 8 s Foley dataset and a frame-exact, ÷32, 25 fps LTX-2 V2A dataset, with a "Duplicate as LTX-2" tab action.
+
+**Architecture:** `core/ffmpeg.build_ffmpeg_command` gains optional `target_fps` / `snap32` / `frames` params (Foley path unchanged); a tiny `core/ltx2.py` holds the legal-frame math. `PlaylistWidget` gains `_mode`; the tab menu gains duplicate/convert actions; the length control + `_on_export` wiring switch on the active tab's mode. Soft preset — defaults are legal, everything stays editable.
+
+**Tech Stack:** Python 3.11+, PyQt6, ffmpeg, pytest. Branch `ltx2-preset` (based on `tab-export-folder`). Design: `docs/plans/2026-06-18-ltx2-preset-design.md`.
+
+---
+
+## Conventions
+- **Core (`core/ffmpeg.py`, `core/ltx2.py`) is real TDD** — pure functions tested in `tests/test_utils.py` style. Run: `LD_PRELOAD=/usr/lib/libstdc++.so.6 python -m pytest tests/test_utils.py -q` (the preload is needed because importing `main` pulls `mpv`; see `project_qt_test_env`). 3 pre-existing failures there are unrelated — don't count them.
+- **GUI parts** verified by the offscreen structure test (`LD_PRELOAD=/usr/lib/libstdc++.so.6 QT_QPA_PLATFORM=offscreen python -m pytest tests/test_ui_structure.py -v`) plus a **manual launch** (`./8cut.sh`).
+- Line numbers are starting anchors; locate by symbol. Commit per task. Co-author trailer on every commit:
+  `Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>`
+
+---
+
+## Stage 1 — LTX-2 math (`core/ltx2.py`) [TDD]
+
+### Task 1.1: legal-frame helpers
+**Files:** Create `core/ltx2.py`; Test in `tests/test_utils.py` (append).
+
+**Step 1 — failing tests** (append to `tests/test_utils.py`):
+```python
+from core.ltx2 import is_legal_frames, nearest_legal_frames, frames_for_duration, duration_for_frames, legal_frames
+
+def test_ltx2_is_legal():
+    assert is_legal_frames(201) and is_legal_frames(9) and is_legal_frames(25)
+    assert not is_legal_frames(200) and not is_legal_frames(8)
+
+def test_ltx2_nearest():
+    assert nearest_legal_frames(200) == 201   # 200 -> nearest 8k+1
+    assert nearest_legal_frames(196) == 193
+    assert nearest_legal_frames(5) == 9        # floor at 9
+
+def test_ltx2_duration_roundtrip():
+    assert duration_for_frames(201, 25) == 201 / 25
+    assert frames_for_duration(8.0, 25) == 201   # 200 -> 201
+
+def test_ltx2_legal_series():
+    s = legal_frames(min_f=9, max_f=33)
+    assert s == [9, 17, 25, 33]
+```
+**Step 2 — run, expect ImportError/FAIL:** `LD_PRELOAD=/usr/lib/libstdc++.so.6 python -m pytest tests/test_utils.py -k ltx2 -q`
+
+**Step 3 — implement `core/ltx2.py`:**
+```python
+"""LTX-2 frame-count math. Legal F satisfy F % 8 == 1 (8x temporal + 1)."""
+
+def is_legal_frames(f: int) -> bool:
+    return f >= 9 and f % 8 == 1
+
+def legal_frames(min_f: int = 9, max_f: int = 1000) -> list[int]:
+    start = max(9, min_f + ((1 - min_f) % 8))   # first 8k+1 >= min_f
+    return list(range(start, max_f + 1, 8))
+
+def nearest_legal_frames(f: int) -> int:
+    if f <= 9:
+        return 9
+    low = ((f - 1) // 8) * 8 + 1
+    high = low + 8
+    return low if (f - low) <= (high - f) else high
+
+def duration_for_frames(frames: int, fps: float) -> float:
+    return frames / fps
+
+def frames_for_duration(duration: float, fps: float) -> int:
+    return nearest_legal_frames(round(duration * fps))
+```
+**Step 4 — run, expect PASS** (same command). **Step 5 — commit:** `feat: LTX-2 legal-frame helpers (core/ltx2.py)`.
+
+---
+
+## Stage 2 — ffmpeg pipeline params [TDD]
+
+### Task 2.1: `target_fps`, `snap32`, `frames` in `build_ffmpeg_command`
+**Files:** Modify `core/ffmpeg.py:74` (`build_ffmpeg_command`); Test `tests/test_utils.py`.
+
+**Step 1 — failing tests:**
+```python
+def test_ffmpeg_ltx2_fps_and_frames():
+    cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/c.mp4",
+                               short_side=512, target_fps=25, frames=201)
+    assert "-r" in cmd and cmd[cmd.index("-r")+1] == "25"
+    assert "-frames:v" in cmd and cmd[cmd.index("-frames:v")+1] == "201"
+    vf = cmd[cmd.index("-vf")+1]
+    assert "fps=25" in vf
+
+def test_ffmpeg_ltx2_snap32_crop():
+    cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/c.mp4",
+                               short_side=512, snap32=True)
+    vf = cmd[cmd.index("-vf")+1]
+    assert "crop=trunc(iw/32)*32:trunc(ih/32)*32" in vf
+
+def test_ffmpeg_foley_unchanged():
+    cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/c.mp4", short_side=256)
+    assert "-r" not in cmd and "-frames:v" not in cmd
+    assert "crop=trunc" not in cmd[cmd.index("-vf")+1]
+```
+**Step 2 — run, expect FAIL** (unexpected kwargs). 
+
+**Step 3 — implement:** add params `target_fps: float | None = None, snap32: bool = False, frames: int | None = None` to the signature. After the scale filter (and before the VAAPI block), append:
+```python
+    if snap32:
+        filters.append("crop=trunc(iw/32)*32:trunc(ih/32)*32")
+    if target_fps is not None:
+        filters.append(f"fps={target_fps:g}")
+```
+Add output flags: after `-t duration` (or near the encoder args, before `output_path`), when `target_fps` is set add `cmd += ["-r", f"{target_fps:g}"]`; when `frames` is set add `cmd += ["-frames:v", str(frames)]` (video frame cap — exact F). Ensure the ordering keeps `-vf` before the output path. Append the `fps`/`snap32` filters outside the `image_sequence` `True`/`False` branches so both paths get them (the webp sequence also benefits from fps/÷32).
+
+**Step 4 — run, expect PASS.** Also run full `tests/test_utils.py` (the 3 pre-existing failures only). **Step 5 — commit:** `feat: LTX-2 ffmpeg params (target_fps, snap32, frames)`.
+
+### Task 2.2: audio extract honors frame-exact duration
+**Files:** `core/ffmpeg.py:145` (`build_audio_extract_command`) — confirm it takes a duration; if it derives from a fixed 8 s, add a `duration` param so the `.wav` for an LTX-2 webp sequence is exactly `F/25 s`. Add a test mirroring `test_audio_extract_timing` asserting the `-t` value equals `frames/fps`. Commit: `fix: audio extract duration for LTX-2 frame-exact clips`.
+
+---
+
+## Stage 3 — per-tab `_mode`
+
+### Task 3.1: attribute + persistence + migration
+**Files:** `main.py` — `PlaylistWidget.__init__` (~3409, next to `_dest_folder`); `_save_playlist_tabs` (~5271); `_load_playlist_tabs` (~5315).
+- Add `self._mode: str = "foley"` in `PlaylistWidget.__init__`.
+- `_save_playlist_tabs`: add `"mode": pw._mode` to each tab dict.
+- `_load_playlist_tabs`: after creating each pw, `pw._mode = t.get("mode", "foley")`.
+- `_add_playlist_tab`: new tabs default `_mode="foley"` (already via init).
+
+**Verify:** structure test passes; add `test_tab_mode_defaults_foley` (construct, assert each `_pws[i]._mode == "foley"`). Commit: `feat: per-tab export mode attribute (foley default)`.
+
+---
+
+## Stage 4 — tab menu: duplicate / convert / toggle
+
+### Task 4.1: menu actions + label badge
+**Files:** `main.py` — `_PlaylistTabBar.contextMenuEvent` (~3300) add items; new handlers in `MainWindow`; tab-title rendering.
+- Add to the tab context menu: **"Duplicate tab"**, **"Duplicate as LTX-2"**, and a checkable **"LTX-2 mode"** (checked when `pw._mode=="ltx2"`). Emit new signals (e.g. `duplicate_requested(idx, as_ltx2: bool)`, `mode_toggle_requested(idx)`) like the existing `pin_toggle_requested`.
+- `MainWindow._on_duplicate_tab(idx, as_ltx2)`: build a new tab via `_add_playlist_tab(label=…, files=list(src._paths), separators=sorted(src._separators_before), select=True)`; set `pw._dest_folder = src._dest_folder + ("_ltx2" if as_ltx2 else "")` (strip any trailing slash from `src._dest_folder` first, so the `_ltx2` folder is a sibling rather than a child); `pw._mode = "ltx2" if as_ltx2 else src._mode`; if ltx2, apply LTX-2 defaults (Stage 5 hook); `_save_playlist_tabs()`; refresh.
+- `MainWindow._on_tab_mode_toggle(idx)`: flip `pw._mode`; if now ltx2, apply LTX-2 defaults; `_save_playlist_tabs()`; re-sync controls (Stage 5).
+- Label badge: when adding/refreshing a tab whose `_mode=="ltx2"`, show `f"{label} [LTX2]"` (or set a distinct color) — apply in `_refresh_layout`/`_add_playlist_tab` title set.
+
+**Verify:** manual launch — right-click a tab → Duplicate as LTX-2 creates a `[LTX2]` tab with `_ltx2` folder; toggle works. Structure test still green. Commit: `feat: tab duplicate / Duplicate-as-LTX-2 / mode toggle + [LTX2] badge`.
+
+---
+
+## Stage 5 — length control swap + export wiring
+
+### Task 5.1: length control reflects active tab mode
+**Files:** `main.py` — the clip-length widgets (`_spn_clip_dur` ~4051 area) + the tab-change sync hook (`_on_tab_changed` / `_sync_folder_field_to_tab` neighbor).
+- Add a frames spinbox `_spn_frames` (min 9, singleStep 8 → arrow steps stay on 8k+1 from a legal value; typed values are not blocked; suffix " f"; live tooltip `= F/25 s`). Default 201.
+- Add `_apply_mode_to_controls()`: if active tab `ltx2` → show `_spn_frames` (+ "Frames" label), hide the seconds Duration control, default resize 512 if unset; else show Duration (seconds), hide frames. Call it from `_on_tab_changed`, after `_on_duplicate_tab`/`_on_tab_mode_toggle`, and once after `_load_playlist_tabs`.
+- A small label shows `= {F/25:.2f}s @25fps` updating on `_spn_frames.valueChanged`.
+
+### Task 5.2: route LTX-2 params through export
+**Files:** `main.py` — `_on_export` (~7317) + `ExportWorker` construction (~7484) + `_update_next_label`.
+- When the active tab's `_mode=="ltx2"`: compute `frames = self._spn_frames.value()`; `fps = 25`; `duration = frames / fps`; pass `target_fps=25, snap32=True, frames=frames, duration=duration` through to `ExportWorker` → `build_ffmpeg_command`. Default `short_side` to 512 if 0/None in ltx2.
+- Foley path: unchanged (no new params).
+- `ExportWorker.__init__`/`run`: thread the new params (default None/False) into `build_ffmpeg_command`.
+
+**Verify (manual, authoritative):** in an LTX-2 tab, export → inspect an output clip: `ffprobe` shows **25 fps, exactly F frames, W&H ÷32**; a Foley tab still exports 8 s/source-fps unchanged. Structure test green; full `pytest tests/test_utils.py` (3 pre-existing fails only). Commit: `feat: route LTX-2 (25fps, ÷32 crop, F frames) through export for ltx2 tabs`.
+
+---
+
+## Stage 6 — finalize
+- **Task 6.1:** Full regression — `pytest tests/test_ui_structure.py` + `tests/test_utils.py` separately; manual: Foley export unchanged, LTX-2 export legal (ffprobe), duplicate/convert, persistence across relaunch, guardrail + per-tab folder still work.
+- **Task 6.2:** Changelog (`main.py` CHANGELOG, bump APP_VERSION) + README note (per-tab LTX-2 mode). Commit `docs: changelog + README for LTX-2 export mode`.
+- **Task 6.3:** Hand off branch (depends on `tab-export-folder`; merge that first, then this).
+
+## Risks
+| Risk | Mitigation |
+|------|-----------|
+| `-frames:v` vs `-t` interaction yields F±1 frames | Set both `-t F/fps` and `-frames:v F`; verify exact count with ffprobe in 5.2. |
+| `fps` filter + HW (VAAPI) filter ordering | Place `fps`/`snap32` among CPU filters before the VAAPI hwupload block; test a HW-encoder build if available. |
+| Length-control swap leaves stale state across tab switches | `_apply_mode_to_controls()` called on every tab change + mode toggle + load. |
+| Depends on unmerged `tab-export-folder` | Branch is based on it; land that branch first. |
+
+## NOT in scope
+Hard enforcement (illegal F/resize allowed manually), motion-interpolated fps, auto re-export of existing Foley clips, DB schema changes, scan-pipeline changes.
@@ -13,6 +13,8 @@ soundfile>=0.12
 # or manually: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128
 torch>=2.0
 torchaudio>=2.0
+transformers>=4.30,<5.0  # EAT remote model code incompatible with transformers 5.x
+timm>=0.9

 # Object detection
 ultralytics>=8.0
@@ -22,25 +22,23 @@ if (Test-Path (Join-Path $venvDir "Scripts\python.exe")) {
 & "$venvDir\Scripts\Activate.ps1"

 # ── PyTorch ───────────────────────────────────────────────
-$hasTorch = python -c "import torch" 2>&1
-if ($LASTEXITCODE -eq 0) {
-    Write-Host "`nPyTorch already installed, skipping." -ForegroundColor Green
-} else {
 # Detect NVIDIA GPU via nvidia-smi
 $hasNvidia = Get-Command nvidia-smi -ErrorAction SilentlyContinue
 if ($hasNvidia) {
-        Write-Host "`nNVIDIA GPU detected — installing PyTorch with CUDA 12.8..." -ForegroundColor Green
-        pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128
+    $torchIndex = "https://download.pytorch.org/whl/cu128"
+    Write-Host "`nNVIDIA GPU detected — using CUDA 12.8 PyTorch index" -ForegroundColor Green
 } else {
-        Write-Host "`nNo NVIDIA GPU detected — installing CPU-only PyTorch..." -ForegroundColor Yellow
-        Write-Host "(Audio scanning will work but will be slower without GPU)" -ForegroundColor Yellow
-        pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
-    }
+    $torchIndex = "https://download.pytorch.org/whl/cpu"
+    Write-Host "`nNo NVIDIA GPU detected — using CPU-only PyTorch index" -ForegroundColor Yellow
 }
+# Always run the torch install against the index chosen above
+# (pip skips packages that are already installed; it only upgrades with --upgrade)
+Write-Host "Installing PyTorch + torchaudio + torchvision..."
+pip install torch torchaudio torchvision --index-url $torchIndex

 # ── Python deps ───────────────────────────────────────────
 Write-Host "`nInstalling project dependencies..."
-pip install -r (Join-Path $root "requirements.txt")
+pip install -r (Join-Path $root "requirements.txt") --extra-index-url $torchIndex

 # ── libmpv ────────────────────────────────────────────────
 $mpvDll = Join-Path $root "libmpv-2.dll"
@@ -66,10 +66,10 @@ setup_conda() {
    fi

    echo "  Installing PyTorch + torchaudio (CUDA 12.8)..."
-    pip install torch torchaudio --index-url "$TORCH_INDEX"
+    pip install torch torchaudio torchvision --index-url "$TORCH_INDEX"

    echo "  Installing project dependencies..."
-    pip install -r "$SCRIPT_DIR/requirements.txt"
+    pip install -r "$SCRIPT_DIR/requirements.txt" --extra-index-url "$TORCH_INDEX"

    echo ""
    echo "Done! Activate with:"
@@ -91,10 +91,10 @@ setup_venv() {
    source "$VENV_DIR/bin/activate"

    echo "  Installing PyTorch + torchaudio (CUDA 12.8)..."
-    pip install torch torchaudio --index-url "$TORCH_INDEX"
+    pip install torch torchaudio torchvision --index-url "$TORCH_INDEX"

    echo "  Installing project dependencies..."
-    pip install -r "$SCRIPT_DIR/requirements.txt"
+    pip install -r "$SCRIPT_DIR/requirements.txt" --extra-index-url "$TORCH_INDEX"

    echo ""
    echo "Done! Activate with:"
@@ -25,6 +25,39 @@ def test_default_model_path_contains_profile():
    assert path.endswith(".joblib")


+def test_embed_dim_multi_layer():
+    from core.audio_scan import _embed_dim
+    # Multi-layer models should report concatenated dimension
+    assert _embed_dim("HUBERT_XLARGE_ML") == 5120
+    assert _embed_dim("HUBERT_LARGE_ML") == 4096
+    assert _embed_dim("HUBERT_BASE_ML") == 3072
+    # Single-layer unchanged
+    assert _embed_dim("HUBERT_XLARGE") == 1280
+
+
+def test_ml_config():
+    from core.audio_scan import _ml_config
+    assert _ml_config("HUBERT_XLARGE") is None
+    assert _ml_config("BEATS_ML") is None  # BEATS has no ML variant
+    base, layers = _ml_config("HUBERT_XLARGE_ML")
+    assert base == "HUBERT_XLARGE"
+    assert layers == [11, 23, 35, 47]
+    base, layers = _ml_config("HUBERT_BASE_ML")
+    assert base == "HUBERT_BASE"
+    assert layers == [2, 5, 8, 11]
+
+
+def test_embed_dim_ast():
+    from core.audio_scan import _embed_dim
+    assert _embed_dim("AST") == 768
+    assert _embed_dim("AST_ML") == 3072
+
+
+def test_embed_dim_eat():
+    from core.audio_scan import _embed_dim
+    assert _embed_dim("EAT") == 768
+
+
 def test_db_get_all_export_paths():
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        path = f.name
@@ -0,0 +1,106 @@
+import os
+import tempfile
+
+from core.db import ProcessedDB
+
+
+def test_export_folders_excludes_scan_exports():
+    """Scan-export-only folders should not appear when include_scan_exports=False."""
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        db = ProcessedDB(path)
+        # Manual export
+        db.add("a.mp4", 10.0, "/out/mp4_Intense/g1/clip.mp4", profile="test")
+        # Scan export to different folder
+        db.add("a.mp4", 20.0, "/out/mp4_ScanOnly/g1/clip.mp4", profile="test",
+               scan_export=True)
+        folders = db.get_export_folders("test")
+        assert "mp4_Intense" in folders
+        assert "mp4_ScanOnly" not in folders, "scan-only folder should be excluded"
+        # With include_scan_exports=True, both should appear
+        folders_all = db.get_export_folders("test", include_scan_exports=True)
+        assert "mp4_ScanOnly" in folders_all
+    finally:
+        os.unlink(path)
+
+
+def test_scan_result_history():
+    """save_scan_results should keep multiple versions."""
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        db = ProcessedDB(path)
+        # Save three versions (microsecond-precision timestamps avoid collisions)
+        db.save_scan_results("v.mp4", "test", "MODEL_A", [(0, 8, 0.9)])
+        db.save_scan_results("v.mp4", "test", "MODEL_A",
+                             [(0, 8, 0.8), (10, 18, 0.7)])
+        db.save_scan_results("v.mp4", "test", "MODEL_A", [(5, 13, 0.95)])
+        versions = db.get_scan_versions("v.mp4", "test", "MODEL_A")
+        assert len(versions) == 3
+        # Most recent first
+        assert versions[0]["count"] == 1   # latest: 1 region
+        assert versions[1]["count"] == 2   # middle: 2 regions
+        assert versions[2]["count"] == 1   # oldest: 1 region
+        # get_scan_results returns latest version by default
+        results = db.get_scan_results("v.mp4", "test")
+        assert len(results.get("MODEL_A", [])) == 1
+    finally:
+        os.unlink(path)
+
+
+def test_hard_negatives_source_model():
+    """Hard negatives should store source_model."""
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        db = ProcessedDB(path)
+        db.add_hard_negatives("a.mp4", "test", [10.0, 20.0],
+                              source_path="/a.mp4", source_model="HUBERT_XLARGE")
+        rows = db.get_hard_negatives("test")
+        assert len(rows) == 2
+        assert all(r["source_model"] == "HUBERT_XLARGE" for r in rows)
+    finally:
+        os.unlink(path)
+
+
+def test_training_data_skips_hard_negatives():
+    """get_training_data with use_hard_negatives=False should skip them."""
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        db = ProcessedDB(path)
+        # Create a source file that "exists" — use the temp db file itself
+        db.add("a.mp4", 10.0, "/out/folder/g/clip.mp4", profile="test",
+               source_path=path)
+        db.add_hard_negatives("a.mp4", "test", [500.0], source_path=path)
+        # With hard negatives
+        data_with = db.get_training_data("test", "folder", use_hard_negatives=True)
+        # Without hard negatives
+        data_without = db.get_training_data("test", "folder", use_hard_negatives=False)
+        assert len(data_with) >= 1
+        # The "with" case should have the hard negative time in neg list
+        neg_with = sum(len(vi[3]) for vi in data_with)
+        neg_without = sum(len(vi[3]) for vi in data_without)
+        assert neg_with > neg_without, "hard negatives should be excluded when use_hard_negatives=False"
+    finally:
+        os.unlink(path)
+
+
+def test_delete_hard_negatives_by_ids():
+    """delete_hard_negatives_by_ids should remove specific rows."""
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        db = ProcessedDB(path)
+        db.add_hard_negatives("a.mp4", "test", [10.0, 20.0, 30.0],
+                              source_path="/a.mp4")
+        rows = db.get_hard_negatives("test")
+        assert len(rows) == 3
+        # Delete first two
+        db.delete_hard_negatives_by_ids([rows[0]["id"], rows[1]["id"]])
+        remaining = db.get_hard_negatives("test")
+        assert len(remaining) == 1
+        assert remaining[0]["start_time"] == 30.0
+    finally:
+        os.unlink(path)
@@ -0,0 +1,273 @@
+import pytest
+
+# Redirect QSettings to a throwaway dir BEFORE any MainWindow is constructed, so
+# these GUI tests can never read or clobber the user's real ~/.config/8cut.conf
+# (constructing MainWindow loads — and on window close re-saves — the playlist
+# tabs; a test mutating tab state would otherwise persist into the real session).
+import tempfile as _tempfile
+from PyQt6.QtCore import QSettings as _QSettings
+_QS_DIR = _tempfile.mkdtemp(prefix="8cut-test-qs-")
+_QSettings.setPath(_QSettings.Format.NativeFormat, _QSettings.Scope.UserScope, _QS_DIR)
+_QSettings.setPath(_QSettings.Format.IniFormat, _QSettings.Scope.UserScope, _QS_DIR)
+
+# A real platform is needed because MpvWidget creates a GL context.
+# If construction fails for any environment reason, skip — this test is a
+# best-effort structural net, not a gate on core/ tests.
+pytestmark = pytest.mark.gui
+
+
+@pytest.fixture(scope="module")
+def app():
+    from PyQt6.QtWidgets import QApplication
+    inst = QApplication.instance() or QApplication([])
+    yield inst
+
+
+@pytest.fixture
+def win(app):
+    try:
+        from main import MainWindow
+        w = MainWindow()
+    except Exception as e:  # GL/mpv/display unavailable, etc.
+        pytest.skip(f"MainWindow could not be constructed here: {e}")
+    # Deterministic deck state regardless of any persisted side-by-side layout
+    # (construction restores deck_pinned from QSettings).
+    for _p in w._deck_panels:
+        _p._pinned = False
+    w._refresh_deck_layout()
+    yield w
+    w.close()
+    w.deleteLater()
+
+
+def test_window_constructs(win):
+    assert win.windowTitle().startswith("8-cut")
+
+
+def test_status_bar_exists(win):
+    assert win.statusBar() is not None
+
+
+def test_workers_spinbox_in_export_tab(win):
+    from PyQt6.QtWidgets import QSpinBox
+    assert win._spn_workers in win._tab_export.findChildren(QSpinBox)
+
+
+def test_scan_button_in_scan_tab(win):
+    from PyQt6.QtWidgets import QPushButton
+    assert win._btn_scan in win._tab_scan.findChildren(QPushButton)
+
+
+def test_portrait_combo_in_crop_tab(win):
+    from PyQt6.QtWidgets import QComboBox
+    assert win._cmb_portrait in win._tab_crop.findChildren(QComboBox)
+
+
+def test_menu_only_buttons_not_in_deck(win):
+    from PyQt6.QtWidgets import QPushButton
+    deck_btns = win._control_deck.findChildren(QPushButton)
+    assert win._btn_train not in deck_btns
+    assert win._btn_scan_all not in deck_btns
+    assert win._btn_hide_subcats not in deck_btns
+
+
+def test_deck_stack_exists(win):
+    # The deck is wrapped in a stack so it can swap tabbed <-> side-by-side.
+    # Default (nothing pinned) shows the tabbed control deck.
+    assert win._deck_stack is not None
+    assert win._deck_stack.currentWidget() is win._control_deck
+
+
+def _split_columns(win):
+    """Widgets of the splitter actually mounted in the layout (not findChild,
+    which can return a stale deleteLater'd splitter)."""
+    from PyQt6.QtWidgets import QSplitter
+    item = win._deck_split_layout.itemAt(0)
+    spl = item.widget() if item else None
+    assert isinstance(spl, QSplitter)
+    return [spl.widget(i) for i in range(spl.count())]
+
+
+def test_pinning_two_panels_shows_exactly_two_columns(win):
+    # Pin two panels directly (avoid the toggle handler so no QSettings write
+    # leaks into other test windows) and refresh.
+    from PyQt6.QtWidgets import QTabWidget
+    win._tab_export._pinned = True
+    win._tab_crop._pinned = True
+    win._refresh_deck_layout()
+    assert win._deck_stack.currentWidget() is win._deck_split_container
+    cols = _split_columns(win)
+    assert len(cols) == 2                                    # only the pinned ones
+    assert not any(isinstance(c, QTabWidget) for c in cols)  # no leftover tab-column
+
+
+def test_side_by_side_menu_pins_third_panel(win):
+    # In split mode the View ▸ Side-by-side menu is the way to pin a 3rd panel
+    # (there's no tab bar to right-click). Suppress the QSettings save via the
+    # _deck_loading guard so this doesn't leak into other windows.
+    win._tab_export._pinned = True
+    win._tab_scan._pinned = True
+    win._refresh_deck_layout()
+    assert len(_split_columns(win)) == 2
+    act = next(a for a, p in win._deck_pin_actions if p is win._tab_crop)
+    win._deck_loading = True            # suppress _save_deck_layout
+    try:
+        act.trigger()                   # simulate clicking the menu item
+    finally:
+        win._deck_loading = False
+    assert win._tab_crop._pinned is True
+    assert len(_split_columns(win)) == 3
+
+
+def test_duplicate_tab(win):
+    # Right-click → Duplicate tab: clones files into a new tab with an adapted
+    # name + adapted own folder, no file moves. Suppress QSettings writes via
+    # _loading_tabs so the test can't touch the real session.
+    win._loading_tabs = True
+    try:
+        src = win._pws[0]
+        src._label = "AlexisCrystal"
+        src._dest_folder = "/data/alexis/"   # trailing slash, like real folders
+        n_before = len(win._pws)
+        win._on_duplicate_tab(win._playlist_tabs.indexOf(src))
+    finally:
+        win._loading_tabs = False
+    assert len(win._pws) == n_before + 1
+    dup = win._pws[-1]
+    assert dup._label == "AlexisCrystal copy"
+    # sibling, not a child: ".../alexis/" -> ".../alexis_copy" (not ".../alexis/_copy")
+    assert dup._dest_folder == "/data/alexis_copy"
+
+
+def test_tab_mode_defaults_foley(win):
+    # Fresh tabs use the Foley pipeline; sessions/tabs without a stored mode
+    # load unchanged.
+    assert win._pws
+    for pw in win._pws:
+        assert pw._mode == "foley"
+
+
+def test_tab_mode_toggle(win):
+    # Right-click → "LTX-2 mode" flips the per-tab mode and the displayed title
+    # gains a [LTX2] badge (without mutating pw._label). Suppress QSettings
+    # writes via _loading_tabs so the test can't touch the real session.
+    win._loading_tabs = True
+    try:
+        win._on_tab_mode_toggle(win._playlist_tabs.indexOf(win._pws[0]))
+    finally:
+        win._loading_tabs = False
+    assert win._pws[0]._mode == "ltx2"
+    assert win._tab_title(win._pws[0]).endswith("[LTX2]")
+
+
+def test_ltx2_params_none_for_foley(win):
+    # A Foley tab feeds no LTX-2 ffmpeg params into export. Set the mode
+    # explicitly: a prior test's closeEvent can persist an ltx2 tab into the
+    # shared (throwaway) QSettings, so don't rely on the loaded default here.
+    win._playlist._mode = "foley"
+    assert win._ltx2_export_params() is None
+
+
+def test_ltx2_params_for_ltx2_tab(win):
+    # An ltx2-mode active tab: _ltx2_export_params returns the 25fps / ÷32 /
+    # exact-frames kwargs, and _apply_mode_to_controls swaps the length control
+    # (Duration hidden, frames shown). short_side defaults to 512 when unset.
+    win._spn_resize.setValue(0)            # force the 512 LTX-2 default path
+    win._pws[0]._mode = "ltx2"
+    win._active_pw = win._pws[0]
+    win._playlist_tabs.setCurrentWidget(win._pws[0])
+    win._spn_frames.setValue(201)
+    win._apply_mode_to_controls()
+
+    assert win._ltx2_export_params() == {
+        "target_fps": 25.0,
+        "snap32": True,
+        "frames": 201,
+        "duration": 201 / 25,
+        "short_side": 512,
+    }
+    # In offscreen, isVisibleTo(win) may be False for both; assert via the
+    # show/hide flag that the Duration control is hidden in ltx2 mode.
+    assert win._spn_clip_dur.isHidden()
+    assert not win._spn_frames.isHidden()
+
+
+def test_duplicate_preserves_ltx2_mode(win):
+    # Duplicating an LTX-2 tab must yield an LTX-2 tab (mode is copied alongside
+    # the folder fields). Suppress QSettings writes via _loading_tabs.
+    win._loading_tabs = True
+    try:
+        src = win._pws[0]
+        src._mode = "ltx2"
+        win._on_duplicate_tab(win._playlist_tabs.indexOf(src))
+    finally:
+        win._loading_tabs = False
+    dup = win._pws[-1]
+    assert dup._mode == "ltx2"
+
+
+def test_frames_snaps_to_legal(win):
+    # A typed (illegal) frame count snaps to the nearest legal 8k+1 value so the
+    # displayed value == the exported value and is always a valid LTX-2 clip.
+    win._spn_frames.setValue(100)
+    win._snap_frames_to_legal()              # the editingFinished slot
+    assert win._spn_frames.value() == 97     # nearest 8k+1 to 100
+    assert (win._spn_frames.value() - 1) % 8 == 0
+
+
+def test_export_base_name_handles_trailing_slash(win):
+    # A folder ending in "/" must still yield the real base name, else
+    # subprofile naming breaks ("_blowjob" instead of "mp4_blowjob").
+    win._txt_folder.setText("/x/AlexisCrystal/mp4/")
+    assert win._export_base_name() == "mp4"
+    win._txt_folder.setText("/x/AlexisCrystal/mp4")
+    assert win._export_base_name() == "mp4"
+
+
+def test_subprofile_button_visibility_exact_match(win):
+    # A subcategory's export button must track ITS folder exactly. A ghost
+    # "_blowjob" (empty-base leftover) or an unrelated "mp4_no_clap" must NOT
+    # hide the "blowjob"/"clap" buttons (the old fuzzy endswith() match did,
+    # so enabling a subcategory never revealed its export button).
+    win._txt_folder.setText("/x/AlexisCrystal/mp4")
+    win._subprofiles = ["blowjob", "clap"]
+    win._rebuild_subprofile_buttons()
+    btns = {b.text().removeprefix("▸ "): b for b in win._subprofile_btns}
+
+    win._hidden_subcats = {"_blowjob", "mp4_no_clap"}
+    win._apply_subcat_visibility()
+    assert not btns["blowjob"].isHidden()   # ghost "_blowjob" must not hide it
+    assert not btns["clap"].isHidden()      # "mp4_no_clap" must not hide "clap"
+
+    win._hidden_subcats = {"mp4_blowjob"}    # exact folder -> hidden
+    win._apply_subcat_visibility()
+    assert btns["blowjob"].isHidden()
+    assert not btns["clap"].isHidden()
+
+
+def test_extract_audio_controls_exist(win):
+    from PyQt6.QtWidgets import QPushButton, QDoubleSpinBox
+    assert isinstance(win._btn_extract_audio, QPushButton)
+    assert isinstance(win._spn_audio_len, QDoubleSpinBox)
+    # Disabled until a file is loaded.
+    assert not win._btn_extract_audio.isEnabled()
+    # Arrows step by 1s and there's no practical upper cap (long audio areas).
+    assert win._spn_audio_len.singleStep() == 1.0
+    assert win._spn_audio_len.maximum() >= 3600.0
+
+
+def test_audio_region_tracks_cursor_and_length(win):
+    # The teal audio band spans [cursor, cursor + length]; changing the length
+    # or moving the cursor moves the band. Fake a loaded file so the guard in
+    # _update_audio_region passes.
+    win._file_path = "/x/video.mp4"
+    win._cursor = 10.0
+    win._spn_audio_len.setValue(4.0)     # fires _on_audio_len_changed
+    assert win._timeline._audio_region == (10.0, 14.0)
+    win._cursor = 20.0
+    win._update_audio_region()
+    assert win._timeline._audio_region == (20.0, 24.0)
+    # No file -> band cleared.
+    win._file_path = ""
+    win._update_audio_region()
+    assert win._timeline._audio_region is None
@@ -1,25 +1,26 @@
 import tempfile, os, json
 from main import build_export_path, format_time, build_ffmpeg_command, build_sequence_dir, build_audio_extract_command, resolve_keyframe, apply_keyframes_to_jobs
+from core.ffmpeg import build_audio_clip_command
 from core.annotations import build_annotation_json_path, upsert_clip_annotation
 from main import ProcessedDB


 def test_build_export_path_first():
-    assert build_export_path("/out", "clip", 1) == "/out/clip_001/clip_001.mp4"
+    assert build_export_path("/out", "clip", 1) == "/out/clip_001.mp4"  # flat: no per-clip subfolder

 def test_build_export_path_counter():
-    assert build_export_path("/out", "clip", 42) == "/out/clip_042/clip_042.mp4"
+    assert build_export_path("/out", "clip", 42) == "/out/clip_042.mp4"  # 42 -> zero-padded "042"

 def test_build_export_path_deep_counter():
-    assert build_export_path("/out", "shot", 999) == "/out/shot_999/shot_999.mp4"
+    assert build_export_path("/out", "shot", 999) == "/out/shot_999.mp4"  # three-digit counter, different name

 def test_build_export_path_sub():
-    assert build_export_path("/out", "clip", 1, sub=0) == "/out/clip_001/clip_001_0.mp4"
-    assert build_export_path("/out", "clip", 1, sub=2) == "/out/clip_001/clip_001_2.mp4"
+    assert build_export_path("/out", "clip", 1, sub=0) == "/out/clip_001_0.mp4"  # sub index appended as "_0"
+    assert build_export_path("/out", "clip", 1, sub=2) == "/out/clip_001_2.mp4"  # same counter, sub "_2"

 def test_build_sequence_dir_sub():
-    assert build_sequence_dir("/out", "clip", 1, sub=0) == "/out/clip_001/clip_001_0"
-    assert build_sequence_dir("/out", "clip", 1, sub=1) == "/out/clip_001/clip_001_1"
+    assert build_sequence_dir("/out", "clip", 1, sub=0) == "/out/clip_001_0"  # directory path: no file extension
+    assert build_sequence_dir("/out", "clip", 1, sub=1) == "/out/clip_001_1"  # sub index appended as "_1"

 def test_format_time_seconds():
    assert format_time(0.0) == "0:00.0"
@@ -54,6 +55,27 @@ def test_ffmpeg_command_with_resize():
    assert cmd[-1] == "/out/clip_001.mp4"


+def test_audio_clip_command_exact_length():
+    """Seek precedes the input, the duration is exact, and video is dropped."""
+    argv = build_audio_clip_command("/in/video.mp4", 12.5, 3.2, "/out/clip.wav")
+    seek_at, dur_at = argv.index("-ss"), argv.index("-t")
+    assert argv[0] == "ffmpeg"
+    assert (argv[seek_at + 1], argv[dur_at + 1]) == ("12.5", "3.2")
+    assert seek_at < argv.index("-i")  # -ss placed ahead of -i
+    assert "-vn" in argv
+    assert argv[-1] == "/out/clip.wav"
+
+def test_audio_clip_command_codec_by_extension():
+    """Known extensions select a codec; unknown ones leave -c:a to ffmpeg."""
+    expected = {"wav": "pcm_s16le", "mp3": "libmp3lame", "flac": "flac"}
+    for ext, codec in expected.items():
+        assert codec in build_audio_clip_command("/in.mp4", 0, 1, f"/o/a.{ext}")
+    assert "-c:a" not in build_audio_clip_command("/in.mp4", 0, 1, "/o/a.xyz")
+
+def test_audio_clip_command_extension_case_insensitive():
+    assert "flac" in build_audio_clip_command("/in.mp4", 0, 1, "/o/A.FLAC")  # upper-case suffix still selects flac
+
+
 # --- ProcessedDB ---

 def test_db_add_and_get_markers():
@@ -178,10 +200,10 @@ def test_audio_extract_timing():


 def test_build_sequence_dir_basic():
-    assert build_sequence_dir("/out", "clip", 1) == "/out/clip_001/clip_001"
+    assert build_sequence_dir("/out", "clip", 1) == "/out/clip_001"  # flat: directly under the output folder

 def test_build_sequence_dir_counter():
-    assert build_sequence_dir("/out", "clip", 42) == "/out/clip_042/clip_042"
+    assert build_sequence_dir("/out", "clip", 42) == "/out/clip_042"  # 42 -> zero-padded "042"

 def test_ffmpeg_command_image_sequence():
    cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/seq_001", image_sequence=True)
@@ -265,13 +287,13 @@ def test_db_get_group_returns_all_sub_clips():
        path = f.name
    try:
        db = ProcessedDB(path)
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_2.mp4")
-        group = db.get_group("/out/clip_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_2.mp4")
+        group = db.get_group("/out/vid_001/clip_001_0.mp4")
        assert len(group) == 3
-        assert "/out/clip_001/clip_001_0.mp4" in group
-        assert "/out/clip_001/clip_001_2.mp4" in group
+        assert "/out/vid_001/clip_001_0.mp4" in group
+        assert "/out/vid_001/clip_001_2.mp4" in group
    finally:
        os.unlink(path)

@@ -281,10 +303,10 @@ def test_db_get_group_isolates_by_start_time():
        path = f.name
    try:
        db = ProcessedDB(path)
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
-        db.add("video.mp4", 30.0, "/out/clip_002/clip_002_0.mp4")
-        group = db.get_group("/out/clip_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
+        db.add("video.mp4", 30.0, "/out/vid_001/clip_002_0.mp4")
+        group = db.get_group("/out/vid_001/clip_001_0.mp4")
        assert len(group) == 2
    finally:
        os.unlink(path)
@@ -295,10 +317,10 @@ def test_db_delete_group_removes_all():
        path = f.name
    try:
        db = ProcessedDB(path)
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
-        db.add("video.mp4", 30.0, "/out/clip_002/clip_002_0.mp4")
-        deleted = db.delete_group("/out/clip_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
+        db.add("video.mp4", 30.0, "/out/vid_001/clip_002_0.mp4")
+        deleted = db.delete_group("/out/vid_001/clip_001_0.mp4")
        assert len(deleted) == 2
        # clip_002 should still exist
        markers = db.get_markers("video.mp4")
@@ -439,3 +461,57 @@ def test_apply_keyframes_before_first_uses_base():
    result = apply_keyframes_to_jobs(jobs, kfs, base_center=0.5, base_ratio="4:5",
                                     base_rand_p=True, base_rand_s=False)
    assert result == [(1.0, "/out/a", "4:5", 0.5, True, False)]
+
+
+# --- LTX-2 legal-frame math (core/ltx2.py) ---
+
+from core.ltx2 import is_legal_frames, nearest_legal_frames, frames_for_duration, duration_for_frames, legal_frames
+
+def test_ltx2_is_legal():
+    assert all(is_legal_frames(n) for n in (201, 9, 25))  # each is 8k+1
+    assert not any(is_legal_frames(n) for n in (200, 8))
+
+def test_ltx2_nearest():
+    # 200 rounds to the nearest 8k+1; 5 is lifted to the floor of 9.
+    for raw, snapped in ((200, 201), (196, 193), (5, 9)):
+        assert nearest_legal_frames(raw) == snapped
+
+def test_ltx2_duration_roundtrip():
+    secs, count = duration_for_frames(201, 25), frames_for_duration(8.0, 25)
+    assert (secs, count) == (201 / 25, 201)   # 8.0 s at 25 fps: 200 -> 201
+
+def test_ltx2_legal_series():
+    # Every 8k+1 count from 9 through 33, both bounds included.
+    assert legal_frames(min_f=9, max_f=33) == [9, 17, 25, 33]
+
+
+# --- LTX-2 ffmpeg params (target_fps, snap32, frames) ---
+
+def test_ffmpeg_ltx2_fps_and_frames():
+    """target_fps and frames surface as -r, -frames:v and an fps filter."""
+    argv = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/c.mp4",
+                                short_side=512, target_fps=25, frames=201)
+    for flag, value in (("-r", "25"), ("-frames:v", "201")):
+        assert flag in argv and argv[argv.index(flag) + 1] == value
+    assert "fps=25" in argv[argv.index("-vf") + 1]
+
+def test_ffmpeg_ltx2_snap32_crop():
+    # snap32 must add a crop that truncates both sides to multiples of 32.
+    opts = dict(short_side=512, snap32=True)
+    argv = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/c.mp4", **opts)
+    assert "crop=trunc(iw/32)*32:trunc(ih/32)*32" in argv[argv.index("-vf") + 1]
+
+def test_ffmpeg_foley_unchanged():
+    argv = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/c.mp4", short_side=256)
+    assert not any(flag in argv for flag in ("-r", "-frames:v"))  # no LTX-2 flags
+    assert "crop=trunc" not in argv[argv.index("-vf") + 1]
+
+
+# --- LTX-2 audio extract frame-exact duration ---
+
+def test_audio_extract_ltx2_duration():
+    """A frame-derived duration reaches the -t argument as str(duration)."""
+    seconds = 201 / 25
+    argv = build_audio_extract_command("/in/v.mp4", 0.0, "/out/clip_001",
+                                       duration=seconds)
+    assert "-t" in argv and argv[argv.index("-t") + 1] == str(seconds)