feat: integrate training UI, BEATs model, and clean up legacy code

- Remove legacy distance-mode scanning (build_profile, _similarity, etc.) and hand-crafted intensity features — pipeline is now embedding-only
- Integrate Microsoft BEATs as embedding option alongside wav2vec2/HuBERT
- Add TrainDialog with positive class selector, model picker, video dir fallback, and live training stats
- Add TrainWorker QThread with cancel support and proper lifecycle cleanup
- Add source_path column to DB for robust source video tracking
- Add get_export_folders/get_training_data/get_training_stats to DB
- Wire source_path in all export DB writes (_on_clip_done, _on_auto_clip_done)
- Cancel scan/train workers in closeEvent to prevent use-after-free crashes
- Add setup_env.sh supporting both conda and python venv (CUDA 12.8)
- Update requirements.txt with all actual dependencies
- Update 8cut_train.py with --positive flag for new DB-driven training

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feat: rewrite audio scan with MFCC+delta+spectral contrast pipeline
2026-04-18 11:52:27 +02:00 · 2026-04-17 15:28:44 +02:00 · 2026-04-17 09:27:11 +02:00 · 2026-04-17 09:21:14 +02:00 · 2026-04-17 09:12:24 +02:00 · 2026-04-17 09:02:35 +02:00
89 changed files with 16575 additions and 616 deletions
@@ -0,0 +1,36 @@
+name: Docker Image
+
+on:
+  workflow_dispatch:  # manual only — build locally and push to ghcr.io
+
+permissions:
+  contents: read
+  packages: write
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - uses: docker/metadata-action@v5
+        id: meta
+        with:
+          images: ghcr.io/${{ github.repository }}-server
+          tags: |
+            type=ref,event=branch
+            type=semver,pattern={{version}}
+            type=sha,prefix=
+
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
@@ -3,3 +3,5 @@ __pycache__/
 *.pyo
 .pytest_cache/
 .worktrees/
+client/node_modules/
+client/src-tauri/target/
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""Calibration — per-video normalized features + classifier."""
+import sys, os, time, warnings
+sys.path.insert(0, os.path.dirname(__file__))
+warnings.filterwarnings("ignore")
+
+import numpy as np
+import librosa
+from sklearn.ensemble import GradientBoostingClassifier
+
+from core.audio_scan import _SR, _WINDOW
+
+_HOP_LENGTH = 1024
+_N_FFT = 2048
+from core.db import ProcessedDB
+
+PLEX_DIR = "/media/unraid/appdata/plex/download/porn_jav/"
+PROFILE_NAME = "JAV_missionary"
+TOLERANCE = 12.0
+NEG_MARGIN = 120.0
+
+
+def extract_rich_features(y, sr=_SR):
+    """Per-frame features: onset, energy, spectral shape, mel bands (22 features)."""
+    hop = _HOP_LENGTH
+    S = np.abs(librosa.stft(y, n_fft=_N_FFT, hop_length=hop)) ** 2
+    rms = librosa.feature.rms(S=S, hop_length=hop)
+    cent = librosa.feature.spectral_centroid(S=S, sr=sr)
+    bw = librosa.feature.spectral_bandwidth(S=S, sr=sr)
+    rolloff = librosa.feature.spectral_rolloff(S=S, sr=sr)
+    flatness = librosa.feature.spectral_flatness(S=S)
+    zcr = librosa.feature.zero_crossing_rate(y, hop_length=hop)
+    onset = librosa.onset.onset_strength(S=librosa.power_to_db(S), sr=sr, hop_length=hop).reshape(1, -1)
+
+    mel_S = librosa.feature.melspectrogram(S=S, sr=sr, hop_length=hop, n_mels=128)
+    mel_freqs = librosa.mel_frequencies(n_mels=128, fmin=0, fmax=sr/2)
+    bands = [(0, 100), (100, 300), (300, 600), (600, 1200),
+             (1200, 2000), (2000, 3500), (3500, 5500), (5500, 8000)]
+    band_feats = []
+    for flo, fhi in bands:
+        mask = (mel_freqs >= flo) & (mel_freqs < fhi)
+        if mask.sum() > 0:
+            band_feats.append(librosa.power_to_db(mel_S[mask].mean(axis=0, keepdims=True) + 1e-10))
+        else:
+            band_feats.append(np.zeros((1, mel_S.shape[1])))
+
+    sc = librosa.feature.spectral_contrast(S=S, sr=sr, hop_length=hop)
+
+    min_t = min(rms.shape[1], cent.shape[1], onset.shape[1], sc.shape[1],
+                band_feats[0].shape[1])
+    return np.vstack([
+        rms[:, :min_t], cent[:, :min_t], bw[:, :min_t], rolloff[:, :min_t],
+        flatness[:, :min_t], zcr[:, :min_t], onset[:, :min_t],
+    ] + [b[:, :min_t] for b in band_feats]
+    + [sc[:, :min_t]])
+
+
+def compute_window_stats(feat, hop=1.0):
+    """Sliding window mean/std → (timestamps, feature_vectors)."""
+    n_feats, T = feat.shape
+    fps = _SR / _HOP_LENGTH
+    win_frames = int(_WINDOW * fps)
+    hop_frames = int(hop * fps)
+    if win_frames > T:
+        return np.array([]), np.array([])
+
+    cumsum = np.zeros((n_feats, T + 1))
+    cumsum[:, 1:] = np.cumsum(feat, axis=1)
+    cumsq = np.zeros((n_feats, T + 1))
+    cumsq[:, 1:] = np.cumsum(feat ** 2, axis=1)
+
+    starts = np.arange(0, T - win_frames + 1, hop_frames)
+    ends = starts + win_frames
+    sums = cumsum[:, ends] - cumsum[:, starts]
+    sq_sums = cumsq[:, ends] - cumsq[:, starts]
+    means = sums / win_frames
+    stds = np.sqrt(np.maximum(sq_sums / win_frames - means ** 2, 0) + 1e-10)
+
+    return starts / fps, np.vstack([means, stds]).T
+
+
+def label_windows(timestamps, gt_intense, gt_soft):
+    all_gt = list(gt_intense) + list(gt_soft)
+    labels = np.zeros(len(timestamps), dtype=int)
+    for i, t in enumerate(timestamps):
+        di = min((abs(t - g) for g in gt_intense), default=9999)
+        da = min((abs(t - g) for g in all_gt), default=9999)
+        if di < TOLERANCE:
+            labels[i] = 1
+        elif da > NEG_MARGIN:
+            labels[i] = -1
+    return labels
+
+
+def main():
+    """Run the calibration experiment and print its report to stdout.
+
+    Loads ground-truth clip start times for ``PROFILE_NAME`` from the
+    database, extracts windowed audio features for each video found under
+    ``PLEX_DIR``, then evaluates a gradient-boosting classifier with
+    leave-one-video-out cross-validation on both raw and per-video z-scored
+    features.  An optional first command-line argument caps the number of
+    videos processed.
+    """
+    db = ProcessedDB()
+    rows = db._con.execute(
+        "SELECT filename, start_time, output_path FROM processed WHERE profile = ?",
+        (PROFILE_NAME,),
+    ).fetchall()
+
+    # Group clip start times per source file by the export folder in the path.
+    intense_by_video, soft_by_video = {}, {}
+    for fn, st, op in rows:
+        if '/mp4_Intense/' in op:
+            intense_by_video.setdefault(fn, set()).add(st)
+        elif '/mp4_Soft/' in op:
+            soft_by_video.setdefault(fn, set()).add(st)
+
+    # Only videos with at least one intense clip that still exist on disk.
+    videos = [fn for fn in intense_by_video
+              if os.path.exists(os.path.join(PLEX_DIR, fn))]
+    n_vids = int(sys.argv[1]) if len(sys.argv) > 1 else len(videos)
+    videos = videos[:n_vids]
+    print(f"Processing {len(videos)} videos...")
+
+    all_data_raw = []    # raw features
+    all_data_norm = []   # per-video z-scored features
+
+    for vi, vname in enumerate(videos):
+        vpath = os.path.join(PLEX_DIR, vname)
+        gt_intense = sorted(intense_by_video.get(vname, set()))
+        gt_soft = sorted(soft_by_video.get(vname, set()))
+
+        # dt measures audio loading plus feature extraction for this video.
+        t0 = time.time()
+        y, _ = librosa.load(vpath, sr=_SR, mono=True)
+        feat = extract_rich_features(y)
+        timestamps, window_vectors = compute_window_stats(feat, hop=1.0)
+        dt = time.time() - t0
+
+        # Audio shorter than one window yields nothing to label.
+        if len(timestamps) == 0:
+            continue
+
+        labels = label_windows(timestamps, gt_intense, gt_soft)
+
+        # Per-video z-score normalization
+        vid_mean = window_vectors.mean(axis=0)
+        vid_std = window_vectors.std(axis=0)
+        vid_std = np.maximum(vid_std, 1e-6)
+        normed = (window_vectors - vid_mean) / vid_std
+
+        n_pos = (labels == 1).sum()
+        n_neg = (labels == -1).sum()
+        print(f"  [{vi+1}/{len(videos)}] {vname[:55]}  pos={n_pos} neg={n_neg} ({dt:.1f}s)")
+
+        all_data_raw.append((vi, vname, timestamps, window_vectors, labels))
+        all_data_norm.append((vi, vname, timestamps, normed, labels))
+
+    # Run CV for both raw and normalized
+    for label, data in [("RAW features", all_data_raw),
+                        ("PER-VIDEO NORMALIZED features", all_data_norm)]:
+        print(f"\n{'='*70}")
+        print(f"  {label}")
+        print(f"{'='*70}")
+
+        all_y_true, all_y_prob = [], []
+
+        # Leave-one-video-out: each video in turn is the held-out test set.
+        for test_idx in range(len(data)):
+            _, vname, _, test_X, test_labels = data[test_idx]
+            # Label 0 marks ambiguous windows; they are excluded everywhere.
+            test_mask = test_labels != 0
+            if test_mask.sum() == 0 or (test_labels[test_mask] == 1).sum() == 0:
+                continue
+            X_test = test_X[test_mask]
+            y_test = (test_labels[test_mask] == 1).astype(int)
+
+            X_parts, y_parts = [], []
+            for i, (_, _, _, feats, labs) in enumerate(data):
+                if i == test_idx:
+                    continue
+                m = labs != 0
+                if m.sum() == 0:
+                    continue
+                X_parts.append(feats[m])
+                y_parts.append((labs[m] == 1).astype(int))
+
+            if not X_parts:
+                continue
+            X_train = np.vstack(X_parts)
+            y_train = np.concatenate(y_parts)
+
+            pos_idx = np.where(y_train == 1)[0]
+            neg_idx = np.where(y_train == 0)[0]
+            if len(pos_idx) == 0 or len(neg_idx) == 0:
+                continue
+            # Subsample negatives to at most 3x the positives; the fixed seed
+            # keeps the draw reproducible.
+            rng = np.random.RandomState(42)
+            n_neg = min(len(neg_idx), len(pos_idx) * 3)
+            neg_sample = rng.choice(neg_idx, n_neg, replace=False)
+            train_idx = np.concatenate([pos_idx, neg_sample])
+
+            clf = GradientBoostingClassifier(
+                n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42
+            )
+            clf.fit(X_train[train_idx], y_train[train_idx])
+            probs = clf.predict_proba(X_test)[:, 1]
+
+            # Per-video summary at a fixed 0.5 decision threshold.
+            tp = ((probs >= 0.5) & (y_test == 1)).sum()
+            fp = ((probs >= 0.5) & (y_test == 0)).sum()
+            fn_count = ((probs < 0.5) & (y_test == 1)).sum()
+            pos_s = probs[y_test == 1].mean() if (y_test == 1).sum() > 0 else 0
+            neg_s = probs[y_test == 0].mean() if (y_test == 0).sum() > 0 else 0
+            print(f"  {vname[:50]:50s}  TP={tp:3d} FP={fp:4d} FN={fn_count:3d}  pos_p={pos_s:.3f} neg_p={neg_s:.3f}")
+
+            all_y_true.extend(y_test)
+            all_y_prob.extend(probs)
+
+        if not all_y_true:
+            print("  No test results.")
+            continue
+
+        # Pool the held-out predictions of every fold for the global report.
+        y_true = np.array(all_y_true)
+        y_prob = np.array(all_y_prob)
+        pos_probs = y_prob[y_true == 1]
+        neg_probs = y_prob[y_true == 0]
+
+        if len(pos_probs) > 0 and len(neg_probs) > 0:
+            print(f"\n  POS: 25%={np.percentile(pos_probs,25):.3f} 50%={np.percentile(pos_probs,50):.3f}"
+                  f" 75%={np.percentile(pos_probs,75):.3f} max={pos_probs.max():.3f}")
+            print(f"  NEG: 25%={np.percentile(neg_probs,25):.3f} 50%={np.percentile(neg_probs,50):.3f}"
+                  f" 75%={np.percentile(neg_probs,75):.3f} max={neg_probs.max():.3f}")
+
+        # Threshold sweep from 0.10 to 0.90 in steps of 0.05, tracking the best F1.
+        best_f1, best_thr = 0, 0
+        print(f"\n  {'thr':>5}  {'prec':>6}  {'recall':>6}  {'TP':>5}  {'FP':>5}  {'FN':>4}  {'F1':>6}")
+        for thr in np.arange(0.10, 0.91, 0.05):
+            tp = ((y_prob >= thr) & (y_true == 1)).sum()
+            fp = ((y_prob >= thr) & (y_true == 0)).sum()
+            fn_count = ((y_prob < thr) & (y_true == 1)).sum()
+            prec = tp / (tp + fp) if (tp + fp) > 0 else 0
+            rec = tp / (tp + fn_count) if (tp + fn_count) > 0 else 0
+            f1 = 2 * prec * rec / (prec + rec) if (prec + rec) > 0 else 0
+            if f1 > best_f1:
+                best_f1, best_thr = f1, thr
+            print(f"  {thr:.2f}   {prec:.4f}  {rec:.4f}  {tp:5d}  {fp:5d}  {fn_count:4d}  {f1:.4f}")
+        print(f"\n  Best F1={best_f1:.4f} at thr={best_thr:.2f}")
+
+        # Feature importance
+        # Refit once on every labelled window (same 3:1 negative subsampling).
+        X_all = np.vstack([f[l != 0] for _, _, _, f, l in data])
+        y_all = np.concatenate([(l[l != 0] == 1).astype(int) for _, _, _, _, l in data])
+        pos_idx = np.where(y_all == 1)[0]
+        neg_idx = np.where(y_all == 0)[0]
+        rng = np.random.RandomState(42)
+        neg_sub = rng.choice(neg_idx, min(len(neg_idx), len(pos_idx)*3), replace=False)
+        clf = GradientBoostingClassifier(n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42)
+        clf.fit(X_all[np.concatenate([pos_idx, neg_sub])], y_all[np.concatenate([pos_idx, neg_sub])])
+
+        # Names follow the row order of extract_rich_features; window vectors
+        # hold all means ("_m") first, then all standard deviations ("_s").
+        feat_names = (
+            ["rms", "centroid", "bw", "rolloff", "flat", "zcr", "onset"]
+            + [f"mel{i}" for i in range(8)]
+            + [f"sc{i}" for i in range(7)]
+        )
+        stat_names = [f"{f}_m" for f in feat_names] + [f"{f}_s" for f in feat_names]
+        imp = clf.feature_importances_
+        top = sorted(zip(stat_names, imp), key=lambda x: -x[1])[:10]
+        print(f"  Top features: {', '.join(f'{n}={v:.3f}' for n, v in top)}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+"""Train an audio scan classifier from DB ground truth.
+
+Usage:
+    python 8cut_train.py                                    # default model, auto-detect positive
+    python 8cut_train.py --model BEATS                      # specific embedding model
+    python 8cut_train.py --positive mp4_Intense                 # explicit positive folder
+    python 8cut_train.py --positive mp4_Intense --model BEATS   # both
+"""
+import sys, os, warnings
+sys.path.insert(0, os.path.dirname(__file__))
+warnings.filterwarnings("ignore")
+
+from core.audio_scan import train_classifier, default_model_path, _EMBED_MODELS
+from core.db import ProcessedDB
+
+PROFILE_NAME = "JAV_missionary"
+
+# Fallback for old DB rows without source_path
+PLEX_DIR = "/media/unraid/appdata/plex/download/porn_jav/"
+
+
+def main():
+    embed_model = None
+    if "--model" in sys.argv:
+        idx = sys.argv.index("--model")
+        if idx + 1 < len(sys.argv):
+            embed_model = sys.argv[idx + 1]
+            if embed_model not in _EMBED_MODELS:
+                print(f"Unknown model: {embed_model}")
+                print(f"Available: {', '.join(_EMBED_MODELS)}")
+                sys.exit(1)
+
+    positive_suffix = None
+    if "--positive" in sys.argv:
+        idx = sys.argv.index("--positive")
+        if idx + 1 < len(sys.argv):
+            positive_suffix = sys.argv[idx + 1]
+
+    db = ProcessedDB()
+
+    # If --positive given, use the new DB helper
+    if positive_suffix:
+        video_infos = db.get_training_data(
+            PROFILE_NAME, positive_suffix, fallback_video_dir=PLEX_DIR,
+        )
+        if not video_infos:
+            print(f"No training data found for positive='{positive_suffix}'")
+            sys.exit(1)
+    else:
+        # Legacy fallback: classify by folder path pattern
+        rows = db._con.execute(
+            "SELECT filename, start_time, output_path, source_path"
+            " FROM processed WHERE profile = ?",
+            (PROFILE_NAME,),
+        ).fetchall()
+
+        intense_by_video, soft_by_video = {}, {}
+        source_by_fn = {}
+        for fn, st, op, sp in rows:
+            if sp:
+                source_by_fn[fn] = sp
+            if "/mp4_Intense/" in op or "_Intense/" in op:
+                intense_by_video.setdefault(fn, set()).add(st)
+            elif "/mp4_Soft/" in op or "_Soft/" in op:
+                soft_by_video.setdefault(fn, set()).add(st)
+
+        video_infos = []
+        for fn in intense_by_video:
+            # Try source_path from DB first, fall back to PLEX_DIR
+            vpath = source_by_fn.get(fn) or os.path.join(PLEX_DIR, fn)
+            if not os.path.exists(vpath):
+                print(f"  skip (not found): {fn}")
+                continue
+            gt_intense = sorted(intense_by_video[fn])
+            gt_soft = sorted(soft_by_video.get(fn, set()))
+            video_infos.append((vpath, gt_intense, gt_soft))
+
+    label = embed_model or "WAV2VEC2_BASE"
+    print(f"Training {label} model on {len(video_infos)} videos...")
+    model_path = default_model_path(PROFILE_NAME)
+    result = train_classifier(
+        video_infos, model_path=model_path, embed_model=embed_model,
+    )
+    if result is None:
+        print("Training failed: no valid samples or missing class balance")
+        sys.exit(1)
+    print(f"Model saved to {model_path}")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,13 @@
+FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        python3 python3-pip ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY core/ core/
+COPY server/ server/
+RUN pip install --no-cache-dir --break-system-packages fastapi uvicorn[standard]
+
+EXPOSE 8000
+CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,10 @@
+.DS_Store
+node_modules
+/build
+/.svelte-kit
+/package
+.env
+.env.*
+!.env.example
+vite.config.js.timestamp-*
+vite.config.ts.timestamp-*
@@ -0,0 +1,7 @@
+{
+  "recommendations": [
+    "svelte.svelte-vscode",
+    "tauri-apps.tauri-vscode",
+    "rust-lang.rust-analyzer"
+  ]
+}
@@ -0,0 +1,3 @@
+{
+  "svelte.enable-ts-plugin": true
+}
@@ -0,0 +1,7 @@
+# Tauri + SvelteKit + TypeScript
+
+This template should help get you started developing with Tauri, SvelteKit and TypeScript in Vite.
+
+## Recommended IDE Setup
+
+[VS Code](https://code.visualstudio.com/) + [Svelte](https://marketplace.visualstudio.com/items?itemName=svelte.svelte-vscode) + [Tauri](https://marketplace.visualstudio.com/items?itemName=tauri-apps.tauri-vscode) + [rust-analyzer](https://marketplace.visualstudio.com/items?itemName=rust-lang.rust-analyzer).
@@ -0,0 +1,29 @@
+{
+  "name": "client",
+  "version": "0.1.0",
+  "description": "",
+  "type": "module",
+  "scripts": {
+    "dev": "vite dev",
+    "build": "vite build",
+    "preview": "vite preview",
+    "check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
+    "check:watch": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json --watch",
+    "tauri": "tauri"
+  },
+  "license": "MIT",
+  "dependencies": {
+    "@tauri-apps/api": "^2",
+    "@tauri-apps/plugin-opener": "^2"
+  },
+  "devDependencies": {
+    "@sveltejs/adapter-static": "^3.0.6",
+    "@sveltejs/kit": "^2.9.0",
+    "@sveltejs/vite-plugin-svelte": "^5.0.0",
+    "svelte": "^5.0.0",
+    "svelte-check": "^4.0.0",
+    "typescript": "~5.6.2",
+    "vite": "^6.0.3",
+    "@tauri-apps/cli": "^2"
+  }
+}
@@ -0,0 +1,7 @@
+# Generated by Cargo
+# will have compiled files and executables
+/target/
+
+# Generated by Tauri
+# will have schema files for capabilities auto-completion
+/gen/schemas
@@ -0,0 +1,25 @@
+[package]
+name = "client"
+version = "0.1.0"
+description = "A Tauri App"
+authors = ["you"]
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[lib]
+# The `_lib` suffix may seem redundant but it is necessary
+# to make the lib name unique and wouldn't conflict with the bin name.
+# This seems to be only an issue on Windows, see https://github.com/rust-lang/cargo/issues/8519
+name = "client_lib"
+crate-type = ["staticlib", "cdylib", "rlib"]
+
+[build-dependencies]
+tauri-build = { version = "2", features = [] }
+
+[dependencies]
+tauri = { version = "2", features = [] }
+tauri-plugin-opener = "2"
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+
@@ -0,0 +1,3 @@
+// Cargo build script: delegates entirely to `tauri_build::build()`.
+fn main() {
+    tauri_build::build()
+}
@@ -0,0 +1,10 @@
+{
+  "$schema": "../gen/schemas/desktop-schema.json",
+  "identifier": "default",
+  "description": "Capability for the main window",
+  "windows": ["main"],
+  "permissions": [
+    "core:default",
+    "opener:default"
+  ]
+}
@@ -0,0 +1,56 @@
+use tauri::State;
+use std::sync::Mutex;
+use crate::mpv::Mpv;
+
+/// Shared application state: the single `Mpv` controller behind a mutex.
+pub struct MpvState(pub Mutex<Mpv>);
+
+/// Lock the shared `Mpv`, reporting a poisoned mutex as an error string
+/// instead of panicking inside a command handler.
+fn lock_mpv<'a>(state: &'a State<'_, MpvState>) -> Result<std::sync::MutexGuard<'a, Mpv>, String> {
+    state.0.lock().map_err(|e| format!("mpv state unavailable: {e}"))
+}
+
+/// Start (or restart) the mpv process and connect to its IPC socket.
+#[tauri::command]
+pub fn mpv_start(state: State<MpvState>) -> Result<(), String> {
+    lock_mpv(&state)?.start()
+}
+
+/// Stop the mpv process, if any.
+#[tauri::command]
+pub fn mpv_stop(state: State<MpvState>) -> Result<(), String> {
+    lock_mpv(&state)?.stop();
+    Ok(())
+}
+
+/// Load a video URL together with a separate audio URL.
+#[tauri::command]
+pub fn mpv_load(state: State<MpvState>, video_url: String, audio_url: String) -> Result<(), String> {
+    lock_mpv(&state)?.load_file(&video_url, &audio_url)
+}
+
+/// Seek to an absolute position.
+#[tauri::command]
+pub fn mpv_seek(state: State<MpvState>, time: f64) -> Result<(), String> {
+    lock_mpv(&state)?.seek(time)
+}
+
+/// Pause playback.
+#[tauri::command]
+pub fn mpv_pause(state: State<MpvState>) -> Result<(), String> {
+    lock_mpv(&state)?.pause()
+}
+
+/// Resume playback.
+#[tauri::command]
+pub fn mpv_resume(state: State<MpvState>) -> Result<(), String> {
+    lock_mpv(&state)?.resume()
+}
+
+/// Set the A-B loop points.
+#[tauri::command]
+pub fn mpv_set_loop(state: State<MpvState>, a: f64, b: f64) -> Result<(), String> {
+    lock_mpv(&state)?.set_loop(a, b)
+}
+
+/// Clear the A-B loop points.
+#[tauri::command]
+pub fn mpv_clear_loop(state: State<MpvState>) -> Result<(), String> {
+    lock_mpv(&state)?.clear_loop()
+}
+
+/// Read mpv's `time-pos` property.
+#[tauri::command]
+pub fn mpv_time_pos(state: State<MpvState>) -> Result<f64, String> {
+    lock_mpv(&state)?.time_pos()
+}
+
+/// Read mpv's `duration` property.
+#[tauri::command]
+pub fn mpv_duration(state: State<MpvState>) -> Result<f64, String> {
+    lock_mpv(&state)?.get_duration()
+}
@@ -0,0 +1,27 @@
+mod mpv;
+mod commands;
+
+use commands::MpvState;
+use mpv::Mpv;
+use std::sync::Mutex;
+
+/// Build and run the Tauri application.
+///
+/// Registers the opener plugin, installs a fresh `Mpv` controller as managed
+/// state, and exposes the `mpv_*` commands to the frontend.  Panics if the
+/// application fails to run.
+#[cfg_attr(mobile, tauri::mobile_entry_point)]
+pub fn run() {
+    tauri::Builder::default()
+        .plugin(tauri_plugin_opener::init())
+        .manage(MpvState(Mutex::new(Mpv::new())))
+        .invoke_handler(tauri::generate_handler![
+            commands::mpv_start,
+            commands::mpv_stop,
+            commands::mpv_load,
+            commands::mpv_seek,
+            commands::mpv_pause,
+            commands::mpv_resume,
+            commands::mpv_set_loop,
+            commands::mpv_clear_loop,
+            commands::mpv_time_pos,
+            commands::mpv_duration,
+        ])
+        .run(tauri::generate_context!())
+        .expect("error while running tauri application");
+}
@@ -0,0 +1,6 @@
+// Prevents additional console window on Windows in release, DO NOT REMOVE!!
+#![cfg_attr(not(debug_assertions), windows_subsystem = "windows")]
+
+// Binary entry point: all application setup lives in the library crate.
+fn main() {
+    client_lib::run()
+}
@@ -0,0 +1,167 @@
+use std::io::{BufRead, BufReader, Write};
+use std::os::unix::net::UnixStream;
+use std::process::{Child, Command};
+use std::sync::atomic::{AtomicU64, Ordering};
+use serde_json::{json, Value};
+
+/// Controller for an `mpv` child process driven over its JSON IPC socket.
+pub struct Mpv {
+    // Spawned mpv process, if one is running.
+    process: Option<Child>,
+    // Write half of the IPC connection.
+    writer: Option<UnixStream>,
+    // Buffered read half (a clone of the same socket).
+    reader: Option<BufReader<UnixStream>>,
+    // Path of the IPC socket; includes this process's id.
+    socket_path: String,
+    // Source of `request_id` values used to match replies to requests.
+    next_id: AtomicU64,
+}
+
+impl Mpv {
+    /// Create a controller with no process attached; call `start` to spawn mpv.
+    pub fn new() -> Self {
+        let socket_path = format!("/tmp/8cut-mpv-{}", std::process::id());
+        Mpv {
+            process: None,
+            writer: None,
+            reader: None,
+            socket_path,
+            next_id: AtomicU64::new(1),
+        }
+    }
+
+    /// Spawn mpv (stopping any previous instance first) and connect to its
+    /// IPC socket.
+    ///
+    /// Returns an error if mpv cannot be spawned, exits before the socket is
+    /// available, or the socket does not appear within 50 polls of 100 ms.
+    /// On every failure path the child is stopped again so that no
+    /// half-initialised state is left behind.
+    pub fn start(&mut self) -> Result<(), String> {
+        self.stop();
+
+        let child = Command::new("mpv")
+            .args([
+                "--idle=yes",
+                "--force-window=no",
+                "--vo=null",
+                "--keep-open=yes",
+                &format!("--input-ipc-server={}", self.socket_path),
+            ])
+            .spawn()
+            .map_err(|e| format!("Failed to start mpv: {e}"))?;
+
+        self.process = Some(child);
+
+        // Wait for socket
+        for _ in 0..50 {
+            std::thread::sleep(std::time::Duration::from_millis(100));
+
+            // If mpv already exited there is nothing to wait for.
+            let exited = match self.process.as_mut() {
+                Some(child) => child.try_wait().ok().flatten(),
+                None => None,
+            };
+            if let Some(status) = exited {
+                self.stop();
+                return Err(format!("mpv exited before its IPC socket was ready ({status})"));
+            }
+
+            if let Ok(stream) = UnixStream::connect(&self.socket_path) {
+                stream.set_nonblocking(false).ok();
+                let reader_stream = match stream.try_clone() {
+                    Ok(s) => s,
+                    Err(e) => {
+                        self.stop();
+                        return Err(e.to_string());
+                    }
+                };
+                self.writer = Some(stream);
+                self.reader = Some(BufReader::new(reader_stream));
+                return Ok(());
+            }
+        }
+
+        // Do not leave an unreachable mpv process running after a timeout.
+        self.stop();
+        Err("Timeout waiting for mpv IPC socket".into())
+    }
+
+    /// Kill the mpv process (if any), drop the connection and remove the
+    /// socket file.  Safe to call when nothing is running.
+    pub fn stop(&mut self) {
+        if let Some(ref mut child) = self.process {
+            child.kill().ok();
+            child.wait().ok();
+        }
+        self.process = None;
+        self.writer = None;
+        self.reader = None;
+        std::fs::remove_file(&self.socket_path).ok();
+    }
+
+    /// Send a command and wait for the matching response (by request_id).
+    /// Skips over asynchronous mpv events while waiting.
+    ///
+    /// Errors when mpv is not running, on socket I/O failure, when mpv
+    /// closes the connection, or when a received line is not valid JSON.
+    fn send_and_recv(&mut self, cmd: Value) -> Result<Value, String> {
+        let id = self.next_id.fetch_add(1, Ordering::Relaxed);
+        let writer = self.writer.as_mut().ok_or("mpv not running")?;
+        let reader = self.reader.as_mut().ok_or("mpv not running")?;
+
+        let mut msg_val = cmd;
+        msg_val["request_id"] = json!(id);
+        let mut msg = serde_json::to_string(&msg_val).map_err(|e| e.to_string())?;
+        msg.push('\n');
+        writer.write_all(msg.as_bytes()).map_err(|e| e.to_string())?;
+
+        // Read lines until we find the response matching our request_id
+        let mut line = String::new();
+        loop {
+            line.clear();
+            let n = reader.read_line(&mut line).map_err(|e| e.to_string())?;
+            if n == 0 {
+                // read_line returns 0 bytes at end of stream: mpv went away.
+                return Err("mpv closed the IPC connection".into());
+            }
+            let parsed: Value = serde_json::from_str(&line).map_err(|e| e.to_string())?;
+            // mpv events have "event" key, responses have "request_id"
+            if parsed.get("request_id").and_then(|v| v.as_u64()) == Some(id) {
+                return Ok(parsed);
+            }
+            // Otherwise it's an async event — skip it
+        }
+    }
+
+    /// Pass a reply through when its `error` field is `"success"`;
+    /// otherwise turn it into an `mpv error: ...` message.
+    fn check_reply(resp: Value) -> Result<Value, String> {
+        if resp.get("error").and_then(|e| e.as_str()) != Some("success") {
+            return Err(format!("mpv error: {}", resp.get("error").unwrap_or(&Value::Null)));
+        }
+        Ok(resp)
+    }
+
+    /// Run an mpv command given as a list of string arguments.
+    pub fn command(&mut self, args: &[&str]) -> Result<(), String> {
+        let resp = self.send_and_recv(json!({ "command": args }))?;
+        Self::check_reply(resp).map(|_| ())
+    }
+
+    /// Set an mpv property to a JSON value.
+    pub fn set_property(&mut self, name: &str, value: Value) -> Result<(), String> {
+        let resp = self.send_and_recv(json!({ "command": ["set_property", name, value] }))?;
+        Self::check_reply(resp).map(|_| ())
+    }
+
+    /// Read an mpv property; yields `Value::Null` when the reply has no `data`.
+    pub fn get_property(&mut self, name: &str) -> Result<Value, String> {
+        let resp = self.send_and_recv(json!({ "command": ["get_property", name] }))?;
+        let resp = Self::check_reply(resp)?;
+        Ok(resp.get("data").cloned().unwrap_or(Value::Null))
+    }
+
+    /// Replace the current file with `video_url`, attaching `audio_url` via
+    /// the per-file `audio-file` option.
+    pub fn load_file(&mut self, video_url: &str, audio_url: &str) -> Result<(), String> {
+        // NOTE(review): the URL is interpolated into mpv's option string
+        // unescaped — confirm audio URLs cannot contain characters that mpv
+        // treats as option-list separators.
+        let options = format!("audio-file={}", audio_url);
+        let resp = self.send_and_recv(json!({
+            "command": ["loadfile", video_url, "replace", -1, options]
+        }))?;
+        Self::check_reply(resp).map(|_| ())
+    }
+
+    /// Seek to an absolute position.
+    pub fn seek(&mut self, time: f64) -> Result<(), String> {
+        self.command(&["seek", &time.to_string(), "absolute"])
+    }
+
+    /// Pause playback.
+    pub fn pause(&mut self) -> Result<(), String> {
+        self.set_property("pause", json!(true))
+    }
+
+    /// Resume playback.
+    pub fn resume(&mut self) -> Result<(), String> {
+        self.set_property("pause", json!(false))
+    }
+
+    /// Set both A-B loop points.
+    pub fn set_loop(&mut self, a: f64, b: f64) -> Result<(), String> {
+        self.set_property("ab-loop-a", json!(a))?;
+        self.set_property("ab-loop-b", json!(b))
+    }
+
+    /// Disable the A-B loop by resetting both points to `"no"`.
+    pub fn clear_loop(&mut self) -> Result<(), String> {
+        self.set_property("ab-loop-a", json!("no"))?;
+        self.set_property("ab-loop-b", json!("no"))
+    }
+
+    /// Read the `time-pos` property as a number.
+    pub fn time_pos(&mut self) -> Result<f64, String> {
+        let val = self.get_property("time-pos")?;
+        val.as_f64().ok_or_else(|| "time-pos not a number".to_string())
+    }
+
+    /// Read the `duration` property as a number.
+    pub fn get_duration(&mut self) -> Result<f64, String> {
+        let val = self.get_property("duration")?;
+        val.as_f64().ok_or_else(|| "duration not a number".to_string())
+    }
+}
+
+impl Drop for Mpv {
+    /// Make sure the child process and socket file do not outlive the controller.
+    fn drop(&mut self) {
+        self.stop();
+    }
+}
@@ -0,0 +1,35 @@
+{
+  "$schema": "https://schema.tauri.app/config/2",
+  "productName": "8cut",
+  "version": "0.1.0",
+  "identifier": "com.ethanfel.8cut",
+  "build": {
+    "beforeDevCommand": "pnpm dev",
+    "devUrl": "http://localhost:1420",
+    "beforeBuildCommand": "pnpm build",
+    "frontendDist": "../build"
+  },
+  "app": {
+    "windows": [
+      {
+        "title": "8-cut",
+        "width": 1200,
+        "height": 800
+      }
+    ],
+    "security": {
+      "csp": null
+    }
+  },
+  "bundle": {
+    "active": true,
+    "targets": ["deb", "appimage"],
+    "icon": [
+      "icons/32x32.png",
+      "icons/128x128.png",
+      "icons/128x128@2x.png",
+      "icons/icon.icns",
+      "icons/icon.ico"
+    ]
+  }
+}
@@ -0,0 +1,13 @@
+<!doctype html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8" />
+    <link rel="icon" href="%sveltekit.assets%/favicon.png" />
+    <meta name="viewport" content="width=device-width, initial-scale=1" />
+    <title>Tauri + SvelteKit + Typescript App</title>
+    %sveltekit.head%
+  </head>
+  <body data-sveltekit-preload-data="hover">
+    <div style="display: contents">%sveltekit.body%</div>
+  </body>
+</html>
@@ -0,0 +1,113 @@
+<script lang="ts" module>
+  // Module-level export so App can call doExport via bind:this
+</script>
+
+<script lang="ts">
+  import { startExport } from "$lib/api";
+  import {
+    currentFile, cursor, clips, spread, shortSide, portraitRatio,
+    cropCenter, format, label, category, clipName, profile,
+    hwEncode,
+    exportStatus, exportCompleted, exportTotal, subprofiles
+  } from "$lib/stores";
+
+  const CATEGORIES = ["", "Human", "Animal", "Vehicle", "Tool", "Music", "Nature", "Sport", "Other"];
+  const RATIOS = ["Off", "9:16", "4:5", "1:1"];
+
+  // Start an export of the current file around the cursor position.
+  // `folderSuffix` is forwarded as `folder_suffix`; the sub-profile buttons
+  // pass their name here.  Sets the export status to "error" if the request fails.
+  export async function doExport(folderSuffix: string = "") {
+    const file = $currentFile;
+    if (!file) return;
+
+    // Reset the progress counters before the request goes out.
+    $exportStatus = "running";
+    $exportCompleted = 0;
+    $exportTotal = $clips;
+
+    const inputPath = `${file.root}/${file.path}`;
+    // Default clip name: the file name without its extension.
+    const fallbackName = file.name.replace(/\.[^.]+$/, "");
+    const encoder = $hwEncode ? "h264_nvenc" : "libx264";
+
+    const payload = {
+      input_path: inputPath,
+      cursor: $cursor,
+      name: $clipName || fallbackName,
+      clips: $clips,
+      spread: $spread,
+      short_side: $shortSide,
+      portrait_ratio: $portraitRatio,
+      crop_center: $cropCenter,
+      format: $format,
+      label: $label,
+      category: $category,
+      profile: $profile,
+      folder_suffix: folderSuffix,
+      encoder,
+    };
+
+    try {
+      await startExport(payload);
+    } catch (err) {
+      $exportStatus = "error";
+      console.error(err);
+    }
+  }
+</script>
+
+<div class="export-panel">
+  <div class="row">
+    <button onclick={() => doExport()} disabled={$exportStatus === "running"}>
+      Export{#if $exportStatus === "running"} ({$exportCompleted}/{$exportTotal}){/if}
+    </button>
+    {#each $subprofiles as sub}
+      <button onclick={() => doExport(sub)} title="Export {sub}">
+        {sub}
+      </button>
+    {/each}
+  </div>
+
+  <div class="row">
+    <label>Clips <input type="number" bind:value={$clips} min="1" max="99" /></label>
+    <label>Spread <input type="number" bind:value={$spread} min="2" max="8" step="0.5" /></label>
+    <label>Size <input type="number" bind:value={$shortSide} min="0" max="4320" step="64" /></label>
+    <label>Ratio
+      <select bind:value={$portraitRatio}>
+        {#each RATIOS as r}
+          <option value={r === "Off" ? null : r}>{r}</option>
+        {/each}
+      </select>
+    </label>
+  </div>
+
+  <div class="row">
+    <label>Label <input type="text" bind:value={$label} /></label>
+    <label>Category
+      <select bind:value={$category}>
+        {#each CATEGORIES as c}
+          <option value={c}>{c || "---"}</option>
+        {/each}
+      </select>
+    </label>
+    <label>Format
+      <select bind:value={$format}>
+        <option>MP4</option>
+        <option>WebP sequence</option>
+      </select>
+    </label>
+    <label><input type="checkbox" bind:checked={$hwEncode} /> GPU</label>
+  </div>
+</div>
+
+<style>
+  .export-panel {
+    display: flex;
+    flex-direction: column;
+    gap: 4px;
+    padding: 4px;
+    font-size: 12px;
+  }
+  .row {
+    display: flex;
+    gap: 6px;
+    align-items: center;
+    flex-wrap: wrap;
+  }
+  label { display: flex; align-items: center; gap: 2px; }
+  input[type="number"] { width: 50px; background: #2d2d2d; color: #e0e0e0; border: 1px solid #444; }
+  input[type="text"] { width: 120px; background: #2d2d2d; color: #e0e0e0; border: 1px solid #444; }
+  select { background: #2d2d2d; color: #e0e0e0; border: 1px solid #444; }
+  button { background: #0066cc; color: white; border: none; padding: 4px 12px; cursor: pointer; }
+  button:disabled { background: #444; }
+</style>
@@ -0,0 +1,173 @@
+<script lang="ts">
+  import { onMount } from "svelte";
+  import { getFiles, getRoots, getHidden, getMarkers, hideFile, unhideFile } from "$lib/api";
+  import {
+    files, roots, hiddenFiles, currentFile, showHidden,
+    profile, markers, visibleFiles
+  } from "$lib/stores";
+
+  let selectedRoot = $state("");
+  let currentFolder = $state("");
+
+  onMount(async () => {
+    $roots = await getRoots();
+    if ($roots.length) {
+      selectedRoot = $roots[0];
+      await loadFiles();
+    }
+  });
+
+  // Reload the file list and hidden set when the profile changes.
+  // `void $profile` only reads the store so the effect depends on it.
+  // NOTE(review): `selectedRoot` is also read here, so this effect presumably
+  // re-runs on root changes too (in addition to the explicit loadFiles() calls
+  // elsewhere) — confirm whether the duplicate reload is intended.
+  $effect(() => {
+    void $profile;
+    if (selectedRoot) {
+      loadFiles();
+    }
+  });
+
+  /**
+   * Fetch the file list for `selectedRoot` into `$files`, then fetch the
+   * hidden file names for the active profile into `$hiddenFiles`.
+   */
+  async function loadFiles() {
+    const listing = await getFiles(selectedRoot);
+    $files = listing;
+    const hiddenNames = await getHidden($profile);
+    $hiddenFiles = new Set(hiddenNames);
+  }
+
+  // Derive subfolders and files at current folder level
+  // Names of the immediate subfolders of `currentFolder`, de-duplicated and
+  // sorted, derived from the paths of the visible files.
+  let subfolders = $derived.by(() => {
+    const base = currentFolder ? `${currentFolder}/` : "";
+    const names = $visibleFiles
+      .filter((entry) => entry.path.startsWith(base))
+      .map((entry) => entry.path.slice(base.length))
+      // Only paths with a further "/" live inside a subfolder.
+      .filter((tail) => tail.includes("/"))
+      .map((tail) => tail.slice(0, tail.indexOf("/")));
+    return Array.from(new Set(names)).sort();
+  });
+
+  // Visible files that sit directly inside `currentFolder` (no deeper nesting).
+  let currentFiles = $derived.by(() => {
+    const base = currentFolder ? `${currentFolder}/` : "";
+    return $visibleFiles.filter((entry) => {
+      const isInside = entry.path.startsWith(base);
+      // A direct child has no further "/" after the folder prefix.
+      return isInside && entry.path.indexOf("/", base.length) === -1;
+    });
+  });
+
+  async function selectFile(file: typeof $files[0]) {
+    $currentFile = file;
+    $markers = await getMarkers(file.name, $profile);
+  }
+
+  /** Descend into the child folder `name` of the folder currently shown. */
+  function navigateToFolder(name: string) {
+    if (currentFolder) {
+      currentFolder = `${currentFolder}/${name}`;
+    } else {
+      currentFolder = name;
+    }
+  }
+
+  /** Move one level up by dropping the last segment of `currentFolder`. */
+  function navigateUp() {
+    const cut = currentFolder.lastIndexOf("/");
+    if (cut === -1) {
+      // Already at a top-level folder: go back to the root listing.
+      currentFolder = "";
+      return;
+    }
+    currentFolder = currentFolder.slice(0, cut);
+  }
+
+  /**
+   * Render a byte count as a short human-readable string.
+   *
+   * Uses decimal units: values above 1e9 are shown in GB with one decimal,
+   * values above 1e6 in whole MB, and everything else in whole KB.
+   */
+  function formatSize(bytes: number): string {
+    const units: Array<[number, number, string]> = [
+      [1e9, 1, " GB"],
+      [1e6, 0, " MB"],
+    ];
+    for (const [threshold, digits, suffix] of units) {
+      if (bytes > threshold) return (bytes / threshold).toFixed(digits) + suffix;
+    }
+    return (bytes / 1e3).toFixed(0) + " KB";
+  }
+
+  /**
+   * Flip the hidden state of `file` for the active profile, then reload the
+   * file list and hidden set.
+   */
+  async function toggleHidden(file: typeof $files[0]) {
+    const isHidden = $hiddenFiles.has(file.name);
+    const change = isHidden ? unhideFile : hideFile;
+    await change(file.name, $profile);
+    await loadFiles();
+  }
+</script>
+
+<div class="file-browser">
+  <div class="controls">
+    <select bind:value={selectedRoot} onchange={() => { currentFolder = ""; loadFiles(); }}>
+      {#each $roots as root}
+        <option value={root}>{root}</option>
+      {/each}
+    </select>
+    <label><input type="checkbox" bind:checked={$showHidden} /> Hidden</label>
+  </div>
+  {#if currentFolder}
+    <div class="breadcrumb" onclick={navigateUp}>.. / {currentFolder}</div>
+  {/if}
+  <ul class="file-list">
+    {#each subfolders as folder}
+      <li class="folder" onclick={() => navigateToFolder(folder)}>
+        <span class="name">{folder}/</span>
+        <span class="badge">dir</span>
+      </li>
+    {/each}
+    {#each currentFiles as file}
+      <li
+        class:selected={$currentFile?.path === file.path}
+        onclick={() => selectFile(file)}
+        oncontextmenu={(e) => { e.preventDefault(); toggleHidden(file); }}
+      >
+        <span class="name">{file.name}</span>
+        <span class="size">{formatSize(file.size)}</span>
+      </li>
+    {/each}
+  </ul>
+</div>
+
+<style>
+  .file-browser {
+    display: flex;
+    flex-direction: column;
+    height: 100%;
+    min-width: 200px;
+  }
+  .controls {
+    display: flex;
+    gap: 4px;
+    padding: 4px;
+    align-items: center;
+  }
+  .controls select {
+    flex: 1;
+    background: #2d2d2d;
+    color: #e0e0e0;
+    border: 1px solid #444;
+    padding: 2px;
+  }
+  .breadcrumb {
+    padding: 3px 8px;
+    font-size: 11px;
+    color: #88aaff;
+    cursor: pointer;
+    background: #252525;
+    border-bottom: 1px solid #333;
+    white-space: nowrap;
+    overflow: hidden;
+    text-overflow: ellipsis;
+  }
+  .breadcrumb:hover { background: #2a2a2a; }
+  .file-list {
+    list-style: none;
+    padding: 0;
+    margin: 0;
+    overflow-y: auto;
+    flex: 1;
+  }
+  .file-list li {
+    padding: 4px 8px;
+    cursor: pointer;
+    display: flex;
+    justify-content: space-between;
+    font-size: 12px;
+    white-space: nowrap;
+  }
+  .file-list li:hover { background: #333; }
+  .file-list li.selected { background: #0066cc; }
+  .file-list li.folder { color: #88aaff; }
+  .name { flex: 1; overflow: hidden; text-overflow: ellipsis; }
+  .size { flex-shrink: 0; margin-left: 8px; color: #888; font-size: 11px; }
+  .badge { flex-shrink: 0; margin-left: 8px; color: #666; font-size: 10px; }
+</style>
@@ -0,0 +1,93 @@
+<script lang="ts">
+  import { onMount } from "svelte";
+  import { getProfiles, setServer, getServer } from "$lib/api";
+  import { profile, subprofiles, serverUrl } from "$lib/stores";
+  import { saveSettings } from "$lib/settings";
+
+  let profiles = $state<string[]>([]);
+  let serverInput = $state(getServer());
+
+  onMount(async () => {
+    serverInput = getServer();
+    try {
+      profiles = await getProfiles();
+      if (profiles.length && !profiles.includes($profile)) {
+        $profile = profiles[0];
+      }
+    } catch { /* server not reachable yet */ }
+  });
+
+  /**
+   * Apply the URL typed into the server field.
+   *
+   * Strips surrounding whitespace and trailing slashes, updates the API module
+   * and the `serverUrl` store, persists settings, and reloads the profile list
+   * from the new server.
+   */
+  function applyServer() {
+    const url = serverInput.trim().replace(/\/+$/, "");
+    setServer(url);
+    $serverUrl = url;
+    saveSettings();
+    // Reload profiles from new server
+    getProfiles()
+      .then((p) => {
+        profiles = p;
+        // Same reconciliation as on mount: if the selected profile does not
+        // exist on this server, fall back to the first one it offers.
+        if (p.length && !p.includes($profile)) {
+          $profile = p[0];
+        }
+      })
+      .catch((err) => {
+        // Best-effort: the server may not be reachable yet, but say so.
+        console.error("Failed to load profiles:", err);
+      });
+  }
+
+  /**
+   * Ask the user for a subprofile suffix and append it to `$subprofiles`.
+   *
+   * The input is trimmed, so whitespace-only answers are ignored and padded
+   * names cannot create near-duplicates. Cancelling the prompt or entering an
+   * existing name is a no-op.
+   */
+  function addSubprofile() {
+    const name = prompt("Subprofile suffix:")?.trim();
+    if (name && !$subprofiles.includes(name)) {
+      $subprofiles = [...$subprofiles, name];
+    }
+  }
+
+  function removeSubprofile(name: string) {
+    $subprofiles = $subprofiles.filter(s => s !== name);
+  }
+</script>
+
+<div class="profile-bar">
+  <input
+    class="server-input"
+    type="text"
+    bind:value={serverInput}
+    onkeydown={(e) => { if (e.key === "Enter") applyServer(); }}
+    placeholder="http://host:8000"
+  />
+  <button onclick={applyServer}>Set</button>
+
+  <select bind:value={$profile}>
+    {#each profiles as p}
+      <option value={p}>{p}</option>
+    {/each}
+  </select>
+
+  <span class="subs">
+    {#each $subprofiles as sub}
+      <span class="sub-tag" oncontextmenu={(e) => { e.preventDefault(); removeSubprofile(sub); }}>
+        {sub}
+      </span>
+    {/each}
+    <button onclick={addSubprofile}>+</button>
+  </span>
+</div>
+
+<style>
+  .profile-bar {
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    padding: 4px;
+    font-size: 12px;
+  }
+  .server-input {
+    width: 180px;
+    background: #2d2d2d;
+    color: #e0e0e0;
+    border: 1px solid #444;
+    padding: 2px 4px;
+    font-size: 11px;
+  }
+  select { background: #2d2d2d; color: #e0e0e0; border: 1px solid #444; }
+  .subs { display: flex; gap: 4px; align-items: center; }
+  .sub-tag {
+    background: #444;
+    padding: 2px 6px;
+    border-radius: 3px;
+    cursor: context-menu;
+    font-size: 11px;
+  }
+  button { background: #333; color: #e0e0e0; border: 1px solid #555; padding: 1px 6px; cursor: pointer; }
+</style>
@@ -0,0 +1,170 @@
+<script lang="ts">
+  import { onMount } from "svelte";
+  import {
+    duration, cursor, playPos, markers, clips, spread, locked, clipSpan
+  } from "$lib/stores";
+
+  let {
+    onCursorChange = (_time: number) => {},
+    onSeek = (_time: number) => {},
+    onMarkerClick = (_marker: { start_time: number; output_path: string }) => {},
+    onMarkerDelete = (_outputPath: string) => {},
+  } = $props<{
+    onCursorChange?: (time: number) => void;
+    onSeek?: (time: number) => void;
+    onMarkerClick?: (marker: { start_time: number; output_path: string }) => void;
+    onMarkerDelete?: (outputPath: string) => void;
+  }>();
+
+  let canvas: HTMLCanvasElement;
+  let ctx: CanvasRenderingContext2D;
+  let dragging = $state(false);
+
+  const HEIGHT = 160;
+
+  /** Map a time in seconds to a canvas x coordinate; 0 when no duration is set. */
+  function timeToX(t: number): number {
+    if ($duration <= 0) return 0;
+    const fraction = t / $duration;
+    return fraction * canvas.width;
+  }
+
+  /**
+   * Map a canvas x coordinate to a time in seconds, clamped to
+   * [0, $duration]; 0 when no duration is set.
+   */
+  function xToTime(x: number): number {
+    if ($duration <= 0) return 0;
+    const raw = (x / canvas.width) * $duration;
+    const capped = Math.min($duration, raw);
+    return Math.max(0, capped);
+  }
+
+  /**
+   * Repaint the whole timeline canvas.
+   *
+   * Layers are painted back to front: background, translucent clip-span
+   * region starting at the cursor, marker bars, cursor bar, play-position
+   * bar, and finally the time ticks with labels along the bottom edge.
+   * Does nothing until the 2D context has been obtained.
+   */
+  function draw() {
+    if (!ctx) return;
+    const w = canvas.width;
+    const h = canvas.height;
+    ctx.clearRect(0, 0, w, h);
+
+    // Background
+    ctx.fillStyle = "#1a1a1a";
+    ctx.fillRect(0, 0, w, h);
+
+    // Clip span region
+    if ($duration > 0) {
+      const x0 = timeToX($cursor);
+      const x1 = timeToX($cursor + $clipSpan);
+      ctx.fillStyle = "rgba(0, 100, 200, 0.15)";
+      ctx.fillRect(x0, 0, x1 - x0, h);
+    }
+
+    // Markers
+    // (drawn even without a duration; timeToX then places them all at x = 0)
+    for (const m of $markers) {
+      const x = timeToX(m.start_time);
+      ctx.fillStyle = "#22aa44";
+      ctx.fillRect(x - 1, 0, 3, h);
+    }
+
+    // Cursor
+    if ($duration > 0) {
+      const cx = timeToX($cursor);
+      ctx.fillStyle = "#ff4444";
+      ctx.fillRect(cx - 1, 0, 3, h);
+    }
+
+    // Play position
+    if ($playPos !== null && $duration > 0) {
+      const px = timeToX($playPos);
+      ctx.fillStyle = "#ffaa00";
+      ctx.fillRect(px - 1, 0, 2, h);
+    }
+
+    // Time labels
+    if ($duration > 0) {
+      ctx.fillStyle = "#888";
+      ctx.font = "11px monospace";
+      // Tick spacing: the power of ten at or below duration / 5, but never
+      // finer than 10 seconds.
+      const step = Math.max(10, Math.pow(10, Math.floor(Math.log10($duration / 5))));
+      for (let t = 0; t <= $duration; t += step) {
+        const x = timeToX(t);
+        ctx.fillText(formatTime(t), x + 2, h - 4);
+        ctx.fillRect(x, h - 16, 1, 16);
+      }
+    }
+  }
+
+  /** Format seconds as `M:SS.s`, truncating (not rounding) to tenths of a second. */
+  function formatTime(s: number): string {
+    const minutes = Math.floor(s / 60);
+    const tenths = Math.floor((s % 60) * 10);
+    const seconds = (tenths / 10).toFixed(1).padStart(4, "0");
+    return `${minutes}:${seconds}`;
+  }
+
+  /**
+   * Begin a cursor drag: unless the timeline is locked, move the cursor to the
+   * pressed position and notify the parent.
+   */
+  function handleMouseDown(e: MouseEvent) {
+    if (!$locked) {
+      dragging = true;
+      const pressedAt = xToTime(e.offsetX);
+      $cursor = pressedAt;
+      onCursorChange(pressedAt);
+    }
+  }
+
+  /** While dragging (and not locked), keep the cursor under the pointer. */
+  function handleMouseMove(e: MouseEvent) {
+    const active = dragging && !$locked;
+    if (!active) return;
+    const hoveredAt = xToTime(e.offsetX);
+    $cursor = hoveredAt;
+    onCursorChange(hoveredAt);
+  }
+
+  function handleMouseUp() {
+    dragging = false;
+  }
+
+  /**
+   * Double-click: activate the first marker within 8px of the pointer via
+   * `onMarkerClick`; if none is close enough, seek to the clicked time.
+   */
+  function handleDblClick(e: MouseEvent) {
+    const hit = $markers.find(
+      (m) => Math.abs(e.offsetX - timeToX(m.start_time)) < 8
+    );
+    if (hit) {
+      onMarkerClick(hit);
+      return;
+    }
+    onSeek(xToTime(e.offsetX));
+  }
+
+  /**
+   * Right-click: suppress the native context menu and request deletion of the
+   * first marker within 8px of the pointer, if any.
+   */
+  function handleContextMenu(e: MouseEvent) {
+    e.preventDefault();
+    const hit = $markers.find(
+      (m) => Math.abs(e.offsetX - timeToX(m.start_time)) < 8
+    );
+    if (hit) {
+      onMarkerDelete(hit.output_path);
+    }
+  }
+
+  // Redraw on any state change
+  $effect(() => {
+    void $duration; void $cursor; void $playPos; void $markers; void $clips; void $spread; void $clipSpan;
+    draw();
+  });
+
+  // On mount: grab the 2D context and keep the canvas backing size in sync
+  // with its CSS size, repainting after every resize. The returned function
+  // disconnects the observer.
+  onMount(() => {
+    ctx = canvas.getContext("2d")!;
+    const obs = new ResizeObserver(() => {
+      // Backing width follows the CSS width so pointer offsetX values map
+      // directly onto canvas coordinates; height is fixed at HEIGHT.
+      canvas.width = canvas.clientWidth;
+      canvas.height = HEIGHT;
+      draw();
+    });
+    obs.observe(canvas);
+    return () => obs.disconnect();
+  });
+</script>
+
+<canvas
+  bind:this={canvas}
+  style="width:100%;height:{HEIGHT}px"
+  onmousedown={handleMouseDown}
+  onmousemove={handleMouseMove}
+  onmouseup={handleMouseUp}
+  onmouseleave={handleMouseUp}
+  ondblclick={handleDblClick}
+  oncontextmenu={handleContextMenu}
+></canvas>
+
+<style>
+  canvas {
+    display: block;
+    background: #1a1a1a;
+    cursor: crosshair;
+  }
+</style>
@@ -0,0 +1,158 @@
+const DEFAULT_SERVER = "http://192.168.1.51:8000";
+
+let serverUrl = DEFAULT_SERVER;
+
+export function setServer(url: string) {
+  serverUrl = url.replace(/\/+$/, "");
+}
+
+export function getServer(): string {
+  return serverUrl;
+}
+
+/**
+ * GET `path` on the configured server and parse the JSON response.
+ * Throws an Error carrying the status code and text on a non-2xx response.
+ */
+async function get<T>(path: string): Promise<T> {
+  const response = await fetch(serverUrl + path);
+  if (response.ok) {
+    return response.json();
+  }
+  throw new Error(`${response.status} ${response.statusText}`);
+}
+
+/**
+ * POST to `path` on the configured server and parse the JSON response.
+ *
+ * When `body` is provided (anything other than `undefined`) it is sent as
+ * JSON with the matching Content-Type header; otherwise the request has no
+ * body. Throws an Error carrying the status code and text on a non-2xx
+ * response.
+ */
+async function post<T>(path: string, body?: unknown): Promise<T> {
+  // Test for presence, not truthiness, so falsy payloads such as 0, false
+  // or "" are still transmitted.
+  const hasBody = body !== undefined;
+  const res = await fetch(`${serverUrl}${path}`, {
+    method: "POST",
+    headers: hasBody ? { "Content-Type": "application/json" } : {},
+    body: hasBody ? JSON.stringify(body) : undefined,
+  });
+  if (!res.ok) throw new Error(`${res.status} ${res.statusText}`);
+  return res.json();
+}
+
+/**
+ * Send a DELETE to `path` on the configured server and parse the JSON response.
+ * Throws an Error carrying the status code and text on a non-2xx response.
+ */
+async function del<T>(path: string): Promise<T> {
+  const response = await fetch(serverUrl + path, { method: "DELETE" });
+  if (response.ok) {
+    return response.json();
+  }
+  throw new Error(`${response.status} ${response.statusText}`);
+}
+
+// --- Files ---
+
+export interface VideoFile {
+  name: string;
+  path: string;
+  root: string;
+  size: number;
+}
+
+export function getRoots(): Promise<string[]> {
+  return get("/api/roots");
+}
+
+/** List video files, restricted to `root` when one is given. */
+export function getFiles(root?: string): Promise<VideoFile[]> {
+  if (!root) {
+    return get("/api/files");
+  }
+  return get(`/api/files?root=${encodeURIComponent(root)}`);
+}
+
+// Percent-encode a relative path for {path:path} routes: every segment is
+// encoded on its own so the "/" separators survive.
+function encodePath(p: string): string {
+  const segments = p.split("/");
+  return segments.map((segment) => encodeURIComponent(segment)).join("/");
+}
+
+/**
+ * Build the video stream URL for `path` under `root` at the given quality.
+ * Every dynamic part, including `quality`, is percent-encoded.
+ */
+export function streamUrl(path: string, root: string, quality: string): string {
+  return `${serverUrl}/api/stream/${encodePath(path)}?root=${encodeURIComponent(root)}&quality=${encodeURIComponent(quality)}`;
+}
+
+export function audioUrl(path: string, root: string): string {
+  return `${serverUrl}/api/audio/${encodePath(path)}?root=${encodeURIComponent(root)}`;
+}
+
+/**
+ * Poll cache status until both video and audio are ready.
+ *
+ * First requests the stream and audio URLs once to trigger transcoding and
+ * audio extraction (failures of these two requests are ignored), then polls
+ * the cache-status endpoint every `interval` milliseconds until the entry for
+ * `quality` and the `audio` entry both read "ready".
+ *
+ * @param signal Aborting it cancels in-flight requests and cuts the wait
+ *               between polls short.
+ * @throws Error when aborted, or when the status endpoint answers with a
+ *         non-2xx status (instead of polling a failing endpoint forever).
+ */
+export async function waitForCache(
+  path: string, root: string, quality: string,
+  signal: AbortSignal, interval = 2000
+): Promise<void> {
+  const url = `${serverUrl}/api/cache/status/${encodePath(path)}?root=${encodeURIComponent(root)}`;
+  // Trigger transcode/audio extraction by hitting stream+audio once
+  await fetch(streamUrl(path, root, quality), { signal }).catch(() => {});
+  await fetch(audioUrl(path, root), { signal }).catch(() => {});
+
+  while (!signal.aborted) {
+    const res = await fetch(url, { signal });
+    if (!res.ok) throw new Error(`${res.status} ${res.statusText}`);
+    const status = await res.json();
+    if (status[quality] === "ready" && status.audio === "ready") return;
+    // Sleep until the next poll, but wake up immediately on abort.
+    await new Promise<void>((resolve) => {
+      if (signal.aborted) {
+        resolve();
+        return;
+      }
+      const onAbort = () => {
+        clearTimeout(timer);
+        resolve();
+      };
+      const timer = setTimeout(() => {
+        signal.removeEventListener("abort", onAbort);
+        resolve();
+      }, interval);
+      signal.addEventListener("abort", onAbort, { once: true });
+    });
+  }
+  throw new Error("Aborted");
+}
+
+export function cacheStatus(path: string, root: string): Promise<Record<string, string>> {
+  return get(`/api/cache/status/${encodePath(path)}?root=${encodeURIComponent(root)}`);
+}
+
+// --- Markers & Profiles ---
+
+export interface Marker {
+  start_time: number;
+  marker_number: number;
+  output_path: string;
+}
+
+export function getMarkers(filename: string, profile: string = "default"): Promise<Marker[]> {
+  return get(`/api/markers/${encodeURIComponent(filename)}?profile=${encodeURIComponent(profile)}`);
+}
+
+export function getProfiles(): Promise<string[]> {
+  return get("/api/profiles");
+}
+
+export function getLabels(): Promise<string[]> {
+  return get("/api/labels");
+}
+
+// --- Export ---
+
+export interface ExportRequest {
+  input_path: string;
+  cursor: number;
+  name: string;
+  clips?: number;
+  spread?: number;
+  short_side?: number | null;
+  portrait_ratio?: string | null;
+  crop_center?: number;
+  format?: string;
+  label?: string;
+  category?: string;
+  profile?: string;
+  folder_suffix?: string;
+  encoder?: string;
+}
+
+export function startExport(req: ExportRequest): Promise<{ job_id: string }> {
+  return post("/api/export", req);
+}
+
+/**
+ * Fetch the current state of the export job `jobId`.
+ * The id is percent-encoded before being placed in the URL path.
+ */
+export function getExportStatus(jobId: string): Promise<{
+  status: string;
+  total: number;
+  completed: number;
+  outputs: string[];
+  error?: string;
+}> {
+  return get(`/api/export/${encodeURIComponent(jobId)}`);
+}
+
+export function deleteExport(outputPath: string): Promise<{ deleted: string }> {
+  return del(`/api/export?output_path=${encodeURIComponent(outputPath)}`);
+}
+
+// --- Hidden ---
+
+export function hideFile(filename: string, profile: string = "default"): Promise<unknown> {
+  return post(`/api/hidden/${encodeURIComponent(filename)}?profile=${encodeURIComponent(profile)}`);
+}
+
+export function unhideFile(filename: string, profile: string = "default"): Promise<unknown> {
+  return del(`/api/hidden/${encodeURIComponent(filename)}?profile=${encodeURIComponent(profile)}`);
+}
+
+export function getHidden(profile: string = "default"): Promise<string[]> {
+  return get(`/api/hidden?profile=${encodeURIComponent(profile)}`);
+}
@@ -0,0 +1,41 @@
+import { invoke } from "@tauri-apps/api/core";
+
+/**
+ * Invoke the Tauri command `mpv_start`.
+ * NOTE(review): presumably launches the backend mpv player — confirm against the command implementation.
+ */
+export async function mpvStart(): Promise<void> {
+  return invoke("mpv_start");
+}
+
+/**
+ * Invoke the Tauri command `mpv_stop`.
+ * NOTE(review): presumably shuts the backend mpv player down — confirm against the command implementation.
+ */
+export async function mpvStop(): Promise<void> {
+  return invoke("mpv_stop");
+}
+
+/**
+ * Invoke the Tauri command `mpv_load`, passing `{ videoUrl, audioUrl }` as
+ * its arguments.
+ */
+export async function mpvLoad(videoUrl: string, audioUrl: string): Promise<void> {
+  return invoke("mpv_load", { videoUrl, audioUrl });
+}
+
+/**
+ * Invoke the Tauri command `mpv_seek` with `{ time }`.
+ * NOTE(review): callers pass timeline positions in seconds — confirm the unit expected by the backend.
+ */
+export async function mpvSeek(time: number): Promise<void> {
+  return invoke("mpv_seek", { time });
+}
+
+/** Invoke the Tauri command `mpv_pause`. */
+export async function mpvPause(): Promise<void> {
+  return invoke("mpv_pause");
+}
+
+/** Invoke the Tauri command `mpv_resume`. */
+export async function mpvResume(): Promise<void> {
+  return invoke("mpv_resume");
+}
+
+/**
+ * Invoke the Tauri command `mpv_set_loop` with `{ a, b }`.
+ * NOTE(review): `a` and `b` look like the start and end of an A-B loop in seconds — confirm against the backend.
+ */
+export async function mpvSetLoop(a: number, b: number): Promise<void> {
+  return invoke("mpv_set_loop", { a, b });
+}
+
+/** Invoke the Tauri command `mpv_clear_loop`. */
+export async function mpvClearLoop(): Promise<void> {
+  return invoke("mpv_clear_loop");
+}
+
+/**
+ * Invoke the Tauri command `mpv_time_pos` and resolve with the number it returns.
+ * NOTE(review): presumably the current playback position in seconds — confirm.
+ */
+export async function mpvTimePos(): Promise<number> {
+  return invoke("mpv_time_pos");
+}
+
+/**
+ * Invoke the Tauri command `mpv_duration` and resolve with the number it returns.
+ * NOTE(review): presumably the loaded media's duration in seconds — confirm.
+ */
+export async function mpvDuration(): Promise<number> {
+  return invoke("mpv_duration");
+}
@@ -0,0 +1,58 @@
+import {
+  serverUrl, quality, clips, spread, shortSide, portraitRatio,
+  format, hwEncode, profile, subprofiles
+} from "./stores";
+import { setServer } from "./api";
+import { get } from "svelte/store";
+
+const KEY = "8cut-settings";
+
+interface Settings {
+  serverUrl: string;
+  quality: string;
+  clips: number;
+  spread: number;
+  shortSide: number | null;
+  portraitRatio: string | null;
+  format: string;
+  hwEncode: boolean;
+  profile: string;
+  subprofiles: string[];
+}
+
+/**
+ * Snapshot the persisted stores and write them to localStorage as JSON
+ * under `KEY`.
+ */
+export function saveSettings() {
+  const snapshot: Settings = {
+    serverUrl: get(serverUrl),
+    quality: get(quality),
+    clips: get(clips),
+    spread: get(spread),
+    shortSide: get(shortSide),
+    portraitRatio: get(portraitRatio),
+    format: get(format),
+    hwEncode: get(hwEncode),
+    profile: get(profile),
+    subprofiles: get(subprofiles),
+  };
+  const serialized = JSON.stringify(snapshot);
+  localStorage.setItem(KEY, serialized);
+}
+
+/**
+ * Restore settings saved under `KEY` in localStorage into their stores.
+ *
+ * Missing or empty storage is a no-op. Truthy-checked fields (serverUrl,
+ * quality, clips, spread, format, profile, subprofiles) are applied only when
+ * truthy; shortSide, portraitRatio and hwEncode are applied whenever they are
+ * present, so `null` and `false` round-trip. Any error while parsing or
+ * applying is swallowed.
+ */
+export function loadSettings() {
+  const stored = localStorage.getItem(KEY);
+  if (!stored) {
+    return;
+  }
+  try {
+    const saved: Settings = JSON.parse(stored);
+    if (saved.serverUrl) {
+      // Keep the store and the API module's base URL in step.
+      serverUrl.set(saved.serverUrl);
+      setServer(saved.serverUrl);
+    }
+    if (saved.quality) {
+      quality.set(saved.quality);
+    }
+    if (saved.clips) {
+      clips.set(saved.clips);
+    }
+    if (saved.spread) {
+      spread.set(saved.spread);
+    }
+    if (saved.shortSide !== undefined) {
+      shortSide.set(saved.shortSide);
+    }
+    if (saved.portraitRatio !== undefined) {
+      portraitRatio.set(saved.portraitRatio);
+    }
+    if (saved.format) {
+      format.set(saved.format);
+    }
+    if (saved.hwEncode !== undefined) {
+      hwEncode.set(saved.hwEncode);
+    }
+    if (saved.profile) {
+      profile.set(saved.profile);
+    }
+    if (saved.subprofiles) {
+      subprofiles.set(saved.subprofiles);
+    }
+  } catch {
+    /* ignore corrupt settings */
+  }
+}
@@ -0,0 +1,66 @@
+import { writable, derived } from "svelte/store";
+import type { VideoFile, Marker } from "./api";
+
+// --- Connection ---
+export const serverUrl = writable("http://192.168.1.51:8000");
+
+// --- Files ---
+export const roots = writable<string[]>([]);
+export const files = writable<VideoFile[]>([]);
+export const hiddenFiles = writable<Set<string>>(new Set());
+export const currentFile = writable<VideoFile | null>(null);
+export const hideExported = writable(false);
+export const showHidden = writable(false);
+
+// --- Playback ---
+export const duration = writable(0);
+export const cursor = writable(0);
+export const playPos = writable<number | null>(null);
+export const playing = writable(false);
+export const quality = writable("low");
+
+// --- Timeline ---
+export const markers = writable<Marker[]>([]);
+export const locked = writable(false);
+
+// --- Export settings ---
+export const clips = writable(3);
+export const spread = writable(3.0);
+export const shortSide = writable<number | null>(512);
+export const portraitRatio = writable<string | null>(null);
+export const cropCenter = writable(0.5);
+export const format = writable("MP4");
+export const hwEncode = writable(false);
+export const label = writable("");
+export const category = writable("");
+export const clipName = writable("");
+export const exportFolder = writable("");
+export const encoder = writable("libx264");
+export const trackSubject = writable(false);
+export const randPortrait = writable(false);
+export const randSquare = writable(false);
+
+// --- Profiles ---
+export const profile = writable("default");
+export const subprofiles = writable<string[]>([]);
+
+// --- Export progress ---
+export const exportStatus = writable<string>("idle"); // idle | running | done | error
+export const exportCompleted = writable(0);
+export const exportTotal = writable(0);
+
+// --- Derived ---
+// Total span in seconds covered by one export: 8.0 for the first clip plus
+// `spread` for each additional clip.
+export const clipSpan = derived([clips, spread], ([count, gap]) => {
+  const extraClips = count - 1;
+  return 8.0 + extraClips * gap;
+});
+
+// Files to list in the browser: everything when `showHidden` is on,
+// otherwise only files whose name is not in the hidden set.
+export const visibleFiles = derived(
+  [files, hiddenFiles, showHidden],
+  ([all, hiddenNames, includeHidden]) =>
+    all.filter((file) => includeHidden || !hiddenNames.has(file.name))
+);
@@ -0,0 +1,48 @@
+import { getServer } from "./api";
+import { exportStatus, exportCompleted } from "./stores";
+
+let socket: WebSocket | null = null;
+let reconnectDelay = 2000;
+// Pending reconnect timer, kept so disconnectExportWs() can cancel it.
+let reconnectTimer: ReturnType<typeof setTimeout> | null = null;
+
+/**
+ * Open the export-progress WebSocket (`/ws/export` on the configured server)
+ * and mirror its messages into the export stores.
+ *
+ * Any previous socket or pending reconnect is torn down first, so calling
+ * this repeatedly never leaves more than one live connection. When the socket
+ * closes it reconnects with exponential backoff (2s doubling up to 30s), and
+ * the backoff resets on a successful open.
+ */
+export function connectExportWs() {
+  disconnectExportWs();
+
+  const wsUrl = getServer().replace(/^http/, "ws") + "/ws/export";
+  const ws = new WebSocket(wsUrl);
+  socket = ws;
+
+  ws.onopen = () => {
+    reconnectDelay = 2000; // reset backoff on successful connect
+  };
+
+  ws.onmessage = (event) => {
+    try {
+      const msg = JSON.parse(event.data);
+      switch (msg.type) {
+        case "clip_done":
+          exportCompleted.update(n => n + 1);
+          break;
+        case "all_done":
+          exportStatus.set("done");
+          break;
+        case "error":
+          exportStatus.set("error");
+          console.error("Export error:", msg.msg);
+          break;
+      }
+    } catch (e) {
+      console.error("Failed to parse WebSocket message:", e);
+    }
+  };
+
+  ws.onclose = () => {
+    if (socket === ws) socket = null;
+    // Reconnect with exponential backoff, max 30s
+    reconnectTimer = setTimeout(connectExportWs, reconnectDelay);
+    reconnectDelay = Math.min(reconnectDelay * 2, 30000);
+  };
+}
+
+/**
+ * Close the export WebSocket and stop reconnecting.
+ *
+ * Also cancels a reconnect that was already scheduled by an earlier close;
+ * without that, the timer would reopen the socket after teardown.
+ */
+export function disconnectExportWs() {
+  if (reconnectTimer !== null) {
+    clearTimeout(reconnectTimer);
+    reconnectTimer = null;
+  }
+  if (socket) {
+    socket.onclose = null; // prevent reconnect
+    socket.close();
+    socket = null;
+  }
+}
@@ -0,0 +1,5 @@
+// Tauri doesn't have a Node.js server to do proper SSR
+// so we use adapter-static with a fallback to index.html to put the site in SPA mode
+// See: https://svelte.dev/docs/kit/single-page-apps
+// See: https://v2.tauri.app/start/frontend/sveltekit/ for more info
+export const ssr = false;
@@ -0,0 +1,251 @@
+<script lang="ts">
+  import { onMount, onDestroy } from "svelte";
+  import FileBrowser from "../components/FileBrowser.svelte";
+  import Timeline from "../components/Timeline.svelte";
+  import ExportPanel from "../components/ExportPanel.svelte";
+  import ProfileBar from "../components/ProfileBar.svelte";
+  import { mpvStart, mpvLoad, mpvSeek, mpvPause, mpvResume, mpvSetLoop, mpvClearLoop, mpvTimePos, mpvDuration } from "$lib/mpv";
+  import { streamUrl, audioUrl, waitForCache, deleteExport, getMarkers } from "$lib/api";
+  import { connectExportWs, disconnectExportWs } from "$lib/ws";
+  import { loadSettings, saveSettings } from "$lib/settings";
+  import {
+    currentFile, cursor, duration, playPos, playing, quality,
+    clips, spread, locked, markers, profile, clipSpan, subprofiles
+  } from "$lib/stores";
+
+  let pollInterval: ReturnType<typeof setInterval>;
+  let exportPanelRef: ExportPanel;
+
+  // Unsubscribers for the auto-save subscriptions. They are released in
+  // onDestroy: a cleanup function returned from an async onMount callback is
+  // never called, because the callback returns a Promise.
+  let settingsUnsubs: Array<() => void> = [];
+  // Set when the component is torn down, so the async setup below can bail out.
+  let destroyed = false;
+
+  // Startup: restore settings, start mpv, open the export WebSocket, begin
+  // polling the playback position, and auto-save settings on store changes.
+  onMount(async () => {
+    loadSettings();
+
+    await mpvStart();
+    // The component may have been destroyed while mpv was starting; do not
+    // create timers or subscriptions that nothing would clean up.
+    if (destroyed) return;
+    connectExportWs();
+
+    // Poll mpv for time position
+    pollInterval = setInterval(async () => {
+      if ($playing) {
+        try {
+          $playPos = await mpvTimePos();
+        } catch { /* mpv not ready */ }
+      }
+    }, 50);
+
+    // Auto-save settings on changes
+    settingsUnsubs = [
+      quality.subscribe(() => saveSettings()),
+      clips.subscribe(() => saveSettings()),
+      spread.subscribe(() => saveSettings()),
+      profile.subscribe(() => saveSettings()),
+      subprofiles.subscribe(() => saveSettings()),
+    ];
+  });
+
+  // Teardown: stop polling, close the WebSocket, drop the store subscriptions.
+  onDestroy(() => {
+    destroyed = true;
+    clearInterval(pollInterval);
+    disconnectExportWs();
+    settingsUnsubs.forEach(u => u());
+    settingsUnsubs = [];
+  });
+
+  // Load file into mpv when currentFile OR quality changes.
+  // Each run aborts the previous run's cache polling. A superseded run never
+  // calls mpvLoad or writes `duration`, and real failures are logged instead
+  // of being swallowed.
+  let loadAbort: AbortController | null = null;
+  $effect(() => {
+    const file = $currentFile;
+    const q = $quality;
+    if (file) {
+      // Cancel any previous polling
+      loadAbort?.abort();
+      const ac = new AbortController();
+      loadAbort = ac;
+
+      const vUrl = streamUrl(file.path, file.root, q);
+      const aUrl = audioUrl(file.path, file.root);
+      (async () => {
+        try {
+          await waitForCache(file.path, file.root, q, ac.signal);
+          if (ac.signal.aborted) return;
+          await mpvLoad(vUrl, aUrl);
+          // Give mpv a moment to open the file before asking for its duration.
+          await new Promise(r => setTimeout(r, 500));
+          if (ac.signal.aborted) return;
+          try { $duration = await mpvDuration(); } catch {}
+        } catch (err) {
+          // An abort is expected when the selection changes; anything else
+          // is a genuine load failure worth reporting.
+          if (!ac.signal.aborted) console.error("Failed to load file:", err);
+        }
+      })();
+    }
+  });
+
+  async function handleCursorChange(time: number) {
+    await mpvSeek(time);
+  }
+
+  /**
+   * Start looped playback of the current clip span: seek to the cursor, set a
+   * loop over [cursor, cursor + clipSpan], resume mpv, then mark `$playing`.
+   */
+  async function handlePlay() {
+    const a = $cursor;
+    const b = $cursor + $clipSpan;
+    await mpvSeek(a);
+    await mpvSetLoop(a, b);
+    await mpvResume();
+    $playing = true;
+  }
+
+  /** Pause mpv, clear the loop set by handlePlay, then clear `$playing`. */
+  async function handlePause() {
+    await mpvPause();
+    await mpvClearLoop();
+    $playing = false;
+  }
+
+  /**
+   * Jump to a marker picked on the timeline.
+   *
+   * Unlocked: the cursor moves to the marker's start time. Locked: the cursor
+   * moves one clip span past the marker's start. The span comes from the
+   * shared `clipSpan` store rather than a local copy of its formula, so the
+   * two cannot drift apart.
+   */
+  async function handleMarkerClick(m: { start_time: number; output_path: string }) {
+    const target = $locked ? m.start_time + $clipSpan : m.start_time;
+    $cursor = target;
+    await mpvSeek(target);
+  }
+
+  async function handleMarkerDelete(outputPath: string) {
+    await deleteExport(outputPath);
+    if ($currentFile) {
+      $markers = await getMarkers($currentFile.name, $profile);
+    }
+  }
+
+  /**
+   * Global keyboard shortcuts.
+   *
+   * Space toggles play/pause, E exports, the left/right arrows move the cursor
+   * by one second, and digits 1-9 export with the matching subprofile.
+   * Ignored while typing in a form field, and when Ctrl, Meta or Alt is held
+   * so browser and OS shortcuts do not trigger exports or seeks.
+   */
+  function handleKeydown(e: KeyboardEvent) {
+    if (e.ctrlKey || e.metaKey || e.altKey) return;
+
+    const tag = (e.target as HTMLElement | null)?.tagName;
+    if (tag === "INPUT" || tag === "SELECT" || tag === "TEXTAREA") return;
+
+    switch (e.key) {
+      case " ":
+        e.preventDefault();
+        $playing ? handlePause() : handlePlay();
+        break;
+      case "e":
+      case "E":
+        exportPanelRef?.doExport();
+        break;
+      case "ArrowLeft":
+        $cursor = Math.max(0, $cursor - 1);
+        handleCursorChange($cursor);
+        break;
+      case "ArrowRight":
+        $cursor = Math.min($duration, $cursor + 1);
+        handleCursorChange($cursor);
+        break;
+    }
+
+    // Digit N exports with the N-th subprofile, when one exists.
+    if (/^[1-9]$/.test(e.key)) {
+      const idx = Number(e.key) - 1;
+      if (idx < $subprofiles.length) {
+        exportPanelRef?.doExport($subprofiles[idx]);
+      }
+    }
+  }
+
+  /** Format seconds as `M:SS.s`; fractions are truncated to tenths, not rounded. */
+  function fmtTime(s: number): string {
+    const wholeMinutes = Math.floor(s / 60);
+    const truncated = Math.floor((s % 60) * 10) / 10;
+    const secondsText = truncated.toFixed(1).padStart(4, "0");
+    return wholeMinutes + ":" + secondsText;
+  }
+</script>
+
+<svelte:window onkeydown={handleKeydown} />
+
+<main>
+  <div class="layout">
+    <div class="sidebar">
+      <FileBrowser />
+    </div>
+    <div class="content">
+      <ProfileBar />
+      <div class="player-area">
+        <div class="video-placeholder">
+          {#if $currentFile}
+            <p>{$currentFile.name}</p>
+          {:else}
+            <p>Select a file</p>
+          {/if}
+        </div>
+      </div>
+      <Timeline
+        onCursorChange={handleCursorChange}
+        onSeek={handleCursorChange}
+        onMarkerClick={handleMarkerClick}
+        onMarkerDelete={handleMarkerDelete}
+      />
+      <div class="transport">
+        <button onclick={handlePlay} disabled={!$currentFile}>Play</button>
+        <button onclick={handlePause}>Pause</button>
+        <button onclick={() => $locked = !$locked}>
+          {$locked ? "Locked" : "Unlocked"}
+        </button>
+        <span class="time">
+          {#if $duration > 0}
+            {fmtTime($cursor)} / {fmtTime($duration)}
+          {/if}
+        </span>
+        <select bind:value={$quality} style="margin-left:auto">
+          <option value="potato">480p</option>
+          <option value="low">720p</option>
+          <option value="medium">1080p</option>
+          <option value="high">Original</option>
+        </select>
+      </div>
+      <ExportPanel bind:this={exportPanelRef} />
+    </div>
+  </div>
+</main>
+
+<style>
+  :global(body) {
+    margin: 0;
+    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
+    background: #1e1e1e;
+    color: #e0e0e0;
+  }
+  main { height: 100vh; overflow: hidden; }
+  .layout {
+    display: flex;
+    height: 100%;
+  }
+  .sidebar {
+    width: 220px;
+    min-width: 220px;
+    flex-shrink: 0;
+    border-right: 1px solid #333;
+    overflow: hidden;
+  }
+  .content {
+    flex: 1;
+    display: flex;
+    flex-direction: column;
+    overflow: hidden;
+  }
+  .player-area {
+    flex: 1;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    background: #000;
+    min-height: 200px;
+  }
+  .video-placeholder {
+    color: #666;
+    text-align: center;
+  }
+  .transport {
+    display: flex;
+    align-items: center;
+    gap: 6px;
+    padding: 4px 8px;
+    background: #222;
+  }
+  .transport button {
+    background: #333;
+    color: #e0e0e0;
+    border: 1px solid #555;
+    padding: 4px 10px;
+    cursor: pointer;
+  }
+  .time {
+    font-family: monospace;
+    font-size: 13px;
+  }
+  select { background: #2d2d2d; color: #e0e0e0; border: 1px solid #444; }
+</style>
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="26.6" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 308"><path fill="#FF3E00" d="M239.682 40.707C211.113-.182 154.69-12.301 113.895 13.69L42.247 59.356a82.198 82.198 0 0 0-37.135 55.056a86.566 86.566 0 0 0 8.536 55.576a82.425 82.425 0 0 0-12.296 30.719a87.596 87.596 0 0 0 14.964 66.244c28.574 40.893 84.997 53.007 125.787 27.016l71.648-45.664a82.182 82.182 0 0 0 37.135-55.057a86.601 86.601 0 0 0-8.53-55.577a82.409 82.409 0 0 0 12.29-30.718a87.573 87.573 0 0 0-14.963-66.244"></path><path fill="#FFF" d="M106.889 270.841c-23.102 6.007-47.497-3.036-61.103-22.648a52.685 52.685 0 0 1-9.003-39.85a49.978 49.978 0 0 1 1.713-6.693l1.35-4.115l3.671 2.697a92.447 92.447 0 0 0 28.036 14.007l2.663.808l-.245 2.659a16.067 16.067 0 0 0 2.89 10.656a17.143 17.143 0 0 0 18.397 6.828a15.786 15.786 0 0 0 4.403-1.935l71.67-45.672a14.922 14.922 0 0 0 6.734-9.977a15.923 15.923 0 0 0-2.713-12.011a17.156 17.156 0 0 0-18.404-6.832a15.78 15.78 0 0 0-4.396 1.933l-27.35 17.434a52.298 52.298 0 0 1-14.553 6.391c-23.101 6.007-47.497-3.036-61.101-22.649a52.681 52.681 0 0 1-9.004-39.849a49.428 49.428 0 0 1 22.34-33.114l71.664-45.677a52.218 52.218 0 0 1 14.563-6.398c23.101-6.007 47.497 3.036 61.101 22.648a52.685 52.685 0 0 1 9.004 39.85a50.559 50.559 0 0 1-1.713 6.692l-1.35 4.116l-3.67-2.693a92.373 92.373 0 0 0-28.037-14.013l-2.664-.809l.246-2.658a16.099 16.099 0 0 0-2.89-10.656a17.143 17.143 0 0 0-18.398-6.828a15.786 15.786 0 0 0-4.402 1.935l-71.67 45.674a14.898 14.898 0 0 0-6.73 9.975a15.9 15.9 0 0 0 2.709 12.012a17.156 17.156 0 0 0 18.404 6.832a15.841 15.841 0 0 0 4.402-1.935l27.345-17.427a52.147 52.147 0 0 1 14.552-6.397c23.101-6.006 47.497 3.037 61.102 22.65a52.681 52.681 0 0 1 9.003 39.848a49.453 49.453 0 0 1-22.34 33.12l-71.664 45.673a52.218 52.218 0 0 1-14.563 6.398"></path></svg>
@@ -0,0 +1,6 @@
+<svg width="206" height="231" viewBox="0 0 206 231" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M143.143 84C143.143 96.1503 133.293 106 121.143 106C108.992 106 99.1426 96.1503 99.1426 84C99.1426 71.8497 108.992 62 121.143 62C133.293 62 143.143 71.8497 143.143 84Z" fill="#FFC131"/>
+<ellipse cx="84.1426" cy="147" rx="22" ry="22" transform="rotate(180 84.1426 147)" fill="#24C8DB"/>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M166.738 154.548C157.86 160.286 148.023 164.269 137.757 166.341C139.858 160.282 141 153.774 141 147C141 144.543 140.85 142.121 140.558 139.743C144.975 138.204 149.215 136.139 153.183 133.575C162.73 127.404 170.292 118.608 174.961 108.244C179.63 97.8797 181.207 86.3876 179.502 75.1487C177.798 63.9098 172.884 53.4021 165.352 44.8883C157.82 36.3744 147.99 30.2165 137.042 27.1546C126.095 24.0926 114.496 24.2568 103.64 27.6274C92.7839 30.998 83.1319 37.4317 75.8437 46.1553C74.9102 47.2727 74.0206 48.4216 73.176 49.5993C61.9292 50.8488 51.0363 54.0318 40.9629 58.9556C44.2417 48.4586 49.5653 38.6591 56.679 30.1442C67.0505 17.7298 80.7861 8.57426 96.2354 3.77762C111.685 -1.01901 128.19 -1.25267 143.769 3.10474C159.348 7.46215 173.337 16.2252 184.056 28.3411C194.775 40.457 201.767 55.4101 204.193 71.404C206.619 87.3978 204.374 103.752 197.73 118.501C191.086 133.25 180.324 145.767 166.738 154.548ZM41.9631 74.275L62.5557 76.8042C63.0459 72.813 63.9401 68.9018 65.2138 65.1274C57.0465 67.0016 49.2088 70.087 41.9631 74.275Z" fill="#FFC131"/>
+<path fill-rule="evenodd" clip-rule="evenodd" d="M38.4045 76.4519C47.3493 70.6709 57.2677 66.6712 67.6171 64.6132C65.2774 70.9669 64 77.8343 64 85.0001C64 87.1434 64.1143 89.26 64.3371 91.3442C60.0093 92.8732 55.8533 94.9092 51.9599 97.4256C42.4128 103.596 34.8505 112.392 30.1816 122.756C25.5126 133.12 23.9357 144.612 25.6403 155.851C27.3449 167.09 32.2584 177.598 39.7906 186.112C47.3227 194.626 57.153 200.784 68.1003 203.846C79.0476 206.907 90.6462 206.743 101.502 203.373C112.359 200.002 122.011 193.568 129.299 184.845C130.237 183.722 131.131 182.567 131.979 181.383C143.235 180.114 154.132 176.91 164.205 171.962C160.929 182.49 155.596 192.319 148.464 200.856C138.092 213.27 124.357 222.426 108.907 227.222C93.458 232.019 76.9524 232.253 61.3736 227.895C45.7948 223.538 31.8055 214.775 21.0867 202.659C10.3679 190.543 3.37557 175.59 0.949823 159.596C-1.47592 143.602 0.768139 127.248 7.41237 112.499C14.0566 97.7497 24.8183 85.2327 38.4045 76.4519ZM163.062 156.711L163.062 156.711C162.954 156.773 162.846 156.835 162.738 156.897C162.846 156.835 162.954 156.773 163.062 156.711Z" fill="#24C8DB"/>
+</svg>
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="31.88" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 257"><defs><linearGradient id="IconifyId1813088fe1fbc01fb466" x1="-.828%" x2="57.636%" y1="7.652%" y2="78.411%"><stop offset="0%" stop-color="#41D1FF"></stop><stop offset="100%" stop-color="#BD34FE"></stop></linearGradient><linearGradient id="IconifyId1813088fe1fbc01fb467" x1="43.376%" x2="50.316%" y1="2.242%" y2="89.03%"><stop offset="0%" stop-color="#FFEA83"></stop><stop offset="8.333%" stop-color="#FFDD35"></stop><stop offset="100%" stop-color="#FFA800"></stop></linearGradient></defs><path fill="url(#IconifyId1813088fe1fbc01fb466)" d="M255.153 37.938L134.897 252.976c-2.483 4.44-8.862 4.466-11.382.048L.875 37.958c-2.746-4.814 1.371-10.646 6.827-9.67l120.385 21.517a6.537 6.537 0 0 0 2.322-.004l117.867-21.483c5.438-.991 9.574 4.796 6.877 9.62Z"></path><path fill="url(#IconifyId1813088fe1fbc01fb467)" d="M185.432.063L96.44 17.501a3.268 3.268 0 0 0-2.634 3.014l-5.474 92.456a3.268 3.268 0 0 0 3.997 3.378l24.777-5.718c2.318-.535 4.413 1.507 3.936 3.838l-7.361 36.047c-.495 2.426 1.782 4.5 4.151 3.78l15.304-4.649c2.372-.72 4.652 1.36 4.15 3.788l-11.698 56.621c-.732 3.542 3.979 5.473 5.943 2.437l1.313-2.028l72.516-144.72c1.215-2.423-.88-5.186-3.54-4.672l-25.505 4.922c-2.396.462-4.435-1.77-3.759-4.114l16.646-57.705c.677-2.35-1.37-4.583-3.769-4.113Z"></path></svg>
@@ -0,0 +1,18 @@
+// Tauri doesn't have a Node.js server to do proper SSR
+// so we use adapter-static with a fallback to index.html to put the site in SPA mode
+// See: https://svelte.dev/docs/kit/single-page-apps
+// See: https://v2.tauri.app/start/frontend/sveltekit/ for more info
+import adapter from "@sveltejs/adapter-static";
+import { vitePreprocess } from "@sveltejs/vite-plugin-svelte";
+
+/**
+ * SvelteKit configuration object exported by this file.
+ *
+ * @type {import('@sveltejs/kit').Config}
+ */
+const config = {
+  // Preprocess Svelte components with the Vite preprocessor.
+  preprocess: vitePreprocess(),
+  kit: {
+    // Static adapter with index.html as the fallback page; per the header
+    // comment above, this is what puts the site in SPA mode for Tauri.
+    adapter: adapter({
+      fallback: "index.html",
+    }),
+  },
+};
+
+export default config;
@@ -0,0 +1,19 @@
+{
+  "extends": "./.svelte-kit/tsconfig.json",
+  "compilerOptions": {
+    "allowJs": true,
+    "checkJs": true,
+    "esModuleInterop": true,
+    "forceConsistentCasingInFileNames": true,
+    "resolveJsonModule": true,
+    "skipLibCheck": true,
+    "sourceMap": true,
+    "strict": true,
+    "moduleResolution": "bundler"
+  }
+  // Path aliases are handled by https://svelte.dev/docs/kit/configuration#alias
+  // except $lib which is handled by https://svelte.dev/docs/kit/configuration#files
+  //
+  // If you want to overwrite includes/excludes, make sure to copy over the relevant includes/excludes
+  // from the referenced tsconfig.json - TypeScript does not merge them in
+}
@@ -0,0 +1,32 @@
+import { defineConfig } from "vite";
+import { sveltekit } from "@sveltejs/kit/vite";
+
+// @ts-expect-error process is a nodejs global
+const host = process.env.TAURI_DEV_HOST;
+
+// https://vite.dev/config/
+export default defineConfig(async () => ({
+  // SvelteKit's Vite plugin is the only plugin registered here.
+  plugins: [sveltekit()],
+
+  // Vite options tailored for Tauri development and only applied in `tauri dev` or `tauri build`
+  //
+  // 1. prevent Vite from obscuring rust errors
+  clearScreen: false,
+  // 2. tauri expects a fixed port, fail if that port is not available
+  server: {
+    port: 1420,
+    strictPort: true,
+    // `host` is read from the TAURI_DEV_HOST environment variable above;
+    // when it is unset, `false` is passed instead.
+    host: host || false,
+    // HMR is configured explicitly (websocket on port 1421 at the same host)
+    // only when TAURI_DEV_HOST is set; otherwise Vite's defaults apply.
+    hmr: host
+      ? {
+          protocol: "ws",
+          host,
+          port: 1421,
+        }
+      : undefined,
+    watch: {
+      // 3. tell Vite to ignore watching `src-tauri`
+      ignored: ["**/src-tauri/**"],
+    },
+  },
+}));
@@ -0,0 +1,55 @@
+import json
+import os
+
+
+def build_annotation_json_path(folder: str) -> str:
+    return os.path.join(folder, "dataset.json")
+
+
+def remove_clip_annotation(folder: str, clip_path: str) -> None:
+    """Remove the entry for *clip_path* from <folder>/dataset.json if present."""
+    json_path = build_annotation_json_path(folder)
+    if not os.path.exists(json_path):
+        return
+    abs_path = os.path.abspath(clip_path)
+    with open(json_path, "r", encoding="utf-8") as f:
+        try:
+            entries = json.load(f)
+        except (json.JSONDecodeError, ValueError):
+            return
+    entries = [e for e in entries if e.get("path") != abs_path]
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(entries, f, indent=2, ensure_ascii=False)
+        f.write("\n")
+
+
+def upsert_clip_annotation(folder: str, clip_path: str, label: str) -> None:
+    """Insert or update one entry in <folder>/dataset.json.
+
+    Each entry stores a path relative to *folder* and the sound label.
+    Matches on ``path``; if an entry for the same clip already exists it is
+    replaced (overwrite-export case).  Nothing is written when *label* is
+    empty.
+    """
+    if not label.strip():
+        return
+    os.makedirs(folder, exist_ok=True)
+    json_path = build_annotation_json_path(folder)
+    entries: list[dict] = []
+    if os.path.exists(json_path):
+        with open(json_path, "r", encoding="utf-8") as f:
+            try:
+                entries = json.load(f)
+            except (json.JSONDecodeError, ValueError):
+                entries = []
+    abs_path = os.path.abspath(clip_path)
+    entry: dict = {"path": abs_path, "label": label}
+    for i, e in enumerate(entries):
+        if e.get("path") == abs_path:
+            entries[i] = entry
+            break
+    else:
+        entries.append(entry)
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(entries, f, indent=2, ensure_ascii=False)
+        f.write("\n")
@@ -0,0 +1,394 @@
+"""Audio scanning — embedding-based classifier for audio event detection."""
+
+import hashlib
+import os
+import numpy as np
+import librosa
+
+from .paths import _log
+
+# Sample rate requested from librosa.load for every video.
+_SR = 16000           # lower sr = faster
+# Default sliding-window length.
+_WINDOW = 8.0         # seconds
+# Directory used by default_model_path() for saved classifiers.
+_MODEL_DIR = os.path.join(os.path.expanduser("~"), ".8cut_models")
+# Directory where per-video embedding caches (.npz) are written.
+_W2V_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".8cut_cache", "w2v")
+
+# ---------------------------------------------------------------------------
+# Embedding extraction (lazy-loaded)
+# ---------------------------------------------------------------------------
+
+# Module-level cache filled in by _get_w2v_model(): the loaded model, the
+# device string it was moved to, and the name it was loaded under.
+_w2v_model = None
+_w2v_device = None
+_w2v_model_name = None
+
+# Supported embedding models — name → embed_dim
+# Every key except "BEATS" is looked up as an attribute of torchaudio.pipelines.
+_EMBED_MODELS = {
+    "WAV2VEC2_BASE":       768,
+    "WAV2VEC2_LARGE":      1024,
+    "WAV2VEC2_LARGE_LV60K":1024,
+    "HUBERT_BASE":         768,
+    "HUBERT_LARGE":        1024,
+    "HUBERT_XLARGE":       1280,
+    "BEATS":               768,
+}
+# Model used whenever a caller passes model_name=None.
+_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
+
+# Location of the BEATs checkpoint inside the local Hugging Face hub cache.
+# NOTE(review): the path is pinned to one snapshot hash and nothing in this
+# module downloads the file — confirm how it is expected to get there.
+_BEATS_CHECKPOINT = os.path.join(
+    os.path.expanduser("~"), ".cache", "huggingface", "hub",
+    "models--lpepino--beats_ckpts", "snapshots",
+    "5b53b0404df452a3a607d7e67687227730e5bad1", "BEATs_iter3_plus_AS2M.pt",
+)
+
+
+def _get_w2v_model(model_name: str | None = None):
+    """Lazy-load an embedding model. Reloads if model_name differs from cached."""
+    global _w2v_model, _w2v_device, _w2v_model_name
+    if model_name is None:
+        model_name = _DEFAULT_EMBED_MODEL
+    if _w2v_model is None or _w2v_model_name != model_name:
+        import torch
+        _w2v_device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if model_name == "BEATS":
+            from .beats_model import BEATs, BEATsConfig
+            checkpoint = torch.load(_BEATS_CHECKPOINT, map_location=_w2v_device,
+                                    weights_only=False)
+            cfg = BEATsConfig(checkpoint['cfg'])
+            _w2v_model = BEATs(cfg)
+            _w2v_model.load_state_dict(checkpoint['model'])
+            _w2v_model.to(_w2v_device)
+        else:
+            import torchaudio
+            bundle = getattr(torchaudio.pipelines, model_name)
+            _w2v_model = bundle.get_model().to(_w2v_device)
+
+        _w2v_model.eval()
+        _w2v_model_name = model_name
+        _log(f"audio_scan: {model_name} loaded on {_w2v_device}")
+    return _w2v_model, _w2v_device
+
+
+def _embed_dim(model_name: str | None = None) -> int:
+    """Return embedding dimension for a model name."""
+    if model_name is None:
+        model_name = _DEFAULT_EMBED_MODEL
+    return _EMBED_MODELS.get(model_name, 768)
+
+
+def _w2v_cache_path(video_path: str, hop: float, window: float,
+                    model_name: str | None = None) -> str:
+    """Return cache file path for a video's embeddings (includes model name)."""
+    if model_name is None:
+        model_name = _DEFAULT_EMBED_MODEL
+    abspath = os.path.abspath(video_path)
+    mtime = os.path.getmtime(abspath)
+    key = f"{abspath}|{mtime}|{hop}|{window}|{model_name}"
+    h = hashlib.sha256(key.encode()).hexdigest()[:16]
+    return os.path.join(_W2V_CACHE_DIR, f"{h}.npz")
+
+
+def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
+                         hop: float = 1.0, window: float = _WINDOW,
+                         video_path: str | None = None,
+                         cancel_flag: object = None,
+                         model_name: str | None = None,
+                         ) -> tuple[np.ndarray, np.ndarray]:
+    """Extract embeddings for all sliding windows using a torchaudio model.
+
+    If video_path is given, results are cached to disk for fast re-scans.
+    Returns (timestamps, embeddings) where embeddings is (N, D).
+    """
+    edim = _embed_dim(model_name)
+
+    # Try loading from cache
+    cache_file = None
+    if video_path:
+        try:
+            cache_file = _w2v_cache_path(video_path, hop, window, model_name)
+            if os.path.exists(cache_file):
+                data = np.load(cache_file)
+                _log(f"audio_scan: cache hit ({cache_file})")
+                return data["timestamps"], data["embeddings"]
+        except Exception as e:
+            _log(f"audio_scan: cache read failed: {e}")
+
+    win_samples = int(window * sr)
+    hop_samples = int(hop * sr)
+    n_windows = max(0, (len(y) - win_samples) // hop_samples + 1)
+
+    if n_windows == 0:
+        return np.array([]), np.empty((0, edim))
+
+    import torch
+    model, device = _get_w2v_model(model_name)
+    is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
+    batch_size = 16
+    timestamps = np.arange(n_windows) * hop
+    embeddings = []
+
+    for batch_start in range(0, n_windows, batch_size):
+        if cancel_flag and getattr(cancel_flag, '_cancel', False):
+            return np.array([]), np.empty((0, edim))
+        batch_end = min(batch_start + batch_size, n_windows)
+        chunks = []
+        for i in range(batch_start, batch_end):
+            start = i * hop_samples
+            chunks.append(y[start:start + win_samples])
+        with torch.no_grad():
+            waveforms = torch.from_numpy(np.stack(chunks)).float().to(device)
+            if is_beats:
+                padding_mask = torch.zeros_like(waveforms, dtype=torch.bool)
+                features, _ = model.extract_features(waveforms, padding_mask=padding_mask)
+            else:
+                features, _ = model(waveforms)
+            batch_emb = features.mean(dim=1).cpu().numpy()
+        embeddings.append(batch_emb)
+
+    result_ts = timestamps
+    result_emb = np.vstack(embeddings)
+
+    # Save to cache
+    if cache_file:
+        try:
+            os.makedirs(_W2V_CACHE_DIR, exist_ok=True)
+            np.savez(cache_file, timestamps=result_ts, embeddings=result_emb)
+            _log(f"audio_scan: w2v cache saved ({cache_file})")
+        except Exception as e:
+            _log(f"audio_scan: cache write failed: {e}")
+
+    return result_ts, result_emb
+
+
+def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
+                          gt_soft: list[float], tolerance: float = 12.0,
+                          neg_margin: float = 120.0,
+                          model_name: str | None = None,
+                          ) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
+    """Extract embeddings only near positives and distant negatives.
+
+    Returns (timestamps, embeddings, labels) where labels: 1=pos, -1=neg, 0=ambig.
+    """
+    edim = _embed_dim(model_name)
+    duration = len(y) / sr
+    win_samples = int(_WINDOW * sr)
+    all_gt = list(gt_intense) + list(gt_soft)
+
+    # Positive windows: every second near intense markers
+    pos_times = set()
+    for gt in gt_intense:
+        for offset in range(-int(tolerance), int(tolerance) + 1):
+            t = gt + offset
+            if 0 <= t <= duration - _WINDOW:
+                pos_times.add(int(t))
+
+    # Negative windows: every 4s, far from any marker
+    neg_times = set()
+    for t in range(0, int(duration - _WINDOW), 4):
+        if min((abs(t - g) for g in all_gt), default=9999) > neg_margin:
+            neg_times.add(t)
+
+    all_times = sorted(pos_times | neg_times)
+    # Filter out windows that go past the end
+    valid_times = [t for t in all_times if int(t * sr) + win_samples <= len(y)]
+
+    if not valid_times:
+        return np.array([]), np.zeros((0, edim)), np.array([], dtype=int)
+
+    import torch
+    model, device = _get_w2v_model(model_name)
+    batch_size = 16
+    timestamps_list: list[float] = []
+    embeddings_list: list[np.ndarray] = []
+
+    is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
+
+    for batch_start in range(0, len(valid_times), batch_size):
+        batch_end = min(batch_start + batch_size, len(valid_times))
+        chunks = []
+        for t in valid_times[batch_start:batch_end]:
+            start = int(t * sr)
+            chunks.append(y[start:start + win_samples])
+            timestamps_list.append(float(t))
+        with torch.no_grad():
+            waveforms = torch.from_numpy(np.stack(chunks)).float().to(device)
+            if is_beats:
+                padding_mask = torch.zeros_like(waveforms, dtype=torch.bool)
+                features, _ = model.extract_features(waveforms, padding_mask=padding_mask)
+            else:
+                features, _ = model(waveforms)
+            batch_emb = features.mean(dim=1).cpu().numpy()
+        embeddings_list.append(batch_emb)
+
+    timestamps = np.array(timestamps_list)
+    embeddings = np.vstack(embeddings_list)
+
+    labels = np.zeros(len(timestamps), dtype=int)
+    for i, t in enumerate(timestamps):
+        di = min((abs(t - g) for g in gt_intense), default=9999)
+        da = min((abs(t - g) for g in all_gt), default=9999)
+        if di < tolerance:
+            labels[i] = 1
+        elif da > neg_margin:
+            labels[i] = -1
+    return timestamps, embeddings, labels
+
+
+# ---------------------------------------------------------------------------
+# Classifier mode — train / save / load / scan
+# ---------------------------------------------------------------------------
+
+def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
+                     model_path: str | None = None,
+                     tolerance: float = 12.0,
+                     neg_margin: float = 120.0,
+                     embed_model: str | None = None) -> dict:
+    """Train a classifier from labeled videos.
+
+    Args:
+        video_infos: list of (video_path, intense_times, soft_times)
+        model_path: if given, save model to this path
+        tolerance/neg_margin: labeling parameters
+        embed_model: embedding model name (e.g. "HUBERT_BASE", "BEATS"), defaults to WAV2VEC2_BASE
+
+    Returns:
+        dict with 'classifier', 'embed_model', and metadata, or None on failure.
+    """
+    from sklearn.ensemble import GradientBoostingClassifier
+
+    all_X, all_y = [], []
+
+    for vi, (vpath, gt_intense, gt_soft) in enumerate(video_infos):
+        _log(f"audio_scan: training [{vi+1}/{len(video_infos)}] {os.path.basename(vpath)}")
+        y, _ = librosa.load(vpath, sr=_SR, mono=True)
+
+        timestamps, embeddings, labels = _extract_w2v_targeted(
+            y, _SR, gt_intense, gt_soft, tolerance, neg_margin,
+            model_name=embed_model,
+        )
+        if len(timestamps) == 0:
+            continue
+        # Per-video z-score normalize
+        vid_mean = embeddings.mean(axis=0)
+        vid_std = np.maximum(embeddings.std(axis=0), 1e-6)
+        normed = (embeddings - vid_mean) / vid_std
+        for i in range(len(labels)):
+            if labels[i] == 1:
+                all_X.append(normed[i])
+                all_y.append(1)
+            elif labels[i] == -1:
+                all_X.append(normed[i])
+                all_y.append(0)
+
+    if not all_X:
+        _log("audio_scan: no training samples collected")
+        return None
+
+    X = np.stack(all_X)
+    y_arr = np.array(all_y)
+    n_pos = (y_arr == 1).sum()
+    n_neg = (y_arr == 0).sum()
+    _log(f"audio_scan: training set — {n_pos} positive, {n_neg} negative")
+
+    if n_pos == 0 or n_neg == 0:
+        _log(f"audio_scan: need both classes — {n_pos} pos, {n_neg} neg")
+        return None
+
+    # Subsample negatives for balance
+    rng = np.random.RandomState(42)
+    pos_idx = np.where(y_arr == 1)[0]
+    neg_idx = np.where(y_arr == 0)[0]
+    n_neg_sample = min(len(neg_idx), len(pos_idx) * 3)
+    neg_sample = rng.choice(neg_idx, n_neg_sample, replace=False)
+    train_idx = np.concatenate([pos_idx, neg_sample])
+    rng.shuffle(train_idx)
+
+    clf = GradientBoostingClassifier(
+        n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42,
+    )
+    clf.fit(X[train_idx], y_arr[train_idx])
+    _log("audio_scan: classifier trained")
+
+    model = {"classifier": clf, "n_features": X.shape[1],
+             "embed_model": embed_model or _DEFAULT_EMBED_MODEL}
+
+    if model_path:
+        import joblib
+        parent = os.path.dirname(model_path)
+        if parent:
+            os.makedirs(parent, exist_ok=True)
+        joblib.dump(model, model_path)
+        _log(f"audio_scan: model saved to {model_path}")
+
+    return model
+
+
+def load_classifier(model_path: str) -> dict | None:
+    """Load a saved classifier model."""
+    if not os.path.exists(model_path):
+        return None
+    import joblib
+    return joblib.load(model_path)
+
+
+def default_model_path(profile_name: str = "default") -> str:
+    """Return the default path for a profile's classifier model."""
+    return os.path.join(_MODEL_DIR, f"{profile_name}.joblib")
+
+
+# ---------------------------------------------------------------------------
+# Scanning
+# ---------------------------------------------------------------------------
+
+def scan_video(
+    video_path: str,
+    model: dict = None,
+    threshold: float = 0.30,
+    hop: float = 1.0,
+    window: float = _WINDOW,
+    cancel_flag: object = None,
+) -> list[tuple[float, float, float]]:
+    """Scan a video for matching audio regions using a trained classifier.
+
+    Returns list of (start_time, end_time, score) above threshold.
+    """
+    if model is None:
+        _log("audio_scan: no model provided")
+        return []
+
+    _log(f"audio_scan: loading {video_path}")
+    y, sr = librosa.load(video_path, sr=_SR, mono=True)
+    duration = len(y) / sr
+    _log(f"audio_scan: {duration:.1f}s loaded, extracting features...")
+
+    if cancel_flag and getattr(cancel_flag, '_cancel', False):
+        return []
+
+    clf = model["classifier"]
+    embed_model = model.get("embed_model")
+
+    _log(f"audio_scan: extracting embeddings ({embed_model or 'default'})...")
+    timestamps, window_vectors = _extract_w2v_windows(
+        y, sr, hop=hop, window=window, video_path=video_path,
+        cancel_flag=cancel_flag, model_name=embed_model,
+    )
+    if len(timestamps) == 0:
+        _log("audio_scan: video shorter than window")
+        return []
+
+    # Per-video z-score normalize
+    vid_mean = window_vectors.mean(axis=0)
+    vid_std = np.maximum(window_vectors.std(axis=0), 1e-6)
+    normed = (window_vectors - vid_mean) / vid_std
+
+    _log(f"audio_scan: classifying {len(normed)} windows...")
+
+    if cancel_flag and getattr(cancel_flag, '_cancel', False):
+        return []
+
+    probs = clf.predict_proba(normed)[:, 1]
+    mask = probs >= threshold
+    results = [
+        (timestamps[i], timestamps[i] + window, float(probs[i]))
+        for i in np.nonzero(mask)[0]
+    ]
+    _log(f"audio_scan: {len(results)} regions above threshold {threshold}")
+    return results
@@ -0,0 +1,783 @@
+# --------------------------------------------------------
+# BEATs: Audio Pre-Training with Acoustic Tokenizers (https://arxiv.org/abs/2212.09058)
+# Github source: https://github.com/microsoft/unilm/tree/master/beats
+# Copyright (c) 2022 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Based on fairseq code bases
+# https://github.com/pytorch/fairseq
+# --------------------------------------------------------
+
+import math
+import numpy as np
+from typing import Dict, Optional, Tuple
+import torch
+from torch import Tensor, nn
+import torch.nn.functional as F
+from torch.nn import LayerNorm, Parameter
+from .beats_modules import (
+    GradMultiply,
+    SamePad,
+    get_activation_fn,
+    GLU_Linear,
+    quant_noise,
+)
+
+
+class TransformerEncoder(nn.Module):
+    """Stack of ``TransformerSentenceEncoderLayer`` blocks with a convolutional
+    positional embedding added to the input.
+
+    All hyper-parameters are read from attributes of the ``args`` object passed
+    to the constructor.
+    """
+
+    def __init__(self, args):
+        super().__init__()
+
+        self.dropout = args.dropout
+        self.embedding_dim = args.encoder_embed_dim
+
+        # Grouped 1-D convolution whose output is added to the input in
+        # extract_features as a positional term.
+        self.pos_conv = nn.Conv1d(
+            self.embedding_dim,
+            self.embedding_dim,
+            kernel_size=args.conv_pos,
+            padding=args.conv_pos // 2,
+            groups=args.conv_pos_groups,
+        )
+        dropout = 0
+        std = math.sqrt((4 * (1.0 - dropout)) / (args.conv_pos * self.embedding_dim))
+        nn.init.normal_(self.pos_conv.weight, mean=0, std=std)
+        nn.init.constant_(self.pos_conv.bias, 0)
+
+        self.pos_conv = nn.utils.weight_norm(self.pos_conv, name="weight", dim=2)
+        self.pos_conv = nn.Sequential(self.pos_conv, SamePad(args.conv_pos), nn.GELU())
+
+        # Relative position settings are optional on ``args``; without them the
+        # feature is disabled.
+        if hasattr(args, "relative_position_embedding"):
+            self.relative_position_embedding = args.relative_position_embedding
+            self.num_buckets = args.num_buckets
+            self.max_distance = args.max_distance
+        else:
+            self.relative_position_embedding = False
+            self.num_buckets = 0
+            self.max_distance = 0
+
+        self.layers = nn.ModuleList(
+            [
+                TransformerSentenceEncoderLayer(
+                    embedding_dim=self.embedding_dim,
+                    ffn_embedding_dim=args.encoder_ffn_embed_dim,
+                    num_attention_heads=args.encoder_attention_heads,
+                    dropout=self.dropout,
+                    attention_dropout=args.attention_dropout,
+                    activation_dropout=args.activation_dropout,
+                    activation_fn=args.activation_fn,
+                    layer_norm_first=args.layer_norm_first,
+                    deep_norm=args.deep_norm,
+                    has_relative_attention_bias=self.relative_position_embedding,
+                    num_buckets=self.num_buckets,
+                    max_distance=self.max_distance,
+                    gru_rel_pos=args.gru_rel_pos,
+                    encoder_layers=args.encoder_layers,
+                )
+                for i in range(args.encoder_layers)
+            ]
+        )
+        # Every layer after the first drops its own relative_attention_bias and
+        # reuses the module owned by layer 0.
+        if self.relative_position_embedding:
+            for i in range(1, args.encoder_layers):
+                del self.layers[i].self_attn.relative_attention_bias
+                self.layers[i].self_attn.relative_attention_bias = self.layers[0].self_attn.relative_attention_bias
+
+        self.layer_norm_first = args.layer_norm_first
+        self.layer_norm = LayerNorm(self.embedding_dim)
+        self.layerdrop = args.encoder_layerdrop
+
+        self.apply(init_bert_params)
+
+        # With deep_norm the projection weights are re-initialised after
+        # init_bert_params, using gain deep_norm_beta for v/out/fc1/fc2 and
+        # gain 1 for q/k.
+        if args.deep_norm:
+            deep_norm_beta = math.pow(8 * args.encoder_layers, -1 / 4)
+            for i in range(args.encoder_layers):
+                nn.init.xavier_normal_(self.layers[i].self_attn.k_proj.weight, gain=1)
+                nn.init.xavier_normal_(self.layers[i].self_attn.v_proj.weight, gain=deep_norm_beta)
+                nn.init.xavier_normal_(self.layers[i].self_attn.q_proj.weight, gain=1)
+                nn.init.xavier_normal_(self.layers[i].self_attn.out_proj.weight, gain=deep_norm_beta)
+                nn.init.xavier_normal_(self.layers[i].fc1.weight, gain=deep_norm_beta)
+                nn.init.xavier_normal_(self.layers[i].fc2.weight, gain=deep_norm_beta)
+
+        self.layer_wise_gradient_decay_ratio = getattr(args, "layer_wise_gradient_decay_ratio", 1)
+
+    def forward(self, x, padding_mask=None, layer=None):
+        """Run ``extract_features`` and return ``(x, layer_results)``.
+
+        The final layer norm is applied only when ``layer_norm_first`` is set
+        and no target ``layer`` was requested.
+        """
+        x, layer_results = self.extract_features(x, padding_mask, layer)
+
+        if self.layer_norm_first and layer is None:
+            x = self.layer_norm(x)
+
+        return x, layer_results
+
+    def extract_features(self, x, padding_mask=None, tgt_layer=None):
+        """Apply the positional convolution and the encoder layers to ``x``.
+
+        ``x`` is B x T x C on entry and on return (see the transposes below).
+        ``layer_results`` is filled only when ``tgt_layer`` is given; in that
+        case the loop stops after layer index ``tgt_layer``.
+        """
+
+        # NOTE: this writes zeros into the caller's tensor in place.
+        if padding_mask is not None:
+            x[padding_mask] = 0
+
+        x_conv = self.pos_conv(x.transpose(1, 2))
+        x_conv = x_conv.transpose(1, 2)
+        x = x + x_conv
+
+        if not self.layer_norm_first:
+            x = self.layer_norm(x)
+
+        x = F.dropout(x, p=self.dropout, training=self.training)
+
+        # B x T x C -> T x B x C
+        x = x.transpose(0, 1)
+
+        layer_results = []
+        z = None
+        if tgt_layer is not None:
+            layer_results.append((x, z))
+        r = None
+        pos_bias = None
+        for i, layer in enumerate(self.layers):
+            if self.layer_wise_gradient_decay_ratio != 1.0:
+                x = GradMultiply.apply(x, self.layer_wise_gradient_decay_ratio)
+            # LayerDrop: a layer is skipped only in training mode, when the
+            # random draw does not exceed self.layerdrop.
+            dropout_probability = np.random.random()
+            if not self.training or (dropout_probability > self.layerdrop):
+                x, z, pos_bias = layer(x, self_attn_padding_mask=padding_mask, need_weights=False, pos_bias=pos_bias)
+            if tgt_layer is not None:
+                layer_results.append((x, z))
+            if i == tgt_layer:
+                r = x
+                break
+
+        if r is not None:
+            x = r
+
+        # T x B x C -> B x T x C
+        x = x.transpose(0, 1)
+
+        return x, layer_results
+
+
+class TransformerSentenceEncoderLayer(nn.Module):
+    """One encoder layer: self-attention followed by a two-layer feed-forward
+    block, each with a residual connection and a layer norm.
+
+    ``layer_norm_first`` selects whether the norms run before each sub-block
+    or after each residual sum.
+    """
+
+    def __init__(
+            self,
+            embedding_dim: int = 768,
+            ffn_embedding_dim: int = 3072,
+            num_attention_heads: int = 8,
+            dropout: float = 0.1,
+            attention_dropout: float = 0.1,
+            activation_dropout: float = 0.1,
+            activation_fn: str = "relu",
+            layer_norm_first: bool = False,
+            deep_norm: bool = False,
+            has_relative_attention_bias: bool = False,
+            num_buckets: int = 0,
+            max_distance: int = 0,
+            rescale_init: bool = False,
+            gru_rel_pos: bool = False,
+            encoder_layers: int = 0,
+    ) -> None:
+
+        super().__init__()
+        self.embedding_dim = embedding_dim
+        self.dropout = dropout
+        self.activation_dropout = activation_dropout
+
+        self.activation_name = activation_fn
+        self.activation_fn = get_activation_fn(activation_fn)
+        self.self_attn = MultiheadAttention(
+            self.embedding_dim,
+            num_attention_heads,
+            dropout=attention_dropout,
+            self_attention=True,
+            has_relative_attention_bias=has_relative_attention_bias,
+            num_buckets=num_buckets,
+            max_distance=max_distance,
+            rescale_init=rescale_init,
+            gru_rel_pos=gru_rel_pos,
+        )
+
+        # dropout1: after attention; dropout2: after the first feed-forward
+        # projection; dropout3: after the second.
+        self.dropout1 = nn.Dropout(dropout)
+        self.dropout2 = nn.Dropout(self.activation_dropout)
+        self.dropout3 = nn.Dropout(dropout)
+
+        self.layer_norm_first = layer_norm_first
+
+        self.self_attn_layer_norm = LayerNorm(self.embedding_dim)
+
+        # With "glu", fc1 is a GLU_Linear and no separate activation function
+        # is applied after it in forward.
+        if self.activation_name == "glu":
+            self.fc1 = GLU_Linear(self.embedding_dim, ffn_embedding_dim, "swish")
+        else:
+            self.fc1 = nn.Linear(self.embedding_dim, ffn_embedding_dim)
+        self.fc2 = nn.Linear(ffn_embedding_dim, self.embedding_dim)
+
+        self.final_layer_norm = LayerNorm(self.embedding_dim)
+
+        # Residual scaling factor; only used in the branch of forward that
+        # runs when layer_norm_first is False.
+        self.deep_norm = deep_norm
+        if self.deep_norm:
+            self.deep_norm_alpha = math.pow(2 * encoder_layers, 1 / 4)
+        else:
+            self.deep_norm_alpha = 1
+
+    def forward(
+            self,
+            x: torch.Tensor,
+            self_attn_mask: torch.Tensor = None,
+            self_attn_padding_mask: torch.Tensor = None,
+            need_weights: bool = False,
+            pos_bias=None
+    ):
+        """Apply the layer to ``x`` and return ``(x, attn, pos_bias)``.
+
+        ``pos_bias`` is passed to the attention module as ``position_bias``
+        and the value it returns is handed back to the caller.
+        """
+        residual = x
+
+        if self.layer_norm_first:
+            x = self.self_attn_layer_norm(x)
+            # NOTE(review): this branch passes need_weights=False regardless of
+            # the argument, unlike the branch below — confirm this is intended.
+            x, attn, pos_bias = self.self_attn(
+                query=x,
+                key=x,
+                value=x,
+                key_padding_mask=self_attn_padding_mask,
+                need_weights=False,
+                attn_mask=self_attn_mask,
+                position_bias=pos_bias
+            )
+            x = self.dropout1(x)
+            x = residual + x
+
+            residual = x
+            x = self.final_layer_norm(x)
+            if self.activation_name == "glu":
+                x = self.fc1(x)
+            else:
+                x = self.activation_fn(self.fc1(x))
+            x = self.dropout2(x)
+            x = self.fc2(x)
+            x = self.dropout3(x)
+            x = residual + x
+        else:
+            x, attn, pos_bias = self.self_attn(
+                query=x,
+                key=x,
+                value=x,
+                key_padding_mask=self_attn_padding_mask,
+                need_weights=need_weights,
+                attn_mask=self_attn_mask,
+                position_bias=pos_bias
+            )
+
+            x = self.dropout1(x)
+            # The residual is scaled by deep_norm_alpha (1 unless deep_norm).
+            x = residual * self.deep_norm_alpha + x
+
+            x = self.self_attn_layer_norm(x)
+
+            residual = x
+            if self.activation_name == "glu":
+                x = self.fc1(x)
+            else:
+                x = self.activation_fn(self.fc1(x))
+            x = self.dropout2(x)
+            x = self.fc2(x)
+            x = self.dropout3(x)
+            x = residual * self.deep_norm_alpha + x
+            x = self.final_layer_norm(x)
+
+        return x, attn, pos_bias
+
+
+class MultiheadAttention(nn.Module):
+    """Multi-headed attention.
+
+    See "Attention Is All You Need" for more details.
+    """
+
+    def __init__(
+            self,
+            embed_dim,
+            num_heads,
+            kdim=None,
+            vdim=None,
+            dropout=0.0,
+            bias=True,
+            add_bias_kv=False,
+            add_zero_attn=False,
+            self_attention=False,
+            encoder_decoder_attention=False,
+            q_noise=0.0,
+            qn_block_size=8,
+            has_relative_attention_bias=False,
+            num_buckets=32,
+            max_distance=128,
+            gru_rel_pos=False,
+            rescale_init=False,
+    ):
+        """Build the projection layers and optional relative-position parts.
+
+        Args:
+            embed_dim: model width; must be divisible by ``num_heads``.
+            num_heads: number of attention heads.
+            kdim: input width of the key projection (defaults to ``embed_dim``).
+            vdim: input width of the value projection (defaults to ``embed_dim``).
+            dropout: dropout probability applied to the attention probabilities.
+            bias: whether the q/v/out projections carry a bias term.
+            add_bias_kv: if true, create learnable ``bias_k`` / ``bias_v``.
+            add_zero_attn: stored and consulted by ``forward``.
+            self_attention: marks the module as self-attention (requires
+                ``kdim == vdim == embed_dim``).
+            encoder_decoder_attention: marks the module as encoder-decoder
+                attention.
+            q_noise: forwarded to ``quant_noise`` for every projection.
+            qn_block_size: forwarded to ``quant_noise`` for every projection.
+            has_relative_attention_bias: if true, create the bucketed
+                relative-position embedding table.
+            num_buckets: number of rows in that table.
+            max_distance: stored for the bucket computation.
+            gru_rel_pos: if true, create the gate layers ``grep_linear`` and
+                ``grep_a``.
+            rescale_init: if true, the key projection is built without bias.
+        """
+        super().__init__()
+        self.embed_dim = embed_dim
+        self.kdim = kdim if kdim is not None else embed_dim
+        self.vdim = vdim if vdim is not None else embed_dim
+        self.qkv_same_dim = self.kdim == embed_dim and self.vdim == embed_dim
+
+        self.num_heads = num_heads
+        self.dropout_module = nn.Dropout(dropout)
+
+        self.has_relative_attention_bias = has_relative_attention_bias
+        self.num_buckets = num_buckets
+        self.max_distance = max_distance
+        if self.has_relative_attention_bias:
+            # One learned bias per (bucket, head) pair.
+            self.relative_attention_bias = nn.Embedding(num_buckets, num_heads)
+
+        self.head_dim = embed_dim // num_heads
+        self.q_head_dim = self.head_dim
+        self.k_head_dim = self.head_dim
+        assert (
+                self.head_dim * num_heads == self.embed_dim
+        ), "embed_dim must be divisible by num_heads"
+        # Standard 1/sqrt(head_dim) query scaling.
+        self.scaling = self.head_dim ** -0.5
+
+        self.self_attention = self_attention
+        self.encoder_decoder_attention = encoder_decoder_attention
+
+        assert not self.self_attention or self.qkv_same_dim, (
+            "Self-attention requires query, key and " "value to be of the same size"
+        )
+
+        # With rescale_init the key projection is created without a bias.
+        k_bias = True
+        if rescale_init:
+            k_bias = False
+
+        k_embed_dim = embed_dim
+        q_embed_dim = embed_dim
+
+        # NOTE(review): quant_noise is defined outside this view; it wraps each
+        # Linear layer using q_noise / qn_block_size — confirm its behaviour there.
+        # The creation order below (k, v, q, out) fixes both the state_dict key
+        # order and the order in which the RNG is consumed; do not reorder.
+        self.k_proj = quant_noise(
+            nn.Linear(self.kdim, k_embed_dim, bias=k_bias), q_noise, qn_block_size
+        )
+        self.v_proj = quant_noise(
+            nn.Linear(self.vdim, embed_dim, bias=bias), q_noise, qn_block_size
+        )
+        self.q_proj = quant_noise(
+            nn.Linear(embed_dim, q_embed_dim, bias=bias), q_noise, qn_block_size
+        )
+
+        self.out_proj = quant_noise(
+            nn.Linear(embed_dim, embed_dim, bias=bias), q_noise, qn_block_size
+        )
+
+        if add_bias_kv:
+            # Allocated uninitialised here; reset_parameters() fills them in.
+            self.bias_k = Parameter(torch.Tensor(1, 1, embed_dim))
+            self.bias_v = Parameter(torch.Tensor(1, 1, embed_dim))
+        else:
+            self.bias_k = self.bias_v = None
+
+        self.add_zero_attn = add_zero_attn
+
+        self.gru_rel_pos = gru_rel_pos
+        if self.gru_rel_pos:
+            # Gate parameters used by forward() to rescale the position bias:
+            # a per-head linear map to 8 values and a per-head scalar
+            # initialised to 1.
+            self.grep_linear = nn.Linear(self.q_head_dim, 8)
+            self.grep_a = nn.Parameter(torch.ones(1, num_heads, 1, 1))
+
+        self.reset_parameters()
+
+    def reset_parameters(self):
+        if self.qkv_same_dim:
+            # Empirically observed the convergence to be much better with
+            # the scaled initialization
+            nn.init.xavier_uniform_(self.k_proj.weight, gain=1 / math.sqrt(2))
+            nn.init.xavier_uniform_(self.v_proj.weight, gain=1 / math.sqrt(2))
+            nn.init.xavier_uniform_(self.q_proj.weight, gain=1 / math.sqrt(2))
+        else:
+            nn.init.xavier_uniform_(self.k_proj.weight)
+            nn.init.xavier_uniform_(self.v_proj.weight)
+            nn.init.xavier_uniform_(self.q_proj.weight)
+
+        nn.init.xavier_uniform_(self.out_proj.weight)
+        if self.out_proj.bias is not None:
+            nn.init.constant_(self.out_proj.bias, 0.0)
+        if self.bias_k is not None:
+            nn.init.xavier_normal_(self.bias_k)
+        if self.bias_v is not None:
+            nn.init.xavier_normal_(self.bias_v)
+        if self.has_relative_attention_bias:
+            nn.init.xavier_normal_(self.relative_attention_bias.weight)
+
+    def _relative_positions_bucket(self, relative_positions, bidirectional=True):
+        num_buckets = self.num_buckets
+        max_distance = self.max_distance
+        relative_buckets = 0
+
+        if bidirectional:
+            num_buckets = num_buckets // 2
+            relative_buckets += (relative_positions > 0).to(torch.long) * num_buckets
+            relative_positions = torch.abs(relative_positions)
+        else:
+            relative_positions = -torch.min(relative_positions, torch.zeros_like(relative_positions))
+
+        max_exact = num_buckets // 2
+        is_small = relative_positions < max_exact
+
+        relative_postion_if_large = max_exact + (
+                torch.log(relative_positions.float() / max_exact)
+                / math.log(max_distance / max_exact)
+                * (num_buckets - max_exact)
+        ).to(torch.long)
+        relative_postion_if_large = torch.min(
+            relative_postion_if_large, torch.full_like(relative_postion_if_large, num_buckets - 1)
+        )
+
+        relative_buckets += torch.where(is_small, relative_positions, relative_postion_if_large)
+        return relative_buckets
+
+    def compute_bias(self, query_length, key_length):
+        context_position = torch.arange(query_length, dtype=torch.long)[:, None]
+        memory_position = torch.arange(key_length, dtype=torch.long)[None, :]
+        relative_position = memory_position - context_position
+        relative_position_bucket = self._relative_positions_bucket(
+            relative_position,
+            bidirectional=True
+        )
+        relative_position_bucket = relative_position_bucket.to(self.relative_attention_bias.weight.device)
+        values = self.relative_attention_bias(relative_position_bucket)
+        values = values.permute([2, 0, 1])
+        return values
+
+    def forward(
+            self,
+            query,
+            key: Optional[Tensor],
+            value: Optional[Tensor],
+            key_padding_mask: Optional[Tensor] = None,
+            incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]] = None,
+            need_weights: bool = True,
+            static_kv: bool = False,
+            attn_mask: Optional[Tensor] = None,
+            before_softmax: bool = False,
+            need_head_weights: bool = False,
+            position_bias: Optional[Tensor] = None
+    ) -> Tuple[Tensor, Optional[Tensor], Optional[Tensor]]:
+        """Input shape: Time x Batch x Channel
+
+        Args:
+            key_padding_mask (ByteTensor, optional): mask to exclude
+                keys that are pads, of shape `(batch, src_len)`, where
+                padding elements are indicated by 1s.
+            need_weights (bool, optional): return the attention weights,
+                averaged over heads (default: False).
+            attn_mask (ByteTensor, optional): typically used to
+                implement causal attention, where the mask prevents the
+                attention from looking forward in time (default: None).
+            before_softmax (bool, optional): return the raw attention
+                weights and values before the attention softmax.
+            need_head_weights (bool, optional): return the attention
+                weights for each head. Implies *need_weights*. Default:
+                return the average attention weights over all heads.
+        """
+        if need_head_weights:
+            need_weights = True
+
+        is_tpu = query.device.type == "xla"
+
+        tgt_len, bsz, embed_dim = query.size()
+        src_len = tgt_len
+        assert embed_dim == self.embed_dim
+        assert list(query.size()) == [tgt_len, bsz, embed_dim]
+        if key is not None:
+            src_len, key_bsz, _ = key.size()
+            if not torch.jit.is_scripting():
+                assert key_bsz == bsz
+                assert value is not None
+                assert src_len, bsz == value.shape[:2]
+
+        if self.has_relative_attention_bias and position_bias is None:
+            position_bias = self.compute_bias(tgt_len, src_len)
+            position_bias = position_bias.unsqueeze(0).repeat(bsz, 1, 1, 1).view(bsz * self.num_heads, tgt_len, src_len)
+
+        if incremental_state is not None:
+            saved_state = self._get_input_buffer(incremental_state)
+            if saved_state is not None and "prev_key" in saved_state:
+                # previous time steps are cached - no need to recompute
+                # key and value if they are static
+                if static_kv:
+                    assert self.encoder_decoder_attention and not self.self_attention
+                    key = value = None
+        else:
+            saved_state = None
+
+        if self.self_attention:
+            q = self.q_proj(query)
+            k = self.k_proj(query)
+            v = self.v_proj(query)
+        elif self.encoder_decoder_attention:
+            # encoder-decoder attention
+            q = self.q_proj(query)
+            if key is None:
+                assert value is None
+                k = v = None
+            else:
+                k = self.k_proj(key)
+                v = self.v_proj(key)
+
+        else:
+            assert key is not None and value is not None
+            q = self.q_proj(query)
+            k = self.k_proj(key)
+            v = self.v_proj(value)
+        q *= self.scaling
+        alpha = 32
+        q *= 1 / alpha
+
+        if self.bias_k is not None:
+            assert self.bias_v is not None
+            k = torch.cat([k, self.bias_k.repeat(1, bsz, 1)])
+            v = torch.cat([v, self.bias_v.repeat(1, bsz, 1)])
+            if attn_mask is not None:
+                attn_mask = torch.cat(
+                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
+                )
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [
+                        key_padding_mask,
+                        key_padding_mask.new_zeros(key_padding_mask.size(0), 1),
+                    ],
+                    dim=1,
+                )
+
+        q = (
+            q.contiguous()
+                .view(tgt_len, bsz * self.num_heads, self.q_head_dim)
+                .transpose(0, 1)
+        )
+        if k is not None:
+            k = (
+                k.contiguous()
+                    .view(-1, bsz * self.num_heads, self.k_head_dim)
+                    .transpose(0, 1)
+            )
+        if v is not None:
+            v = (
+                v.contiguous()
+                    .view(-1, bsz * self.num_heads, self.head_dim)
+                    .transpose(0, 1)
+            )
+
+        if saved_state is not None:
+            # saved states are stored with shape (bsz, num_heads, seq_len, head_dim)
+            if "prev_key" in saved_state:
+                _prev_key = saved_state["prev_key"]
+                assert _prev_key is not None
+                prev_key = _prev_key.view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    k = prev_key
+                else:
+                    assert k is not None
+                    k = torch.cat([prev_key, k], dim=1)
+                src_len = k.size(1)
+            if "prev_value" in saved_state:
+                _prev_value = saved_state["prev_value"]
+                assert _prev_value is not None
+                prev_value = _prev_value.view(bsz * self.num_heads, -1, self.head_dim)
+                if static_kv:
+                    v = prev_value
+                else:
+                    assert v is not None
+                    v = torch.cat([prev_value, v], dim=1)
+            prev_key_padding_mask: Optional[Tensor] = None
+            if "prev_key_padding_mask" in saved_state:
+                prev_key_padding_mask = saved_state["prev_key_padding_mask"]
+            assert k is not None and v is not None
+            key_padding_mask = MultiheadAttention._append_prev_key_padding_mask(
+                key_padding_mask=key_padding_mask,
+                prev_key_padding_mask=prev_key_padding_mask,
+                batch_size=bsz,
+                src_len=k.size(1),
+                static_kv=static_kv,
+            )
+
+            saved_state["prev_key"] = k.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state["prev_value"] = v.view(bsz, self.num_heads, -1, self.head_dim)
+            saved_state["prev_key_padding_mask"] = key_padding_mask
+            # In this branch incremental_state is never None
+            assert incremental_state is not None
+            incremental_state = self._set_input_buffer(incremental_state, saved_state)
+        assert k is not None
+        assert k.size(1) == src_len
+
+        # This is part of a workaround to get around fork/join parallelism
+        # not supporting Optional types.
+        if key_padding_mask is not None and key_padding_mask.dim() == 0:
+            key_padding_mask = None
+
+        if key_padding_mask is not None:
+            assert key_padding_mask.size(0) == bsz
+            assert key_padding_mask.size(1) == src_len
+
+        if self.add_zero_attn:
+            assert v is not None
+            src_len += 1
+            k = torch.cat([k, k.new_zeros((k.size(0), 1) + k.size()[2:])], dim=1)
+            v = torch.cat([v, v.new_zeros((v.size(0), 1) + v.size()[2:])], dim=1)
+            if attn_mask is not None:
+                attn_mask = torch.cat(
+                    [attn_mask, attn_mask.new_zeros(attn_mask.size(0), 1)], dim=1
+                )
+            if key_padding_mask is not None:
+                key_padding_mask = torch.cat(
+                    [
+                        key_padding_mask,
+                        torch.zeros(key_padding_mask.size(0), 1).type_as(
+                            key_padding_mask
+                        ),
+                    ],
+                    dim=1,
+                )
+
+        attn_weights = torch.bmm(q, k.transpose(1, 2))
+        attn_weights = (attn_weights - attn_weights.max(dim=-1, keepdim=True)[0]) * alpha
+        attn_weights = self.apply_sparse_mask(attn_weights, tgt_len, src_len, bsz)
+
+        assert list(attn_weights.size()) == [bsz * self.num_heads, tgt_len, src_len]
+
+        if attn_mask is not None:
+            attn_mask = attn_mask.unsqueeze(0)
+            attn_weights += attn_mask
+
+        if key_padding_mask is not None:
+            # don't attend to padding symbols
+            attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
+            if not is_tpu:
+                attn_weights = attn_weights.masked_fill(
+                    key_padding_mask.unsqueeze(1).unsqueeze(2).to(torch.bool),
+                    float("-inf"),
+                )
+            else:
+                attn_weights = attn_weights.transpose(0, 2)
+                attn_weights = attn_weights.masked_fill(key_padding_mask, float("-inf"))
+                attn_weights = attn_weights.transpose(0, 2)
+            attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
+
+        if before_softmax:
+            return attn_weights, v, position_bias
+
+        if position_bias is not None:
+            attn_mask_rel_pos = position_bias
+            if self.gru_rel_pos == 1:
+                query_layer = q.view(bsz, self.num_heads, tgt_len, self.q_head_dim) * alpha / self.scaling
+                _B, _H, _L, __ = query_layer.size()
+                gate_a, gate_b = torch.sigmoid(self.grep_linear(query_layer).view(
+                    _B, _H, _L, 2, 4).sum(-1, keepdim=False)).chunk(2, dim=-1)
+                gate_a_1 = gate_a * (gate_b * self.grep_a - 1.0) + 2.0
+                attn_mask_rel_pos = gate_a_1.view(bsz * self.num_heads, tgt_len, 1) * position_bias
+
+            attn_mask_rel_pos = attn_mask_rel_pos.view(attn_weights.size())
+
+            attn_weights = attn_weights + attn_mask_rel_pos
+
+        attn_weights_float = F.softmax(
+            attn_weights, dim=-1
+        )
+        attn_weights = attn_weights_float.type_as(attn_weights)
+        attn_probs = self.dropout_module(attn_weights)
+
+        assert v is not None
+        attn = torch.bmm(attn_probs, v)
+        assert list(attn.size()) == [bsz * self.num_heads, tgt_len, self.head_dim]
+        attn = attn.transpose(0, 1).contiguous().view(tgt_len, bsz, embed_dim)
+        attn = self.out_proj(attn)
+        attn_weights: Optional[Tensor] = None
+        if need_weights:
+            attn_weights = attn_weights_float.view(
+                bsz, self.num_heads, tgt_len, src_len
+            ).transpose(1, 0)
+            if not need_head_weights:
+                # average attention weights over heads
+                attn_weights = attn_weights.mean(dim=0)
+
+        return attn, attn_weights, position_bias
+
+    @staticmethod
+    def _append_prev_key_padding_mask(
+            key_padding_mask: Optional[Tensor],
+            prev_key_padding_mask: Optional[Tensor],
+            batch_size: int,
+            src_len: int,
+            static_kv: bool,
+    ) -> Optional[Tensor]:
+        # saved key padding masks have shape (bsz, seq_len)
+        if prev_key_padding_mask is not None and static_kv:
+            new_key_padding_mask = prev_key_padding_mask
+        elif prev_key_padding_mask is not None and key_padding_mask is not None:
+            new_key_padding_mask = torch.cat(
+                [prev_key_padding_mask.float(), key_padding_mask.float()], dim=1
+            )
+        # During incremental decoding, as the padding token enters and
+        # leaves the frame, there will be a time when prev or current
+        # is None
+        elif prev_key_padding_mask is not None:
+            if src_len > prev_key_padding_mask.size(1):
+                filler = torch.zeros(
+                    (batch_size, src_len - prev_key_padding_mask.size(1)),
+                    device=prev_key_padding_mask.device,
+                )
+                new_key_padding_mask = torch.cat(
+                    [prev_key_padding_mask.float(), filler.float()], dim=1
+                )
+            else:
+                new_key_padding_mask = prev_key_padding_mask.float()
+        elif key_padding_mask is not None:
+            if src_len > key_padding_mask.size(1):
+                filler = torch.zeros(
+                    (batch_size, src_len - key_padding_mask.size(1)),
+                    device=key_padding_mask.device,
+                )
+                new_key_padding_mask = torch.cat(
+                    [filler.float(), key_padding_mask.float()], dim=1
+                )
+            else:
+                new_key_padding_mask = key_padding_mask.float()
+        else:
+            new_key_padding_mask = prev_key_padding_mask
+        return new_key_padding_mask
+
+    def _get_input_buffer(
+            self, incremental_state: Optional[Dict[str, Dict[str, Optional[Tensor]]]]
+    ) -> Dict[str, Optional[Tensor]]:
+        result = self.get_incremental_state(incremental_state, "attn_state")
+        if result is not None:
+            return result
+        else:
+            empty_result: Dict[str, Optional[Tensor]] = {}
+            return empty_result
+
+    def _set_input_buffer(
+            self,
+            incremental_state: Dict[str, Dict[str, Optional[Tensor]]],
+            buffer: Dict[str, Optional[Tensor]],
+    ):
+        return self.set_incremental_state(incremental_state, "attn_state", buffer)
+
+    def apply_sparse_mask(self, attn_weights, tgt_len: int, src_len: int, bsz: int):
+        return attn_weights
+
+
+def init_bert_params(module):
+    """
+    Initialize the weights specific to the BERT Model.
+    This overrides the default initializations depending on the specified arguments.
+        1. If normal_init_linear_weights is set then weights of linear
+           layer will be initialized using the normal distribution and
+           bais will be set to the specified value.
+        2. If normal_init_embed_weights is set then weights of embedding
+           layer will be initialized using the normal distribution.
+        3. If normal_init_proj_weights is set then weights of
+           in_project_weight for MultiHeadAttention initialized using
+           the normal distribution (to be validated).
+    """
+
+    def normal_(data):
+        # with FSDP, module params will be on CUDA, so we cast them back to CPU
+        # so that the RNG is consistent with and without FSDP
+        data.copy_(
+            data.cpu().normal_(mean=0.0, std=0.02).to(data.device)
+        )
+
+    if isinstance(module, nn.Linear):
+        normal_(module.weight.data)
+        if module.bias is not None:
+            module.bias.data.zero_()
+    if isinstance(module, nn.Embedding):
+        normal_(module.weight.data)
+        if module.padding_idx is not None:
+            module.weight.data[module.padding_idx].zero_()
+    if isinstance(module, MultiheadAttention):
+        normal_(module.q_proj.weight.data)
+        normal_(module.k_proj.weight.data)
+        normal_(module.v_proj.weight.data)
@@ -0,0 +1,179 @@
+# --------------------------------------------------------
+# BEATs: Audio Pre-Training with Acoustic Tokenizers (https://arxiv.org/abs/2212.09058)
+# Github source: https://github.com/microsoft/unilm/tree/master/beats
+# Copyright (c) 2022 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Based on fairseq code bases
+# https://github.com/pytorch/fairseq
+# --------------------------------------------------------
+
+
+import torch
+import torch.nn as nn
+from torch.nn import LayerNorm
+import torchaudio.compliance.kaldi as ta_kaldi
+
+from .beats_backbone import (
+    TransformerEncoder,
+)
+
+import logging
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+class BEATsConfig:
+    def __init__(self, cfg=None):
+        self.input_patch_size: int = -1  # path size of patch embedding
+        self.embed_dim: int = 512  # patch embedding dimension
+        self.conv_bias: bool = False  # include bias in conv encoder
+
+        self.encoder_layers: int = 12  # num encoder layers in the transformer
+        self.encoder_embed_dim: int = 768  # encoder embedding dimension
+        self.encoder_ffn_embed_dim: int = 3072  # encoder embedding dimension for FFN
+        self.encoder_attention_heads: int = 12  # num encoder attention heads
+        self.activation_fn: str = "gelu"  # activation function to use
+
+        self.layer_wise_gradient_decay_ratio: float = 1.0  # ratio for layer-wise gradient decay
+        self.layer_norm_first: bool = False  # apply layernorm first in the transformer
+        self.deep_norm: bool = False  # apply deep_norm first in the transformer
+
+        # dropouts
+        self.dropout: float = 0.1  # dropout probability for the transformer
+        self.attention_dropout: float = 0.1  # dropout probability for attention weights
+        self.activation_dropout: float = 0.0  # dropout probability after activation in FFN
+        self.encoder_layerdrop: float = 0.0  # probability of dropping a tarnsformer layer
+        self.dropout_input: float = 0.0  # dropout to apply to the input (after feat extr)
+
+        # positional embeddings
+        self.conv_pos: int = 128  # number of filters for convolutional positional embeddings
+        self.conv_pos_groups: int = 16  # number of groups for convolutional positional embedding
+
+        # relative position embedding
+        self.relative_position_embedding: bool = False  # apply relative position embedding
+        self.num_buckets: int = 320  # number of buckets for relative position embedding
+        self.max_distance: int = 1280  # maximum distance for relative position embedding
+        self.gru_rel_pos: bool = False  # apply gated relative position embedding
+
+        # label predictor
+        self.finetuned_model: bool = False  # whether the model is a fine-tuned model.
+        self.predictor_dropout: float = 0.1  # dropout probability for the predictor
+        self.predictor_class: int = 527  # target class number for the predictor
+
+        if cfg is not None:
+            self.update(cfg)
+
+    def update(self, cfg: dict):
+        self.__dict__.update(cfg)
+
+
+class BEATs(nn.Module):
+    def __init__(
+            self,
+            cfg: BEATsConfig,
+    ) -> None:
+        super().__init__()
+        logger.info(f"BEATs Config: {cfg.__dict__}")
+
+        self.cfg = cfg
+
+        self.embed = cfg.embed_dim
+        self.post_extract_proj = (
+            nn.Linear(self.embed, cfg.encoder_embed_dim)
+            if self.embed != cfg.encoder_embed_dim
+            else None
+        )
+
+        self.input_patch_size = cfg.input_patch_size
+        self.patch_embedding = nn.Conv2d(1, self.embed, kernel_size=self.input_patch_size, stride=self.input_patch_size,
+                                         bias=cfg.conv_bias)
+
+        self.dropout_input = nn.Dropout(cfg.dropout_input)
+
+        assert not cfg.deep_norm or not cfg.layer_norm_first
+        self.encoder = TransformerEncoder(cfg)
+        self.layer_norm = LayerNorm(self.embed)
+
+        if cfg.finetuned_model:
+            self.predictor_dropout = nn.Dropout(cfg.predictor_dropout)
+            self.predictor = nn.Linear(cfg.encoder_embed_dim, cfg.predictor_class)
+        else:
+            self.predictor = None
+
+    def forward_padding_mask(
+            self,
+            features: torch.Tensor,
+            padding_mask: torch.Tensor,
+    ) -> torch.Tensor:
+        extra = padding_mask.size(1) % features.size(1)
+        if extra > 0:
+            padding_mask = padding_mask[:, :-extra]
+        padding_mask = padding_mask.view(
+            padding_mask.size(0), features.size(1), -1
+        )
+        padding_mask = padding_mask.all(-1)
+        return padding_mask
+
+    def preprocess(
+            self,
+            source: torch.Tensor,
+            fbank_mean: float = 15.41663,
+            fbank_std: float = 6.55582,
+    ) -> torch.Tensor:
+        fbanks = []
+        for waveform in source:
+            waveform = waveform.unsqueeze(0) * 2 ** 15
+            fbank = ta_kaldi.fbank(waveform, num_mel_bins=128, sample_frequency=16000, frame_length=25, frame_shift=10)
+            fbanks.append(fbank)
+        fbank = torch.stack(fbanks, dim=0)
+        fbank = (fbank - fbank_mean) / (2 * fbank_std)
+        return fbank
+
+    def extract_features(
+            self,
+            source: torch.Tensor,
+            padding_mask: Optional[torch.Tensor] = None,
+            fbank_mean: float = 15.41663,
+            fbank_std: float = 6.55582,
+    ):
+        fbank = self.preprocess(source, fbank_mean=fbank_mean, fbank_std=fbank_std)
+
+        if padding_mask is not None:
+            padding_mask = self.forward_padding_mask(fbank, padding_mask)
+
+        fbank = fbank.unsqueeze(1)
+        features = self.patch_embedding(fbank)
+        features = features.reshape(features.shape[0], features.shape[1], -1)
+        features = features.transpose(1, 2)
+        features = self.layer_norm(features)
+
+        if padding_mask is not None:
+            padding_mask = self.forward_padding_mask(features, padding_mask)
+
+        if self.post_extract_proj is not None:
+            features = self.post_extract_proj(features)
+
+        x = self.dropout_input(features)
+
+        x, layer_results = self.encoder(
+            x,
+            padding_mask=padding_mask,
+        )
+
+        if self.predictor is not None:
+            x = self.predictor_dropout(x)
+            logits = self.predictor(x)
+
+            if padding_mask is not None and padding_mask.any():
+                logits[padding_mask] = 0
+                logits = logits.sum(dim=1)
+                logits = logits / (~padding_mask).sum(dim=1).unsqueeze(-1).expand_as(logits)
+            else:
+                logits = logits.mean(dim=1)
+
+            lprobs = torch.sigmoid(logits)
+
+            return lprobs, padding_mask
+        else:
+            return x, padding_mask
@@ -0,0 +1,219 @@
+# --------------------------------------------------------
+# BEATs: Audio Pre-Training with Acoustic Tokenizers (https://arxiv.org/abs/2212.09058)
+# Github source: https://github.com/microsoft/unilm/tree/master/beats
+# Copyright (c) 2022 Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Based on fairseq code bases
+# https://github.com/pytorch/fairseq
+# --------------------------------------------------------
+
+import math
+import warnings
+import torch
+from torch import Tensor, nn
+import torch.nn.functional as F
+
+
+class GradMultiply(torch.autograd.Function):
+    """Autograd function that passes values through and rescales the gradient."""
+
+    @staticmethod
+    def forward(ctx, x, scale):
+        # Stash the factor for backward(); the output is built with the
+        # legacy ``x.new(x)`` constructor and carries the values of ``x``.
+        ctx.scale = scale
+        return x.new(x)
+
+    @staticmethod
+    def backward(ctx, grad):
+        # One entry per forward() input: scaled gradient for ``x``, none for
+        # the (non-tensor) ``scale`` argument.
+        scaled = grad * ctx.scale
+        return scaled, None
+
+
+class SamePad(nn.Module):
+    """Drop a fixed number of trailing positions from the last dimension.
+
+    ``remove`` is ``kernel_size - 1`` in causal mode; otherwise it is 1 for an
+    even ``kernel_size`` and 0 for an odd one.
+    """
+
+    def __init__(self, kernel_size, causal=False):
+        super().__init__()
+        if not causal:
+            self.remove = 0 if kernel_size % 2 else 1
+        else:
+            self.remove = kernel_size - 1
+
+    def forward(self, x):
+        # Nothing to trim: hand the input back untouched.
+        if self.remove <= 0:
+            return x
+        return x[:, :, : -self.remove]
+
+
+class Swish(nn.Module):
+    """Swish activation: ``x * sigmoid(x)``."""
+
+    def __init__(self):
+        super().__init__()
+        self.act = torch.nn.Sigmoid()
+
+    def forward(self, x):
+        gate = self.act(x)
+        return x * gate
+
+
+class GLU_Linear(nn.Module):
+    """Linear projection followed by a gated linear unit.
+
+    The input is projected to ``2 * output_dim`` features along its last
+    dimension. The first half is the value, the second half is the gate; the
+    output is ``value * act(gate)`` (or ``value * gate`` for ``"bilinear"``).
+
+    Args:
+        input_dim: size of the last dimension of the input.
+        output_dim: size of the last dimension of the output.
+        glu_type: gate activation, one of ``"sigmoid"``, ``"swish"``,
+            ``"relu"``, ``"gelu"`` or ``"bilinear"`` (no activation).
+        bias_in_glu: whether the linear projection has a bias term.
+
+    Raises:
+        ValueError: if ``glu_type`` is not one of the supported names.
+    """
+
+    def __init__(self, input_dim, output_dim, glu_type="sigmoid", bias_in_glu=True):
+        super().__init__()
+
+        self.glu_type = glu_type
+        self.output_dim = output_dim
+
+        if glu_type == "sigmoid":
+            self.glu_act = torch.nn.Sigmoid()
+        elif glu_type == "swish":
+            self.glu_act = Swish()
+        elif glu_type == "relu":
+            self.glu_act = torch.nn.ReLU()
+        elif glu_type == "gelu":
+            self.glu_act = torch.nn.GELU()
+        elif glu_type != "bilinear":
+            # Fail at construction time instead of with an AttributeError on
+            # the first forward pass.
+            raise ValueError(f"unsupported glu_type: {glu_type!r}")
+
+        self.linear = nn.Linear(input_dim, output_dim * 2, bool(bias_in_glu))
+
+    def forward(self, x):
+        # The feature dimension is expected to be last; ellipsis indexing
+        # keeps the gate working for inputs of any rank, not only 3-D.
+        x = self.linear(x)
+        value = x[..., : self.output_dim]
+        gate = x[..., self.output_dim : self.output_dim * 2]
+
+        if self.glu_type == "bilinear":
+            return value * gate
+        return value * self.glu_act(gate)
+
+
+def gelu_accurate(x):
+    """Tanh approximation of GELU.
+
+    The constant ``sqrt(2 / pi)`` is computed on the first call and cached on
+    the function object as ``gelu_accurate._a``.
+    """
+    try:
+        coeff = gelu_accurate._a
+    except AttributeError:
+        coeff = gelu_accurate._a = math.sqrt(2 / math.pi)
+    inner = coeff * (x + 0.044715 * torch.pow(x, 3))
+    return 0.5 * x * (1 + torch.tanh(inner))
+
+
+def gelu(x: torch.Tensor) -> torch.Tensor:
+    """Apply GELU in float32 and cast the result back to the type of ``x``."""
+    activated = F.gelu(x.float())
+    return activated.type_as(x)
+
+
+def get_activation_fn(activation: str):
+    """Look up the activation callable registered under ``activation``.
+
+    ``"linear"`` and ``"glu"`` both map to an identity function;
+    ``"gelu_fast"`` is a deprecated alias of ``"gelu_accurate"`` and emits a
+    warning.
+
+    Raises:
+        RuntimeError: if ``activation`` is not a supported name.
+    """
+    if activation == "relu":
+        return F.relu
+    if activation == "gelu":
+        return gelu
+    if activation in ("gelu_fast", "gelu_accurate"):
+        if activation == "gelu_fast":
+            warnings.warn(
+                "--activation-fn=gelu_fast has been renamed to gelu_accurate"
+            )
+        return gelu_accurate
+    if activation == "tanh":
+        return torch.tanh
+    if activation in ("linear", "glu"):
+        return lambda x: x
+    raise RuntimeError("--activation-fn {} not supported".format(activation))
+
+
+def quant_noise(module, p, block_size):
+    """
+    Wraps modules and applies quantization noise to the weights for
+    subsequent quantization with Iterative Product Quantization as
+    described in "Training with Quantization Noise for Extreme Model Compression"
+
+    Args:
+        - module: nn.Module
+        - p: amount of Quantization Noise
+        - block_size: size of the blocks for subsequent quantization with iPQ
+
+    Returns:
+        The same ``module`` object; when ``p > 0`` a forward pre-hook has been
+        registered on it, otherwise it is returned untouched.
+
+    Remarks:
+        - Module weights must have the right sizes wrt the block size
+        - Only Linear, Embedding and Conv2d modules are supported for the moment
+        - For more detail on how to quantize by blocks with convolutional weights,
+          see "And the Bit Goes Down: Revisiting the Quantization of Neural Networks"
+        - We implement the simplest form of noise here as stated in the paper
+          which consists in randomly dropping blocks
+        - Size/type requirements are enforced with ``assert`` statements, so
+          they are skipped when Python runs with ``-O``
+    """
+
+    # if no quantization noise, don't register hook
+    if p <= 0:
+        return module
+
+    # supported modules
+    assert isinstance(module, (nn.Linear, nn.Embedding, nn.Conv2d))
+
+    # test whether module.weight has the right sizes wrt block_size
+    # (a 4-D weight is treated as a convolution, anything else as a matrix)
+    is_conv = module.weight.ndim == 4
+
+    # 2D matrix
+    if not is_conv:
+        assert (
+            module.weight.size(1) % block_size == 0
+        ), "Input features must be a multiple of block sizes"
+
+    # 4D matrix
+    else:
+        # 1x1 convolutions
+        if module.kernel_size == (1, 1):
+            assert (
+                module.in_channels % block_size == 0
+            ), "Input channels must be a multiple of block sizes"
+        # regular convolutions
+        else:
+            k = module.kernel_size[0] * module.kernel_size[1]
+            assert k % block_size == 0, "Kernel size must be a multiple of block size"
+
+    def _forward_pre_hook(mod, input):
+        # Runs before every forward() of the wrapped module.
+        # no noise for evaluation
+        if mod.training:
+            if not is_conv:
+                # gather weight and sizes
+                weight = mod.weight
+                in_features = weight.size(1)
+                out_features = weight.size(0)
+
+                # split weight matrix into blocks and randomly drop selected blocks
+                # (one Bernoulli(p) draw per block, then expanded to block_size
+                # consecutive input features)
+                mask = torch.zeros(
+                    in_features // block_size * out_features, device=weight.device
+                )
+                mask.bernoulli_(p)
+                mask = mask.repeat_interleave(block_size, -1).view(-1, in_features)
+
+            else:
+                # gather weight and sizes
+                weight = mod.weight
+                in_channels = mod.in_channels
+                out_channels = mod.out_channels
+
+                # split weight matrix into blocks and randomly drop selected blocks
+                if mod.kernel_size == (1, 1):
+                    mask = torch.zeros(
+                        int(in_channels // block_size * out_channels),
+                        device=weight.device,
+                    )
+                    mask.bernoulli_(p)
+                    mask = mask.repeat_interleave(block_size, -1).view(-1, in_channels)
+                else:
+                    # one draw per (out, in) channel pair, broadcast over the
+                    # whole kernel window
+                    mask = torch.zeros(
+                        weight.size(0), weight.size(1), device=weight.device
+                    )
+                    mask.bernoulli_(p)
+                    mask = (
+                        mask.unsqueeze(2)
+                        .unsqueeze(3)
+                        .repeat(1, 1, mod.kernel_size[0], mod.kernel_size[1])
+                    )
+
+            # scale weights and apply mask
+            mask = mask.to(
+                torch.bool
+            )  # x.bool() is not currently supported in TorchScript
+            # Surviving weights are scaled by 1 / (1 - p).
+            # NOTE(review): this divides by zero when p == 1, and the
+            # assignment below replaces the stored weight data on every
+            # training-mode forward -- confirm both are intended.
+            s = 1 / (1 - p)
+            mod.weight.data = s * weight.masked_fill(mask, 0)
+
+    module.register_forward_pre_hook(_forward_pre_hook)
+    return module
+
@@ -0,0 +1,353 @@
+import os
+import sqlite3
+import threading
+from datetime import datetime, timezone
+from pathlib import Path
+
+from .paths import _log
+
+
+class ProcessedDB:
+    """SQLite-backed record of exported clips and hidden source files.
+
+    If the database cannot be opened or migrated the instance is disabled:
+    every public method then returns an empty result (or does nothing)
+    instead of raising. Writes are serialised through ``self._lock``; the
+    connection is opened with ``check_same_thread=False``.
+    """
+
+    _SCHEMA_VERSION = 4  # bump when schema changes
+
+    def __init__(self, db_path: str | None = None):
+        """Open (creating if needed) the database at *db_path*.
+
+        Args:
+            db_path: SQLite file path; defaults to ``~/.8cut.db``.
+        """
+        if db_path is None:
+            db_path = str(Path.home() / ".8cut.db")
+        self._path = db_path
+        self._lock = threading.Lock()
+        self._con = None
+        self._enabled = False
+        try:
+            self._con = sqlite3.connect(db_path, check_same_thread=False)
+            self._migrate()
+            self._enabled = True
+            _log(f"DB opened: {db_path}")
+        except Exception as e:
+            _log(f"DB unavailable: {e}")
+            if self._con is not None:
+                # The connection was opened but setup failed afterwards
+                # (e.g. during migration): close it instead of leaking it.
+                try:
+                    self._con.close()
+                except sqlite3.Error:
+                    pass
+            self._con = None
+            self._enabled = False
+
+    @staticmethod
+    def _export_folder_of(output_path: str) -> str:
+        """Return the name of the directory two levels above *output_path*.
+
+        Export paths look like ``.../export_folder/group_dir/clip.mp4``, so
+        this is the export folder name (empty if the path is too shallow).
+        """
+        return os.path.basename(os.path.dirname(os.path.dirname(output_path)))
+
+    def _migrate(self) -> None:
+        """Create table if missing, then add any new columns for old DBs."""
+        cols = {
+            row[1]
+            for row in self._con.execute("PRAGMA table_info(processed)").fetchall()
+        }
+        if not cols:
+            # Fresh DB — create from scratch
+            self._con.execute(
+                "CREATE TABLE IF NOT EXISTS processed ("
+                "  id              INTEGER PRIMARY KEY AUTOINCREMENT,"
+                "  filename        TEXT    NOT NULL,"
+                "  start_time      REAL    NOT NULL,"
+                "  output_path     TEXT    NOT NULL,"
+                "  label           TEXT    NOT NULL DEFAULT '',"
+                "  category        TEXT    NOT NULL DEFAULT '',"
+                "  short_side      INTEGER DEFAULT 512,"
+                "  portrait_ratio  TEXT    NOT NULL DEFAULT '',"
+                "  crop_center     REAL    NOT NULL DEFAULT 0.5,"
+                "  format          TEXT    NOT NULL DEFAULT 'MP4',"
+                "  clip_count      INTEGER NOT NULL DEFAULT 3,"
+                "  spread          REAL    NOT NULL DEFAULT 3.0,"
+                "  profile         TEXT    NOT NULL DEFAULT 'default',"
+                "  source_path     TEXT    NOT NULL DEFAULT '',"
+                "  processed_at    TEXT    NOT NULL"
+                ")"
+            )
+        else:
+            # Add missing columns to legacy tables. Column names and type
+            # definitions are constants from this dict, never user input, so
+            # formatting them into the statement is safe.
+            new_cols = {
+                "label":          "TEXT NOT NULL DEFAULT ''",
+                "category":       "TEXT NOT NULL DEFAULT ''",
+                "short_side":     "INTEGER DEFAULT 512",
+                "portrait_ratio": "TEXT NOT NULL DEFAULT ''",
+                "crop_center":    "REAL NOT NULL DEFAULT 0.5",
+                "format":         "TEXT NOT NULL DEFAULT 'MP4'",
+                "clip_count":     "INTEGER NOT NULL DEFAULT 3",
+                "spread":         "REAL NOT NULL DEFAULT 3.0",
+                "profile":        "TEXT NOT NULL DEFAULT 'default'",
+                "source_path":    "TEXT NOT NULL DEFAULT ''",
+            }
+            for col, typedef in new_cols.items():
+                if col not in cols:
+                    self._con.execute(
+                        f"ALTER TABLE processed ADD COLUMN {col} {typedef}"
+                    )
+        self._con.execute(
+            "CREATE INDEX IF NOT EXISTS idx_filename ON processed(filename)"
+        )
+        self._con.execute(
+            "CREATE TABLE IF NOT EXISTS hidden_files ("
+            "  filename  TEXT NOT NULL,"
+            "  profile   TEXT NOT NULL DEFAULT 'default',"
+            "  PRIMARY KEY (filename, profile)"
+            ")"
+        )
+        self._con.commit()
+
+    def add(self, filename: str, start_time: float, output_path: str,
+            label: str = "", category: str = "",
+            short_side: int | None = None, portrait_ratio: str = "",
+            crop_center: float = 0.5, fmt: str = "MP4",
+            clip_count: int = 3, spread: float = 3.0,
+            profile: str = "default", source_path: str = "") -> None:
+        """Insert one exported-clip row, stamped with the current UTC time."""
+        if not self._enabled:
+            return
+        with self._lock:
+            self._con.execute(
+                "INSERT INTO processed"
+                " (filename, start_time, output_path, label, category,"
+                "  short_side, portrait_ratio, crop_center, format,"
+                "  clip_count, spread, profile, source_path, processed_at)"
+                " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                (filename, start_time, output_path, label, category,
+                 short_side, portrait_ratio, crop_center, fmt,
+                 clip_count, spread, profile, source_path,
+                 datetime.now(timezone.utc).isoformat()),
+            )
+            self._con.commit()
+
+    def get_labels(self) -> list[str]:
+        """Return distinct non-empty labels ordered by most recently used."""
+        if not self._enabled:
+            return []
+        # Rank each label by its latest processed_at. A plain
+        # "SELECT DISTINCT label ... ORDER BY processed_at" does not do that:
+        # processed_at is not part of the DISTINCT result, so the timestamp
+        # used for sorting a label is not guaranteed to be its most recent.
+        rows = self._con.execute(
+            "SELECT label FROM processed"
+            " WHERE label != ''"
+            " GROUP BY label"
+            " ORDER BY MAX(processed_at) DESC"
+        ).fetchall()
+        return [lbl for (lbl,) in rows]
+
+    def get_by_output_path(self, output_path: str) -> dict | None:
+        """Return config dict for an output_path, or None."""
+        if not self._enabled:
+            return None
+        cur = self._con.cursor()
+        # Row factory on this cursor only, so other queries keep plain tuples.
+        cur.row_factory = sqlite3.Row
+        row = cur.execute(
+            "SELECT label, category, short_side, portrait_ratio, crop_center, format,"
+            " clip_count, spread"
+            " FROM processed WHERE output_path = ?",
+            (output_path,),
+        ).fetchone()
+        return dict(row) if row else None
+
+    def delete_by_output_path(self, output_path: str) -> None:
+        """Delete every row whose output_path equals *output_path*."""
+        if not self._enabled:
+            return
+        with self._lock:
+            self._con.execute("DELETE FROM processed WHERE output_path = ?", (output_path,))
+            self._con.commit()
+
+    def get_group(self, output_path: str) -> list[str]:
+        """Return all output_paths sharing the same (filename, start_time) as *output_path*."""
+        if not self._enabled:
+            return []
+        row = self._con.execute(
+            "SELECT filename, start_time FROM processed WHERE output_path = ?",
+            (output_path,),
+        ).fetchone()
+        if not row:
+            return []
+        rows = self._con.execute(
+            "SELECT output_path FROM processed"
+            " WHERE filename = ? AND start_time = ? ORDER BY output_path",
+            (row[0], row[1]),
+        ).fetchall()
+        return [r[0] for r in rows]
+
+    def delete_group(self, output_path: str) -> list[str]:
+        """Delete all rows sharing the same (filename, start_time) as *output_path*.
+        Returns list of deleted output_paths."""
+        if not self._enabled:
+            return []
+        with self._lock:
+            row = self._con.execute(
+                "SELECT filename, start_time FROM processed WHERE output_path = ?",
+                (output_path,),
+            ).fetchone()
+            if not row:
+                return []
+            filename, start_time = row
+            paths = [r[0] for r in self._con.execute(
+                "SELECT output_path FROM processed WHERE filename = ? AND start_time = ?",
+                (filename, start_time),
+            ).fetchall()]
+            self._con.execute(
+                "DELETE FROM processed WHERE filename = ? AND start_time = ?",
+                (filename, start_time),
+            )
+            self._con.commit()
+            return paths
+
+    def _get_markers_for(self, match: str, profile: str = "default") -> list[tuple[float, int, str]]:
+        """Return numbered markers for filename *match* without the enabled check."""
+        rows = self._con.execute(
+            "SELECT start_time, output_path FROM processed"
+            " WHERE filename = ? AND profile = ? ORDER BY start_time",
+            (match, profile),
+        ).fetchall()
+        # Deduplicate by start_time — batch exports share the same cursor.
+        # The first row seen for a start_time supplies its output_path;
+        # marker numbers count up from 1 in start_time order.
+        seen_times: dict[float, tuple[float, int, str]] = {}
+        for t, p in rows:
+            if t not in seen_times:
+                seen_times[t] = (t, len(seen_times) + 1, p)
+        return list(seen_times.values())
+
+    def get_markers(self, filename: str, profile: str = "default") -> list[tuple[float, int, str]]:
+        """Return [(start_time, marker_number, output_path), ...] for exact
+        filename match, sorted by start_time. Empty list if no match."""
+        if not self._enabled:
+            return []
+        return self._get_markers_for(filename, profile)
+
+    def get_profiles(self) -> list[str]:
+        """Return distinct profile names, ordered alphabetically."""
+        if not self._enabled:
+            return []
+        rows = self._con.execute(
+            "SELECT DISTINCT profile FROM processed ORDER BY profile"
+        ).fetchall()
+        return [r[0] for r in rows]
+
+    def get_all_export_paths(self, profile: str = "default") -> list[str]:
+        """Return all unique output_path values for a given profile."""
+        if not self._enabled:
+            return []
+        rows = self._con.execute(
+            "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
+            (profile,),
+        ).fetchall()
+        return [r[0] for r in rows]
+
+    def get_export_folders(self, profile: str = "default") -> list[str]:
+        """Return distinct export folder names found in output_paths for a profile.
+
+        Export paths follow the structure:
+            .../export_folder/group_dir/clip.mp4
+        The export folder is 2 levels up from the clip file.
+        Returns folder names sorted alphabetically (e.g. ["mp4_Intense", "mp4_Soft"]).
+        """
+        if not self._enabled:
+            return []
+        rows = self._con.execute(
+            "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
+            (profile,),
+        ).fetchall()
+        folder_names = {self._export_folder_of(op) for (op,) in rows}
+        folder_names.discard("")  # paths too shallow to have an export folder
+        return sorted(folder_names)
+
+    def get_training_data(self, profile: str, positive_folder: str,
+                          fallback_video_dir: str = "",
+                          ) -> list[tuple[str, list[float], list[float]]]:
+        """Build training video_infos from DB data.
+
+        Args:
+            profile: profile name
+            positive_folder: export folder name for positive class (e.g. "mp4_Intense")
+            fallback_video_dir: if source_path is empty, try filename in this dir
+
+        Returns:
+            list of (source_video_path, positive_times, soft_times) per video.
+            Soft times = clips from any other export folder.
+            Videos without positive clips, or whose source file cannot be
+            found on disk, are omitted.
+        """
+        if not self._enabled:
+            return []
+        rows = self._con.execute(
+            "SELECT filename, start_time, output_path, source_path"
+            " FROM processed WHERE profile = ?",
+            (profile,),
+        ).fetchall()
+
+        # Collect times by video, split by positive vs other folders
+        pos_by_video: dict[str, set[float]] = {}
+        soft_by_video: dict[str, set[float]] = {}
+        source_by_filename: dict[str, str] = {}
+
+        for fn, st, op, sp in rows:
+            if sp:
+                source_by_filename[fn] = sp
+            if self._export_folder_of(op) == positive_folder:
+                pos_by_video.setdefault(fn, set()).add(st)
+            else:
+                soft_by_video.setdefault(fn, set()).add(st)
+
+        result = []
+        for fn, pos_times in pos_by_video.items():
+            sp = source_by_filename.get(fn, "")
+            if not sp or not os.path.exists(sp):
+                # Fallback: try video_dir / filename
+                if fallback_video_dir:
+                    sp = os.path.join(fallback_video_dir, fn)
+            if not sp or not os.path.exists(sp):
+                continue
+            gt_pos = sorted(pos_times)
+            gt_soft = sorted(soft_by_video.get(fn, set()))
+            result.append((sp, gt_pos, gt_soft))
+        return result
+
+    def get_training_stats(self, profile: str) -> dict[str, dict]:
+        """Return per-subprofile stats for training readiness display.
+
+        Returns dict mapping subprofile_name → {
+            'videos': number of distinct source videos,
+            'clips': total clip count,
+        }
+        Keys are inserted in alphabetical order of the export folder name.
+        """
+        if not self._enabled:
+            return {}
+        rows = self._con.execute(
+            "SELECT filename, output_path FROM processed WHERE profile = ?",
+            (profile,),
+        ).fetchall()
+        # Single pass over the rows instead of one scan per folder.
+        videos_by_folder: dict[str, set[str]] = {}
+        clips_by_folder: dict[str, int] = {}
+        for fn, op in rows:
+            folder = self._export_folder_of(op)
+            if not folder:
+                continue
+            videos_by_folder.setdefault(folder, set()).add(fn)
+            clips_by_folder[folder] = clips_by_folder.get(folder, 0) + 1
+        return {
+            folder: {
+                "videos": len(videos_by_folder[folder]),
+                "clips": clips_by_folder[folder],
+            }
+            for folder in sorted(videos_by_folder)
+        }
+
+    def hide_file(self, filename: str, profile: str = "default") -> None:
+        """Mark *filename* as hidden for *profile* (no-op if already hidden)."""
+        if not self._enabled:
+            return
+        with self._lock:
+            self._con.execute(
+                "INSERT OR IGNORE INTO hidden_files (filename, profile) VALUES (?, ?)",
+                (filename, profile),
+            )
+            self._con.commit()
+
+    def unhide_file(self, filename: str, profile: str = "default") -> None:
+        """Remove the hidden mark of *filename* for *profile*."""
+        if not self._enabled:
+            return
+        with self._lock:
+            self._con.execute(
+                "DELETE FROM hidden_files WHERE filename = ? AND profile = ?",
+                (filename, profile),
+            )
+            self._con.commit()
+
+    def get_hidden_files(self, profile: str = "default") -> set[str]:
+        """Return the set of filenames hidden for *profile*."""
+        if not self._enabled:
+            return set()
+        rows = self._con.execute(
+            "SELECT filename FROM hidden_files WHERE profile = ?", (profile,)
+        ).fetchall()
+        return {r[0] for r in rows}
@@ -0,0 +1,127 @@
+import os
+import subprocess
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Callable
+
+from .ffmpeg import build_ffmpeg_command, build_audio_extract_command
+from .paths import _log
+
+
+class ExportRunner:
+    """Run ffmpeg export jobs in a background thread pool.
+
+    Each job is a ``(start, output, portrait_ratio, crop_center)`` tuple.
+    Callbacks are invoked from the background thread, not from the thread
+    that called :meth:`start`.
+
+    Callbacks:
+        on_clip_done(path: str)
+        on_all_done()
+        on_error(msg: str)
+    """
+
+    class _Cancelled(RuntimeError):
+        """Raised inside a job when the export was cancelled."""
+
+    def __init__(
+        self,
+        input_path: str,
+        jobs: list[tuple[float, str, str | None, float]],
+        short_side: int | None = None,
+        image_sequence: bool = False,
+        max_workers: int | None = None,
+        encoder: str = "libx264",
+        on_clip_done: Callable[[str], None] | None = None,
+        on_all_done: Callable[[], None] | None = None,
+        on_error: Callable[[str], None] | None = None,
+    ):
+        self._input = input_path
+        self._jobs = jobs
+        self._short_side = short_side
+        self._image_sequence = image_sequence
+        self._max_workers = max_workers
+        self._encoder = encoder
+        self._on_clip_done = on_clip_done
+        self._on_all_done = on_all_done
+        self._on_error = on_error
+        self._cancel = False
+        # Running ffmpeg processes, guarded by _procs_lock so cancel() can
+        # kill them from another thread.
+        self._procs: list[subprocess.Popen] = []
+        self._procs_lock = threading.Lock()
+        self._thread: threading.Thread | None = None
+
+    def start(self):
+        """Start processing the jobs on a daemon thread."""
+        self._thread = threading.Thread(target=self._run, daemon=True)
+        self._thread.start()
+
+    def cancel(self):
+        """Request cancellation and kill every ffmpeg process still running."""
+        self._cancel = True
+        with self._procs_lock:
+            for proc in self._procs:
+                try:
+                    proc.kill()
+                except OSError:
+                    pass
+
+    def is_running(self) -> bool:
+        """Return True while the background thread is alive."""
+        return self._thread is not None and self._thread.is_alive()
+
+    def _run_one(self, start: float, output: str,
+                 portrait_ratio: str | None, crop_center: float) -> str:
+        """Export a single clip and return its output path.
+
+        Raises:
+            RuntimeError: on cancellation, ffmpeg timeout or a non-zero
+                ffmpeg exit status (message = tail of ffmpeg's stderr).
+        """
+        if self._cancel:
+            raise self._Cancelled("cancelled")
+        if self._image_sequence:
+            os.makedirs(output, exist_ok=True)
+        cmd = build_ffmpeg_command(
+            self._input, start, output,
+            short_side=self._short_side,
+            portrait_ratio=portrait_ratio,
+            crop_center=crop_center,
+            image_sequence=self._image_sequence,
+            encoder=self._encoder,
+        )
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        with self._procs_lock:
+            self._procs.append(proc)
+            # cancel() may have run between the check above and this
+            # registration; in that case it never saw this process.
+            cancelled_meanwhile = self._cancel
+        try:
+            if cancelled_meanwhile:
+                proc.kill()
+            try:
+                _, stderr = proc.communicate(timeout=120)
+            except subprocess.TimeoutExpired:
+                proc.kill()
+                # Reap the killed process and close its pipes.
+                proc.communicate()
+                raise RuntimeError("ffmpeg timed out") from None
+        finally:
+            with self._procs_lock:
+                self._procs.remove(proc)
+        if self._cancel:
+            raise self._Cancelled("cancelled")
+        if proc.returncode != 0:
+            msg = stderr.decode(errors='replace')[-500:] if stderr else "ffmpeg failed"
+            raise RuntimeError(msg)
+        if self._image_sequence:
+            # Image sequences carry no audio; extract it separately.
+            audio_cmd = build_audio_extract_command(self._input, start, output)
+            audio_result = subprocess.run(audio_cmd, capture_output=True, text=True, timeout=60)
+            if audio_result.returncode != 0:
+                msg = (audio_result.stderr or "audio extraction failed")[-500:]
+                raise RuntimeError(msg)
+        return output
+
+    def _run(self):
+        """Thread body: fan the jobs out to a pool and dispatch callbacks.
+
+        Stops at the first failing job (reported through ``on_error`` unless
+        it is a cancellation). ``on_all_done`` fires only when every job
+        succeeded and no cancellation was requested.
+        """
+        cap = self._max_workers or (os.cpu_count() or 2)
+        # At least one worker: ThreadPoolExecutor rejects max_workers=0,
+        # which used to turn an empty job list into an error callback.
+        workers = max(1, min(len(self._jobs), cap))
+        try:
+            with ThreadPoolExecutor(max_workers=workers) as pool:
+                futures = {
+                    pool.submit(self._run_one, s, o, pr, cc): o
+                    for s, o, pr, cc in self._jobs
+                }
+                try:
+                    for fut in as_completed(futures):
+                        if self._cancel:
+                            break
+                        try:
+                            path = fut.result()
+                            if self._on_clip_done:
+                                self._on_clip_done(path)
+                        except Exception as e:
+                            if not isinstance(e, self._Cancelled) and self._on_error:
+                                self._on_error(str(e))
+                            return
+                finally:
+                    # On abort, drop jobs that have not started yet so the
+                    # pool shutdown does not run them. No-op for finished
+                    # or running futures.
+                    for fut in futures:
+                        fut.cancel()
+        except Exception as e:
+            if self._on_error:
+                self._on_error(str(e))
+            return
+        if self._cancel:
+            return
+        if self._on_all_done:
+            self._on_all_done()
@@ -0,0 +1,160 @@
+import os
+import re
+import subprocess
+
+from .paths import _bin, _log
+
+
+# Supported crop aspect ratios: maps the "W:H" label to the (numerator,
+# denominator) pair that _portrait_crop_filter uses to derive the crop width
+# from the input height.
+_RATIOS: dict[str, tuple[int, int]] = {
+    "9:16": (9, 16),
+    "4:5":  (4, 5),
+    "1:1":  (1, 1),
+}
+
+
+def _portrait_crop_filter(ratio: str, crop_center: float) -> str:
+    """Build the ffmpeg ``crop=`` filter string for a portrait *ratio*.
+
+    The crop keeps the full input height; its width is derived from the
+    height and the ratio, and its x offset is the horizontal slack scaled by
+    *crop_center*, clamped to ``[0, slack]``. Everything is written as an
+    ffmpeg expression so that source dimensions are resolved at runtime.
+    Commas inside min()/max() are escaped with \\, so that ffmpeg's
+    filtergraph parser does not treat them as filter-chain separators.
+
+    Raises:
+        KeyError: if *ratio* is not a key of ``_RATIOS``.
+    """
+    width_part, height_part = _RATIOS[ratio]
+    crop_w = f"ih*{width_part}/{height_part}"
+    slack = f"iw-{crop_w}"
+    x_offset = f"max(0\\,min(({slack})*{crop_center}\\,{slack}))"
+    return f"crop={crop_w}:ih:{x_offset}:0"
+
+
+def resolve_keyframe(
+    keyframes: list[tuple[float, float, str | None, bool, bool]],
+    t: float,
+    tolerance: float = 0.05,
+) -> tuple[float, float, str | None, bool, bool] | None:
+    """Return the latest keyframe at or before *t*, or None.
+
+    A keyframe counts as "at or before" when its time (first tuple element)
+    is no later than ``t + tolerance``. The scan stops at the first keyframe
+    beyond that limit, so *keyframes* is expected to be sorted by time.
+    """
+    limit = t + tolerance
+    latest = None
+    for candidate in keyframes:
+        if not (candidate[0] <= limit):
+            break
+        latest = candidate
+    return latest
+
+
+def apply_keyframes_to_jobs(
+    jobs: list[tuple[float, str, str | None, float]],
+    keyframes: list[tuple[float, float, str | None, bool, bool]],
+    base_center: float,
+    base_ratio: str | None,
+    base_rand_p: bool,
+    base_rand_s: bool,
+) -> list[tuple[float, str, str | None, float, bool, bool]]:
+    """Attach the crop state in effect at each job's start time.
+
+    The ratio and center already stored in a job are ignored: they are taken
+    from the keyframe resolved for the job's start, or from the ``base_*``
+    arguments when no keyframe applies.
+
+    Returns list of (start, path, ratio, center, rand_portrait, rand_square).
+    """
+    widened = []
+    for start, path, _old_ratio, _old_center in jobs:
+        keyframe = resolve_keyframe(keyframes, start)
+        if keyframe is None:
+            state = (base_ratio, base_center, base_rand_p, base_rand_s)
+        else:
+            # Keyframe layout: (time, center, ratio, rand_p, rand_s).
+            _time, kf_center, kf_ratio, kf_rand_p, kf_rand_s = keyframe
+            state = (kf_ratio, kf_center, kf_rand_p, kf_rand_s)
+        widened.append((start, path, *state))
+    return widened
+
+
+def build_ffmpeg_command(
+    input_path: str, start: float, output_path: str,
+    short_side: int | None = None,
+    portrait_ratio: str | None = None,
+    crop_center: float = 0.5,
+    image_sequence: bool = False,
+    encoder: str = "libx264",
+) -> list[str]:
+    """Return the ffmpeg argument list that exports one 8-second clip.
+
+    Args:
+        input_path: source video.
+        start: clip start in seconds (passed to ``-ss``).
+        output_path: output file, or the output directory when
+            *image_sequence* is set.
+        short_side: if given, scale so the shorter dimension has this size.
+        portrait_ratio: if given, crop to this ``_RATIOS`` key first.
+        crop_center: horizontal crop position used with *portrait_ratio*.
+        image_sequence: write ``frame_%04d.webp`` images without audio
+            instead of a video file.
+        encoder: video encoder for non-image-sequence output.
+    """
+    # -ss before -i: fast input-seeking. Safe here because we always re-encode,
+    # so there is no keyframe-alignment issue from pre-input seek.
+    # Image sequences always use libwebp, so skip HW encoder setup.
+    use_hw_vaapi = encoder == "h264_vaapi" and not image_sequence
+    cmd = [_bin("ffmpeg"), "-y"]
+
+    # VAAPI needs a device for hardware context.
+    if use_hw_vaapi:
+        cmd += ["-hwaccel", "vaapi", "-hwaccel_output_format", "vaapi",
+                "-vaapi_device", "/dev/dri/renderD128"]
+
+    cmd += [
+        "-threads", "0",
+        "-ss", str(start),
+        "-i", input_path,
+        "-t", "8",
+    ]
+
+    filters: list[str] = []
+    if portrait_ratio is not None:
+        filters.append(_portrait_crop_filter(portrait_ratio, crop_center))
+    if short_side is not None:
+        # Scale so the shorter dimension equals short_side.
+        filters.append(
+            f"scale='if(lt(iw,ih),{short_side},-2)':'if(lt(iw,ih),-2,{short_side})':flags=lanczos"
+        )
+
+    if use_hw_vaapi:
+        if filters:
+            # Decoded frames are GPU surfaces: download them for the CPU
+            # filters, then upload the result for the hardware encoder.
+            filters = ["hwdownload", "format=nv12", *filters,
+                       "format=nv12", "hwupload"]
+        else:
+            # No CPU filtering. Frames may already be VAAPI surfaces, which a
+            # plain "format=nv12" cannot accept; "nv12|vaapi" lets hardware
+            # frames pass through and still handles software-decoded input.
+            filters = ["format=nv12|vaapi", "hwupload"]
+
+    if filters:
+        cmd += ["-vf", ",".join(filters)]
+
+    if image_sequence:
+        cmd += [
+            "-an",
+            "-c:v", "libwebp",
+            "-quality", "92",
+            "-compression_level", "1",
+            os.path.join(output_path, "frame_%04d.webp"),
+        ]
+    else:
+        cmd += ["-c:v", encoder, "-c:a", "pcm_s16le", output_path]
+    return cmd
+
+
+def build_audio_extract_command(input_path: str, start: float, sequence_dir: str) -> list[str]:
+    """Build the ffmpeg call that writes 8 s of audio to ``<sequence_dir>.wav``.
+
+    The audio starts at *start* seconds, video is dropped (``-vn``) and the
+    samples are encoded as ``pcm_s16le``.
+    """
+    wav_path = f"{sequence_dir}.wav"
+    seek_and_input = ["-ss", str(start), "-i", input_path, "-t", "8"]
+    audio_only = ["-vn", "-c:a", "pcm_s16le"]
+    return [_bin("ffmpeg"), "-y", *seek_and_input, *audio_only, wav_path]
+
+
+def detect_hw_encoders() -> list[str]:
+    """List the H.264 hardware encoders that ``ffmpeg -encoders`` reports.
+
+    Returns an empty list when ffmpeg cannot be run, exits with a non-zero
+    status, or reports none of the known encoders. The outcome is logged.
+    """
+    candidates = ["h264_nvenc", "h264_vaapi", "h264_qsv", "h264_amf", "h264_videotoolbox"]
+    try:
+        probe = subprocess.run(
+            [_bin("ffmpeg"), "-hide_banner", "-encoders"],
+            capture_output=True, text=True, timeout=5,
+        )
+    except Exception:
+        # Best effort: a missing or hanging ffmpeg means "no HW encoders".
+        return []
+    if probe.returncode != 0:
+        return []
+
+    listing = probe.stdout
+    found = [enc for enc in candidates if re.search(rf'\b{enc}\b', listing)]
+    if not found:
+        _log("No HW encoders detected — GPU export unavailable")
+    else:
+        _log(f"HW encoders detected: {', '.join(found)}")
+    return found
@@ -0,0 +1,44 @@
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+
+def _frozen_path() -> Path:
+    """Bundle dir (``sys._MEIPASS``) when frozen, else the parent of this file's directory."""
+    is_bundled = getattr(sys, "frozen", False)
+    return Path(sys._MEIPASS) if is_bundled else Path(__file__).resolve().parent.parent
+
+
+def _bin(name: str) -> str:
+    """Return the path of *name* under ``_frozen_path()`` if it exists there;
+    otherwise return *name* unchanged so the usual PATH lookup applies."""
+    candidate = _frozen_path() / name
+    # A copy shipped next to the app wins over whatever is on PATH.
+    return str(candidate) if candidate.exists() else name
+
+
+def _log(*args) -> None:
+    """Write *args* to stderr behind a ``[8-cut HH:MM:SS]`` local-time prefix."""
+    stamp = f"{datetime.now():%H:%M:%S}"
+    print(f"[8-cut {stamp}]", *args, file=sys.stderr)
+
+
+def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
+    """Return ``<folder>/<basename>_<NNN>/<basename>_<NNN>[_<sub>].mp4`` (counter zero-padded to 3)."""
+    clip_group = "{}_{:03d}".format(basename, counter)
+    return os.path.join(folder, clip_group, clip_group + ("" if sub is None else f"_{sub}") + ".mp4")
+
+
+def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
+    """Return ``<folder>/<basename>_<NNN>/<basename>_<NNN>[_<sub>]`` with no file extension."""
+    clip_group = "{}_{:03d}".format(basename, counter)
+    return os.path.join(folder, clip_group, clip_group if sub is None else f"{clip_group}_{sub}")
+
+
+def format_time(seconds: float) -> str:
+    """Format *seconds* as ``M:SS.s``; minutes are not capped at 59."""
+    whole_minutes, remainder = divmod(seconds, 60)
+    # Truncate (never round) to tenths so 59.95 renders as "0:59.9" rather than "0:60.0".
+    tenths = int(remainder * 10)
+    return f"{int(whole_minutes)}:{tenths / 10:04.1f}"
@@ -0,0 +1,104 @@
+import os
+import subprocess
+import tempfile
+
+from .paths import _bin, _log
+
+_yolo_model = None
+
+
+def _get_yolo():
+    """Return the cached YOLO model, loading ``yolov8n.pt`` on first use (None on failure)."""
+    global _yolo_model
+    if _yolo_model is not None:
+        return _yolo_model
+    try:
+        from ultralytics import YOLO
+        _yolo_model = YOLO("yolov8n.pt")
+    except ImportError:
+        _log("ultralytics not installed — tracking disabled")
+    except Exception as e:
+        _log(f"YOLO load failed: {e}")
+    else:
+        _log("YOLO model loaded")
+    return _yolo_model
+
+
+def extract_frame_cv(video_path: str, time: float):
+    """Extract the frame at *time* seconds as a BGR numpy array (ffmpeg -> temp PNG -> cv2).
+    Returns None when OpenCV cannot be imported, ffmpeg fails or runs longer
+    than 10 s, or any other step raises; the temp PNG is always removed."""
+    try:
+        import cv2  # cv2.imread produces the array; no direct numpy import is needed
+    except ImportError:
+        return None
+    fd, tmp = tempfile.mkstemp(suffix=".png")
+    os.close(fd)  # only the path is used; ffmpeg overwrites the file via -y
+    try:
+        cmd = [_bin("ffmpeg"), "-y", "-ss", str(time), "-i", video_path, "-frames:v", "1", tmp]
+        result = subprocess.run(cmd, capture_output=True, timeout=10)
+        if result.returncode != 0:
+            return None
+        return cv2.imread(tmp)
+    except Exception:
+        return None
+    finally:
+        if os.path.exists(tmp):
+            os.unlink(tmp)
+
+
+def detect_subject_center(
+    video_path: str, time: float, target_cls: int | None, last_x: float, last_y: float,
+) -> tuple[int | None, float, float] | None:
+    """Run YOLO on the frame at *time* and return (class_id, norm_x, norm_y)
+    for the detection with the lowest cost relative to (target_cls, last_x, last_y).
+
+    norm_x / norm_y are the box center divided by frame width / height.
+    Returns None if the model, the frame, or any detection is missing.
+    """
+    model = _get_yolo()
+    if model is None:
+        return None
+    frame = extract_frame_cv(video_path, time)
+    if frame is None:
+        return None
+    results = model(frame, verbose=False)
+    if not results or len(results[0].boxes) == 0:
+        return None
+    frame_h, frame_w = frame.shape[:2]
+    def _candidate(box):
+        left, top, right, bottom = box.xyxy[0].tolist()
+        return int(box.cls[0]), (left + right) / 2 / frame_w, (top + bottom) / 2 / frame_h
+
+    def _cost(cand):
+        # Squared distance to the last position, plus 1.0 for a class mismatch.
+        mismatch = target_cls is not None and cand[0] != target_cls
+        return (1.0 if mismatch else 0) + ((cand[1] - last_x) ** 2 + (cand[2] - last_y) ** 2)
+
+    return min(map(_candidate, results[0].boxes), key=_cost)
+
+
+def track_centers_for_jobs(
+    video_path: str, cursor: float, crop_center: float,
+    starts: list[float],
+) -> list[float]:
+    """Return one horizontal crop center per entry of *starts*.
+    The detection nearest (crop_center, 0.5) at *cursor* fixes the target class;
+    each start then follows it, reusing the last x when nothing is detected.
+    With no detection at the cursor every center is *crop_center*."""
+    anchor = detect_subject_center(video_path, cursor, None, crop_center, 0.5)
+    if anchor is None:
+        _log("Tracking: no detection at cursor, using fixed center")
+        return [crop_center] * len(starts)
+    target_cls, last_x, last_y = anchor
+    _log(f"Tracking: target class={target_cls} at ({last_x:.2f}, {last_y:.2f})")
+    centers = []
+    for t in starts:
+        hit = detect_subject_center(video_path, t, target_cls, last_x, last_y)
+        if hit is None:
+            _log(f"  t={t:.2f}s → lost, reusing {last_x:.3f}")
+        else:
+            _, last_x, last_y = hit
+            _log(f"  t={t:.2f}s → center={last_x:.3f}")
+        centers.append(last_x)
+    return centers
@@ -0,0 +1,24 @@
+services:
+  8cut:
+    build: .  # build context: this directory
+    ports:
+      - "8000:8000"  # host:container
+    volumes:
+      - /path/to/videos:/videos:ro  # source videos, read-only; host path looks like a placeholder — adjust
+      - /path/to/exports:/exports  # export output; host path looks like a placeholder — adjust
+      - 8cut-data:/data  # named volume declared under `volumes:` below
+    environment:
+      MEDIA_DIRS: /videos
+      EXPORT_DIR: /exports
+      DB_PATH: /data/8cut.db  # lives on the 8cut-data volume
+      CACHE_DIR: /data/cache  # lives on the 8cut-data volume
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia  # GPU reservation: all NVIDIA devices with the gpu capability
+              count: all
+              capabilities: [gpu]
+
+volumes:
+  8cut-data:
@@ -0,0 +1,148 @@
+# 8-cut Client Design
+
+## Goal
+
+Build a Tauri + Svelte desktop client that connects to the 8-cut server API for remote video editing. Full feature parity with the Qt app. Targets Linux first, then Mac.
+
+## Architecture
+
+```
+Tauri app (Rust shell + Svelte webview)
+├── mpv sidecar (bundled binary)
+│   ├── plays video: http://server/api/stream/{path}?quality=low
+│   ├── plays audio: http://server/api/audio/{path}
+│   └── controlled via JSON IPC socket
+├── Svelte UI
+│   ├── File browser
+│   ├── Canvas timeline (markers, cursor, play region)
+│   ├── Canvas crop overlay
+│   ├── Export controls + WebSocket progress
+│   └── Settings panel (profile, subprofiles, quality)
+└── Rust backend
+    ├── Spawn/manage mpv process + IPC
+    ├── Proxy server API calls (avoid CORS)
+    └── Tauri commands exposed to Svelte frontend
+```
+
+## Playback
+
+mpv runs as a sidecar process, controlled via JSON IPC socket. Two streams:
+- Video: `http://server/api/stream/{path}?root={root}&quality={quality}` (transcoded, no audio)
+- Audio: `http://server/api/audio/{path}?root={root}` (full quality WAV)
+
+mpv's `--audio-file=` flag syncs both streams with frame-accurate seeking.
+
+Quality presets: potato (480p), low (720p), medium (1080p), high (original).
+
+## Features
+
+### File management
+- Browse server video roots (`GET /api/roots`, `GET /api/files`)
+- Hide/unhide files per profile (`POST/DELETE /api/hidden/{filename}`)
+- Sort by name/size, filter hidden
+
+### Playback
+- Play/pause/resume from pause point
+- AB-loop with current spread/clips settings
+- Play region adapts to spread changes without restarting
+- Quality selector
+
+### Timeline (Canvas)
+- Cursor position, markers, play position indicator
+- Click to seek, drag cursor
+- Lock mode: cursor locked to marker, double-click jumps to end of clip span
+- Autoclip: when paused, auto-adjust clip count to fit pause position
+
+### Crop & keyframes
+- Portrait ratio selector (9:16, 4:5, 1:1, off)
+- Crop center slider with live canvas overlay
+- Crop keyframes at arbitrary timeline positions
+- Subject tracking (triggered server-side)
+- Random portrait/square toggles
+
+### Export
+- Configurable: clips, spread, short side, format (MP4/WebP sequence)
+- Label + category annotation
+- Encoder selection (libx264 / h264_nvenc)
+- Subprofiles with folder suffix routing
+- Number keys 1-9 for subprofile quick export, E for main
+- WebSocket progress (`WS /ws/export`), per-clip completion
+- Delete/re-export from marker context menu
+
+### Profiles
+- Profile switcher, markers reload per profile
+- Subprofile management (add/remove)
+
+### Settings
+- Server URL (configurable)
+- Default quality preset
+- All settings persisted client-side via Tauri store
+
+## Server API endpoints used
+
+```
+GET    /api/roots
+GET    /api/files?root={root}
+GET    /api/video/{path}?root={root}
+GET    /api/stream/{path}?root={root}&quality={quality}
+GET    /api/audio/{path}?root={root}
+GET    /api/cache/status/{path}?root={root}
+GET    /api/markers/{filename}?profile={profile}
+GET    /api/profiles
+GET    /api/labels
+POST   /api/export
+GET    /api/export/{job_id}
+DELETE /api/export?output_path={path}
+POST   /api/hidden/{filename}?profile={profile}
+DELETE /api/hidden/{filename}?profile={profile}
+GET    /api/hidden?profile={profile}
+WS     /ws/export
+```
+
+## Project structure
+
+```
+client/
+├── src-tauri/
+│   ├── src/
+│   │   ├── main.rs          (Tauri entry, app setup)
+│   │   ├── mpv.rs           (mpv sidecar spawn + IPC)
+│   │   ├── commands.rs      (Tauri commands for Svelte)
+│   │   └── lib.rs
+│   ├── Cargo.toml
+│   └── tauri.conf.json
+├── src/
+│   ├── App.svelte
+│   ├── lib/
+│   │   ├── api.ts           (server API client)
+│   │   ├── mpv.ts           (mpv IPC bridge via Tauri commands)
+│   │   ├── ws.ts            (WebSocket export progress)
+│   │   └── stores.ts        (Svelte stores: files, markers, settings)
+│   ├── components/
+│   │   ├── FileBrowser.svelte
+│   │   ├── Timeline.svelte
+│   │   ├── CropOverlay.svelte
+│   │   ├── ExportPanel.svelte
+│   │   ├── SettingsPanel.svelte
+│   │   └── ProfileBar.svelte
+│   └── main.ts
+├── package.json
+└── vite.config.ts
+```
+
+## Implementation order
+
+1. Scaffold Tauri + Svelte project
+2. mpv sidecar: spawn, IPC, basic play/pause/seek
+3. API client module + server connection
+4. File browser component
+5. Video playback: load file → stream URL → mpv
+6. Canvas timeline: cursor, seek, markers
+7. Export panel + WebSocket progress
+8. Crop overlay + keyframes
+9. Lock mode, autoclip, play region
+10. Profiles, subprofiles, hidden files
+11. Keyboard shortcuts
+12. Settings persistence
+13. Package for Linux (.deb / .AppImage)
+14. Package for Mac (.dmg)
@@ -0,0 +1,207 @@
+# 8-cut Server API Design
+
+## Goal
+
+Run 8-cut as a FastAPI server on Unraid (Docker) so a Tauri desktop client on Mac can edit remotely over WireGuard — no file transfers, no auth.
+
+## Architecture
+
+```
+Unraid (Docker container):
+  FastAPI + ffmpeg + SQLite
+  ├── /api/files         list videos from mounted volumes
+  ├── /api/stream/{path} transcoded video (cached, no audio)
+  ├── /api/audio/{path}  full-quality audio (cached, passthrough)
+  ├── /api/video/{path}  raw file (for reference/download)
+  ├── /api/markers       CRUD markers per profile
+  ├── /api/profiles      list/create profiles
+  ├── /api/export        trigger + manage exports
+  ├── /api/labels        label history
+  ├── /api/hidden        hidden file management
+  └── ws://…/ws/export   real-time export progress
+
+Mac (Tauri + Svelte + libmpv):
+  ├── mpv plays stream URL (video) + audio URL separately
+  ├── Canvas timeline + crop overlay + keyframes
+  ├── Full UI: profiles, subprofiles, settings
+  └── Stateless — all state lives on server
+```
+
+## Docker mounts
+
+| Mount       | Purpose                        | Env var      |
+|-------------|--------------------------------|--------------|
+| `/videos`   | Source video files (read-only)  | `MEDIA_DIRS` |
+| `/exports`  | Export output                  | `EXPORT_DIR` |
+| `/data`     | SQLite DB + transcode cache    | `DB_PATH`, `CACHE_DIR` |
+
+`MEDIA_DIRS` accepts multiple comma-separated paths, e.g. `/videos1,/videos2`.
+
+## Video streaming with transcode cache
+
+The client needs low-bitrate video for scrubbing over the network but full-quality audio for accurate editing.
+
+**Flow:**
+1. Client requests `/api/stream/{path}?quality=low`
+2. Server checks cache: `{CACHE_DIR}/{quality}/{hash}.mp4`
+3. If cached → serve with range requests (instant seeking)
+4. If not → start background ffmpeg transcode, return `202 Accepted` with job ID
+5. Client polls or gets WebSocket notification when ready
+6. Audio: `/api/audio/{path}` extracts audio (passthrough, fast) to cache on first request
+
+**Quality presets:**
+
+| Preset   | Resolution | Bitrate  |
+|----------|-----------|----------|
+| `potato` | 480p      | ~500 Kbps |
+| `low`    | 720p      | ~2 Mbps  |
+| `medium` | 1080p     | ~5 Mbps  |
+| `high`   | original  | ~10 Mbps |
+
+Each quality level cached separately. Client can switch quality — mpv reloads the URL.
+
+**mpv on client:**
+```
+video = http://server/api/stream/file.mp4?quality=low
+audio = http://server/api/audio/file.mp4
+```
+mpv's `--audio-file=` flag plays both in sync with frame-accurate seeking.
+
+## API endpoints
+
+### Files
+```
+GET /api/files?root={root}
+  → [{path, name, size, duration?, markers_count}]
+
+GET /api/video/{path}
+  → raw file with range requests
+
+GET /api/stream/{path}?quality=low|medium|high|potato
+  → cached transcoded video (no audio), range requests
+  → 202 if transcode in progress
+
+GET /api/audio/{path}
+  → cached full-quality audio, range requests
+  → 202 if extraction in progress
+
+GET /api/cache/status/{path}
+  → {qualities: {potato: "ready", low: "transcoding", ...}, audio: "ready"}
+```
+
+### Markers & profiles
+```
+GET    /api/markers/{filename}?profile=default
+  → [{start_time, marker_number, output_path}]
+
+GET    /api/profiles
+  → ["default", "intense", ...]
+
+GET    /api/labels
+  → ["dog barking", "rain", ...]
+```
+
+### Export
+```
+POST   /api/export
+  body: {input_path, cursor, folder_suffix?, name, clips, spread,
+         short_side?, portrait_ratio?, crop_center, format,
+         label?, category?, profile, crop_keyframes?,
+         rand_portrait?, rand_square?, track_subject?}
+  → {job_id}
+
+GET    /api/export/{job_id}
+  → {status, completed, total, outputs: [...]}
+
+DELETE /api/export/{output_path}
+  → delete from DB + disk
+
+WS     /ws/export
+  → server pushes: {type: "clip_done", path: "..."} | {type: "all_done"} | {type: "error", msg: "..."}
+```
+
+### Hidden files
+```
+POST   /api/hidden/{filename}?profile=default
+DELETE /api/hidden/{filename}?profile=default
+GET    /api/hidden?profile=default
+  → ["file1.mp4", "file2.mp4"]
+```
+
+## Code reuse from main.py
+
+**Extracted to shared module (used by both server and Qt app):**
+- `ProcessedDB` — SQLite operations
+- `build_ffmpeg_command` — ffmpeg command construction
+- `build_audio_extract_command`
+- `build_export_path` / `build_sequence_dir`
+- `detect_hw_encoders`
+- `upsert_clip_annotation` / `remove_clip_annotation`
+- `apply_keyframes_to_jobs` / `resolve_keyframe`
+- `track_centers_for_jobs` (subject tracking)
+
+**Server-specific (new):**
+- FastAPI app + route handlers
+- Transcode cache manager
+- Export worker (plain threading, replaces QThread-based ExportWorker)
+- File listing / media root scanning
+- WebSocket export progress broadcaster
+
+**Tauri client (new, Svelte):**
+- mpv integration via Tauri plugin or sidecar
+- Canvas-based timeline widget
+- Canvas-based crop overlay
+- All UI controls
+- API client module
+
+## Dockerfile
+
+```dockerfile
+FROM python:3.12-slim
+RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY server/ .
+RUN pip install --no-cache-dir fastapi uvicorn
+EXPOSE 8000
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
+```
+
+## Project structure
+
+```
+8-cut/
+├── main.py              (existing Qt app, unchanged)
+├── core/                (shared logic, extracted from main.py)
+│   ├── __init__.py
+│   ├── db.py            (ProcessedDB)
+│   ├── ffmpeg.py        (build commands, detect encoders)
+│   ├── export.py        (ExportWorker — plain threading)
+│   ├── paths.py         (build_export_path, build_sequence_dir)
+│   └── annotations.py   (dataset.json helpers)
+├── server/
+│   ├── app.py           (FastAPI app)
+│   ├── routes/
+│   │   ├── files.py
+│   │   ├── stream.py
+│   │   ├── markers.py
+│   │   ├── export.py
+│   │   └── hidden.py
+│   ├── cache.py         (transcode cache manager)
+│   ├── ws.py            (WebSocket handler)
+│   └── config.py        (env vars, settings)
+├── client/              (Tauri + Svelte — future)
+│   └── ...
+├── Dockerfile
+└── docker-compose.yml
+```
+
+## Implementation order
+
+1. Extract shared logic from main.py → `core/`
+2. Update main.py to import from `core/` (verify Qt app still works)
+3. Build FastAPI server with file listing + video serving
+4. Add transcode cache + audio extraction
+5. Add markers/profiles/labels/hidden API
+6. Add export endpoint + WebSocket progress
+7. Dockerfile + docker-compose
+8. (Later) Tauri client
@@ -0,0 +1,948 @@
+# Server API Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Extract shared logic from main.py into a `core/` package, then build the FastAPI server that serves video files, manages the DB, and runs exports.
+
+**Architecture:** Shared logic (DB, ffmpeg, paths, annotations, tracking) moves to `core/`. Both `main.py` (Qt app) and `server/` import from `core/`. The server adds HTTP video streaming with transcode cache, REST endpoints, and WebSocket export progress.
+
+**Tech Stack:** Python 3.12, FastAPI, uvicorn, SQLite, ffmpeg
+
+---
+
+### Task 1: Create core/ package — paths and helpers
+
+**Files:**
+- Create: `core/__init__.py`
+- Create: `core/paths.py`
+
+**Step 1: Create core/__init__.py**
+
+```python
+# empty — package marker
+```
+
+**Step 2: Create core/paths.py**
+
+Extract from main.py lines 36-74: `_frozen_path`, `_bin`, `_log`, `build_export_path`, `build_sequence_dir`, `format_time`.
+
+```python
+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+
+
+def _frozen_path() -> Path:
+    if getattr(sys, "frozen", False):
+        return Path(sys._MEIPASS)
+    return Path(__file__).resolve().parent.parent
+
+
+def _bin(name: str) -> str:
+    p = _frozen_path() / name
+    if p.exists():
+        return str(p)
+    return name
+
+
+def _log(*args) -> None:
+    ts = datetime.now().strftime("%H:%M:%S")
+    print(f"[8-cut {ts}]", *args, file=sys.stderr)
+
+
+def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
+    group = f"{basename}_{counter:03d}"
+    name = f"{group}_{sub}" if sub is not None else group
+    return os.path.join(folder, group, name + ".mp4")
+
+
+def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
+    group = f"{basename}_{counter:03d}"
+    name = f"{group}_{sub}" if sub is not None else group
+    return os.path.join(folder, group, name)
+
+
+def format_time(seconds: float) -> str:
+    m = int(seconds // 60)
+    s = int(seconds % 60 * 10) / 10
+    return f"{m}:{s:04.1f}"
+```
+
+**Step 3: Commit**
+
+```bash
+git add core/
+git commit -m "feat: create core/paths module with shared path helpers"
+```
+
+---
+
+### Task 2: Create core/ffmpeg.py
+
+**Files:**
+- Create: `core/ffmpeg.py`
+
+**Step 1: Create core/ffmpeg.py**
+
+Extract from main.py lines 77-112, 115-188, and 244-289 (all ffmpeg-related): `_RATIOS`, `_portrait_crop_filter`, `resolve_keyframe`, `apply_keyframes_to_jobs`, `build_ffmpeg_command`, `build_audio_extract_command`, `detect_hw_encoders`. (Lines 191-241 are annotations — extracted separately in Task 4.)
+
+```python
+import os
+import re
+import subprocess
+
+from .paths import _bin, _log
+
+
+_RATIOS: dict[str, tuple[int, int]] = {
+    "9:16": (9, 16),
+    "4:5":  (4, 5),
+    "1:1":  (1, 1),
+}
+
+
+def _portrait_crop_filter(ratio: str, crop_center: float) -> str:
+    num, den = _RATIOS[ratio]
+    cw = f"ih*{num}/{den}"
+    x = f"max(0\\,min((iw-{cw})*{crop_center}\\,iw-{cw}))"
+    return f"crop={cw}:ih:{x}:0"
+
+
+def resolve_keyframe(
+    keyframes: list[tuple[float, float, str | None, bool, bool]],
+    t: float,
+    tolerance: float = 0.05,
+) -> tuple[float, float, str | None, bool, bool] | None:
+    result = None
+    for kf in keyframes:
+        if kf[0] <= t + tolerance:
+            result = kf
+        else:
+            break
+    return result
+
+
+def apply_keyframes_to_jobs(
+    jobs: list[tuple[float, str, str | None, float]],
+    keyframes: list[tuple[float, float, str | None, bool, bool]],
+    base_center: float,
+    base_ratio: str | None,
+    base_rand_p: bool,
+    base_rand_s: bool,
+) -> list[tuple[float, str, str | None, float, bool, bool]]:
+    result = []
+    for s, o, _r, _c in jobs:
+        kf = resolve_keyframe(keyframes, s)
+        if kf is not None:
+            _, center, ratio, rp, rs = kf
+        else:
+            center, ratio, rp, rs = base_center, base_ratio, base_rand_p, base_rand_s
+        result.append((s, o, ratio, center, rp, rs))
+    return result
+
+
+def build_ffmpeg_command(
+    input_path: str, start: float, output_path: str,
+    short_side: int | None = None,
+    portrait_ratio: str | None = None,
+    crop_center: float = 0.5,
+    image_sequence: bool = False,
+    encoder: str = "libx264",
+) -> list[str]:
+    use_hw_vaapi = encoder == "h264_vaapi" and not image_sequence
+    cmd = [_bin("ffmpeg"), "-y"]
+    if use_hw_vaapi:
+        cmd += ["-hwaccel", "vaapi", "-hwaccel_output_format", "vaapi",
+                "-vaapi_device", "/dev/dri/renderD128"]
+    cmd += ["-threads", "0", "-ss", str(start), "-i", input_path, "-t", "8"]
+    filters: list[str] = []
+    if portrait_ratio is not None:
+        filters.append(_portrait_crop_filter(portrait_ratio, crop_center))
+    if short_side is not None:
+        filters.append(
+            f"scale='if(lt(iw,ih),{short_side},-2)':'if(lt(iw,ih),-2,{short_side})':flags=lanczos"
+        )
+    if use_hw_vaapi:
+        if filters:
+            filters.insert(0, "hwdownload")
+            filters.insert(1, "format=nv12")
+        filters.append("format=nv12")
+        filters.append("hwupload")
+    if filters:
+        cmd += ["-vf", ",".join(filters)]
+    if image_sequence:
+        cmd += ["-an", "-c:v", "libwebp", "-quality", "92", "-compression_level", "1",
+                os.path.join(output_path, "frame_%04d.webp")]
+    else:
+        cmd += ["-c:v", encoder, "-c:a", "pcm_s16le", output_path]
+    return cmd
+
+
+def build_audio_extract_command(input_path: str, start: float, sequence_dir: str) -> list[str]:
+    audio_path = sequence_dir + ".wav"
+    return [_bin("ffmpeg"), "-y", "-ss", str(start), "-i", input_path,
+            "-t", "8", "-vn", "-c:a", "pcm_s16le", audio_path]
+
+
+def detect_hw_encoders() -> list[str]:
+    _HW_ENCODERS = ["h264_nvenc", "h264_vaapi", "h264_qsv", "h264_amf", "h264_videotoolbox"]
+    try:
+        result = subprocess.run(
+            [_bin("ffmpeg"), "-hide_banner", "-encoders"],
+            capture_output=True, text=True, timeout=5,
+        )
+        if result.returncode != 0:
+            return []
+        output = result.stdout
+    except Exception:
+        return []
+    available = []
+    for enc in _HW_ENCODERS:
+        if re.search(rf'\b{enc}\b', output):
+            available.append(enc)
+    if available:
+        _log(f"HW encoders detected: {', '.join(available)}")
+    else:
+        _log("No HW encoders detected — GPU export unavailable")
+    return available
+```
+
+**Step 2: Commit**
+
+```bash
+git add core/ffmpeg.py
+git commit -m "feat: create core/ffmpeg module with ffmpeg helpers"
+```
+
+---
+
+### Task 3: Create core/db.py
+
+**Files:**
+- Create: `core/db.py`
+
+**Step 1: Create core/db.py**
+
+Extract the entire `ProcessedDB` class from main.py lines 398-626. Import `_log` from `core.paths`.
+
+```python
+import sqlite3
+from datetime import datetime, timezone
+from pathlib import Path
+
+from .paths import _log
+
+
+class ProcessedDB:
+    _SCHEMA_VERSION = 3
+
+    def __init__(self, db_path: str | None = None):
+        # ... exact copy of existing class ...
+```
+
+Copy the full class body verbatim — all methods unchanged.
+
+**Step 2: Commit**
+
+```bash
+git add core/db.py
+git commit -m "feat: create core/db module with ProcessedDB"
+```
+
+---
+
+### Task 4: Create core/annotations.py
+
+**Files:**
+- Create: `core/annotations.py`
+
+**Step 1: Create core/annotations.py**
+
+Extract from main.py lines 191-241: `build_annotation_json_path`, `remove_clip_annotation`, `upsert_clip_annotation`.
+
+```python
+import json
+import os
+
+
+def build_annotation_json_path(folder: str) -> str:
+    return os.path.join(folder, "dataset.json")
+
+
+def remove_clip_annotation(folder: str, clip_path: str) -> None:
+    json_path = build_annotation_json_path(folder)
+    if not os.path.exists(json_path):
+        return
+    abs_path = os.path.abspath(clip_path)
+    with open(json_path, "r", encoding="utf-8") as f:
+        try:
+            entries = json.load(f)
+        except (json.JSONDecodeError, ValueError):
+            return
+    entries = [e for e in entries if e.get("path") != abs_path]
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(entries, f, indent=2, ensure_ascii=False)
+        f.write("\n")
+
+
+def upsert_clip_annotation(folder: str, clip_path: str, label: str) -> None:
+    if not label.strip():
+        return
+    os.makedirs(folder, exist_ok=True)
+    json_path = build_annotation_json_path(folder)
+    entries: list[dict] = []
+    if os.path.exists(json_path):
+        with open(json_path, "r", encoding="utf-8") as f:
+            try:
+                entries = json.load(f)
+            except (json.JSONDecodeError, ValueError):
+                entries = []
+    abs_path = os.path.abspath(clip_path)
+    entry: dict = {"path": abs_path, "label": label}
+    for i, e in enumerate(entries):
+        if e.get("path") == abs_path:
+            entries[i] = entry
+            break
+    else:
+        entries.append(entry)
+    with open(json_path, "w", encoding="utf-8") as f:
+        json.dump(entries, f, indent=2, ensure_ascii=False)
+        f.write("\n")
+```
+
+**Step 2: Commit**
+
+```bash
+git add core/annotations.py
+git commit -m "feat: create core/annotations module"
+```
+
+---
+
+### Task 5: Create core/export.py
+
+**Files:**
+- Create: `core/export.py`
+
+**Step 1: Create core/export.py**
+
+A plain-threading version of `ExportWorker` (no QThread dependency). Used by the server. The Qt app continues using its own QThread-based worker.
+
+```python
+import os
+import subprocess
+import threading
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Callable
+
+from .ffmpeg import build_ffmpeg_command, build_audio_extract_command
+from .paths import _bin, _log
+
+
+class ExportRunner:
+    """Run ffmpeg export jobs in a background thread pool.
+
+    Callbacks:
+        on_clip_done(path: str)
+        on_all_done()
+        on_error(msg: str)
+    """
+
+    def __init__(
+        self,
+        input_path: str,
+        jobs: list[tuple[float, str, str | None, float]],
+        short_side: int | None = None,
+        image_sequence: bool = False,
+        max_workers: int | None = None,
+        encoder: str = "libx264",
+        on_clip_done: Callable[[str], None] | None = None,
+        on_all_done: Callable[[], None] | None = None,
+        on_error: Callable[[str], None] | None = None,
+    ):
+        self._input = input_path
+        self._jobs = jobs
+        self._short_side = short_side
+        self._image_sequence = image_sequence
+        self._max_workers = max_workers
+        self._encoder = encoder
+        self._on_clip_done = on_clip_done
+        self._on_all_done = on_all_done
+        self._on_error = on_error
+        self._cancel = False
+        self._procs: list[subprocess.Popen] = []
+        self._procs_lock = threading.Lock()
+        self._thread: threading.Thread | None = None
+
+    def start(self):
+        self._thread = threading.Thread(target=self._run, daemon=True)
+        self._thread.start()
+
+    def cancel(self):
+        self._cancel = True
+        with self._procs_lock:
+            for proc in self._procs:
+                try:
+                    proc.kill()
+                except OSError:
+                    pass
+
+    def is_running(self) -> bool:
+        return self._thread is not None and self._thread.is_alive()
+
+    def _run_one(self, start: float, output: str,
+                 portrait_ratio: str | None, crop_center: float) -> str:
+        if self._cancel:
+            raise RuntimeError("cancelled")
+        if self._image_sequence:
+            os.makedirs(output, exist_ok=True)
+        cmd = build_ffmpeg_command(
+            self._input, start, output,
+            short_side=self._short_side,
+            portrait_ratio=portrait_ratio,
+            crop_center=crop_center,
+            image_sequence=self._image_sequence,
+            encoder=self._encoder,
+        )
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+        with self._procs_lock:
+            self._procs.append(proc)
+        try:
+            _, stderr = proc.communicate(timeout=120)
+        except subprocess.TimeoutExpired:
+            proc.kill()
+            raise RuntimeError("ffmpeg timed out")
+        finally:
+            with self._procs_lock:
+                self._procs.remove(proc)
+        if self._cancel:
+            raise RuntimeError("cancelled")
+        if proc.returncode != 0:
+            msg = stderr.decode(errors='replace')[-500:] if stderr else "ffmpeg failed"
+            raise RuntimeError(msg)
+        if self._image_sequence:
+            audio_cmd = build_audio_extract_command(self._input, start, output)
+            subprocess.run(audio_cmd, capture_output=True, text=True, timeout=60)
+        return output
+
+    def _run(self):
+        cap = self._max_workers or (os.cpu_count() or 2)
+        workers = min(len(self._jobs), cap)
+        try:
+            with ThreadPoolExecutor(max_workers=workers) as pool:
+                futures = {
+                    pool.submit(self._run_one, s, o, pr, cc): o
+                    for s, o, pr, cc in self._jobs
+                }
+                for fut in as_completed(futures):
+                    if self._cancel:
+                        break
+                    try:
+                        path = fut.result()
+                        if self._on_clip_done:
+                            self._on_clip_done(path)
+                    except Exception as e:
+                        if "cancelled" not in str(e) and self._on_error:
+                            self._on_error(str(e))
+        except Exception as e:
+            if self._on_error:
+                self._on_error(str(e))
+            return
+        if self._cancel:
+            return
+        if self._on_all_done:
+            self._on_all_done()
+```
+
+**Step 2: Commit**
+
+```bash
+git add core/export.py
+git commit -m "feat: create core/export module with ExportRunner"
+```
+
+---
+
+### Task 6: Create core/tracking.py
+
+**Files:**
+- Create: `core/tracking.py`
+
+**Step 1: Create core/tracking.py**
+
+Extract from main.py lines 294-395: YOLO tracking functions.
+
+```python
+import os
+import subprocess
+import tempfile
+
+from .paths import _bin, _log
+
+_yolo_model = None
+
+
+def _get_yolo():
+    global _yolo_model
+    if _yolo_model is None:
+        try:
+            from ultralytics import YOLO
+            _yolo_model = YOLO("yolov8n.pt")
+            _log("YOLO model loaded")
+        except ImportError:
+            _log("ultralytics not installed — tracking disabled")
+            return None
+        except Exception as e:
+            _log(f"YOLO load failed: {e}")
+            return None
+    return _yolo_model
+
+
+def extract_frame_cv(video_path: str, time: float):
+    try:
+        import cv2
+        import numpy as np
+    except ImportError:
+        return None
+    fd, tmp = tempfile.mkstemp(suffix=".png")
+    os.close(fd)
+    try:
+        cmd = [_bin("ffmpeg"), "-y", "-ss", str(time), "-i", video_path,
+               "-frames:v", "1", tmp]
+        result = subprocess.run(cmd, capture_output=True, timeout=10)
+        if result.returncode != 0:
+            return None
+        return cv2.imread(tmp)
+    except Exception:
+        return None
+    finally:
+        if os.path.exists(tmp):
+            os.unlink(tmp)
+
+
+# Detect objects in the frame at `time` and pick the one that best continues
+# the current track.
+#
+# Returns (class_id, cx, cy) for the chosen detection, where cx/cy are the box
+# centre divided by the frame width/height, or None when the YOLO model is
+# unavailable, the frame cannot be extracted, or nothing is detected.
+def detect_subject_center(
+    video_path: str, time: float, target_cls: int | None, last_x: float, last_y: float,
+) -> tuple[int | None, float, float] | None:
+    model = _get_yolo()
+    if model is None:
+        return None
+    frame = extract_frame_cv(video_path, time)
+    if frame is None:
+        return None
+    results = model(frame, verbose=False)
+    if not results or len(results[0].boxes) == 0:
+        return None
+    h, w = frame.shape[:2]
+    dets = []
+    for box in results[0].boxes:
+        x1, y1, x2, y2 = box.xyxy[0].tolist()
+        cls = int(box.cls[0])
+        # Box centre, normalised by the frame size.
+        cx = (x1 + x2) / 2 / w
+        cy = (y1 + y2) / 2 / h
+        dets.append((cls, cx, cy))
+    def score(d):
+        # Lower is better: squared distance from the last known position, plus
+        # a flat 1.0 penalty when a target class is set and the class differs.
+        cls_penalty = 0 if (target_cls is None or d[0] == target_cls) else 1.0
+        dist = (d[1] - last_x) ** 2 + (d[2] - last_y) ** 2
+        return cls_penalty + dist
+    best = min(dets, key=score)
+    return best
+
+
+# Compute one horizontal crop centre per entry in `starts` by following the
+# subject detected at `cursor`.
+#
+# Returns a list the same length as `starts`. If nothing is detected at the
+# cursor, every entry is `crop_center`.
+def track_centers_for_jobs(
+    video_path: str, cursor: float, crop_center: float,
+    starts: list[float],
+) -> list[float]:
+    # Reference detection: no class preference, nearest to (crop_center, 0.5).
+    ref = detect_subject_center(video_path, cursor, None, crop_center, 0.5)
+    if ref is None:
+        _log("Tracking: no detection at cursor, using fixed center")
+        return [crop_center] * len(starts)
+    target_cls, last_x, last_y = ref
+    _log(f"Tracking: target class={target_cls} at ({last_x:.2f}, {last_y:.2f})")
+    centers = []
+    for t in starts:
+        det = detect_subject_center(video_path, t, target_cls, last_x, last_y)
+        if det is not None:
+            _, cx, cy = det
+            _log(f"  t={t:.2f}s → center={cx:.3f}")
+            centers.append(cx)
+            # The next lookup searches near this detection.
+            last_x, last_y = cx, cy
+        else:
+            # Detection lost: keep the last known x and leave the anchor as is.
+            _log(f"  t={t:.2f}s → lost, reusing {last_x:.3f}")
+            centers.append(last_x)
+    return centers
+```
+
+**Step 2: Commit**
+
+```bash
+git add core/tracking.py
+git commit -m "feat: create core/tracking module with YOLO subject tracking"
+```
+
+---
+
+### Task 7: Update main.py to import from core/
+
+**Files:**
+- Modify: `main.py`
+
+**Step 1: Replace function definitions with imports**
+
+At the top of main.py, after the existing stdlib imports (line 17), add:
+
+```python
+from core.paths import _bin, _log, build_export_path, build_sequence_dir, format_time
+from core.ffmpeg import (
+    _RATIOS, resolve_keyframe, apply_keyframes_to_jobs,
+    build_ffmpeg_command, build_audio_extract_command, detect_hw_encoders,
+)
+from core.db import ProcessedDB
+from core.annotations import remove_clip_annotation, upsert_clip_annotation
+from core.tracking import track_centers_for_jobs
+```
+
+**Step 2: Delete the extracted function definitions and dead imports**
+
+Remove definitions from main.py:
+- Lines 36-74: `_frozen_path`, `_bin`, `_log`, `build_export_path`, `build_sequence_dir`, `format_time`
+- Lines 77-188: `resolve_keyframe`, `apply_keyframes_to_jobs`, `build_ffmpeg_command`, `build_audio_extract_command`
+- Lines 191-241: annotation functions (`build_annotation_json_path`, `remove_clip_annotation`, `upsert_clip_annotation`)
+- Lines 244-289: `detect_hw_encoders`, `_RATIOS`, `_portrait_crop_filter`
+- Lines 294-395: tracking functions (`_yolo_model`, `_get_yolo`, `extract_frame_cv`, `detect_subject_center`, `track_centers_for_jobs`)
+- Lines 398-626: `ProcessedDB` class
+
+Remove now-dead stdlib imports from the top of main.py:
+- `re` (only used in `detect_hw_encoders`)
+- `json` (only used in annotation functions)
+- `sqlite3` (only used in `ProcessedDB`)
+- `tempfile` (only used in `extract_frame_cv`)
+- `datetime`, `timezone` from the datetime import (only used in `_log` and `ProcessedDB`)
+
+Keep in main.py:
+- `_SELVA_CATEGORIES` (UI constant, line 291)
+- `_RATIOS` reference — imported from core.ffmpeg
+- `ExportWorker` (QThread-based, stays in main.py — the server uses `core.export.ExportRunner` instead)
+- `_DBWorker` and `FrameGrabber` (QThread-based, stay in main.py)
+
+**Step 3: Verify Qt app still works**
+
+```bash
+python main.py
+```
+
+Open a video, export a clip, check markers — verify nothing broke.
+
+**Step 4: Commit**
+
+```bash
+git add main.py
+git commit -m "refactor: import shared logic from core/ instead of inline definitions"
+```
+
+---
+
+### Task 8: Create server/config.py
+
+**Files:**
+- Create: `server/__init__.py` (empty package marker)
+- Create: `server/config.py`
+
+**Step 1: Create `server/__init__.py`**
+
+```python
+# empty — package marker
+```
+
+**Step 2: Create config**
+
+```python
+import os
+from pathlib import Path
+
+
+MEDIA_DIRS: list[str] = [
+    d.strip() for d in os.environ.get("MEDIA_DIRS", str(Path.home())).split(",") if d.strip()
+]
+EXPORT_DIR: str = os.environ.get("EXPORT_DIR", str(Path.home() / "8cut-exports"))
+DB_PATH: str = os.environ.get("DB_PATH", str(Path.home() / ".8cut.db"))
+CACHE_DIR: str = os.environ.get("CACHE_DIR", str(Path.home() / ".8cut-cache"))
+HOST: str = os.environ.get("HOST", "0.0.0.0")
+PORT: int = int(os.environ.get("PORT", "8000"))
+
+VIDEO_EXTENSIONS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".ts", ".flv", ".wmv"}
+
+QUALITY_PRESETS = {
+    "potato": {"height": 480, "bitrate": "500k"},
+    "low":    {"height": 720, "bitrate": "2M"},
+    "medium": {"height": 1080, "bitrate": "5M"},
+    "high":   {"height": 0, "bitrate": "10M"},  # 0 = original resolution
+}
+```
+
+**Step 3: Commit**
+
+```bash
+git add server/
+git commit -m "feat: create server/config with env var settings and quality presets"
+```
+
+---
+
+### Task 9: Create server/app.py — FastAPI skeleton + file listing
+
+**Files:**
+- Create: `server/app.py`
+- Create: `server/routes/__init__.py`
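+- Create: `server/routes/files.py`
+- Create: `server/routes/stream.py`, `server/routes/markers.py`, `server/routes/export.py`, `server/routes/hidden.py` (stub routers, filled in by Tasks 10–13)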
+
+**Step 1: Create FastAPI app**
+
+`server/app.py`:
+```python
+from fastapi import FastAPI
+from .routes import files, stream, markers, export, hidden
+
+app = FastAPI(title="8-cut Server")
+app.include_router(files.router, prefix="/api")
+app.include_router(stream.router, prefix="/api")
+app.include_router(markers.router, prefix="/api")
+app.include_router(export.router, prefix="/api")
+app.include_router(hidden.router, prefix="/api")
+```
+
+**Step 2: Create file listing route**
+
+`server/routes/files.py`:
+```python
+import os
+from fastapi import APIRouter, Query
+from ..config import MEDIA_DIRS, VIDEO_EXTENSIONS
+
+router = APIRouter()
+
+
+def _scan_videos(root: str) -> list[dict]:
+    results = []
+    for dirpath, _, filenames in os.walk(root):
+        for f in sorted(filenames):
+            if os.path.splitext(f)[1].lower() in VIDEO_EXTENSIONS:
+                full = os.path.join(dirpath, f)
+                rel = os.path.relpath(full, root)
+                results.append({
+                    "name": f,
+                    "path": rel,
+                    "root": root,
+                    "size": os.path.getsize(full),
+                })
+    return results
+
+
+@router.get("/files")
+def list_files(root: str | None = Query(None)):
+    dirs = [root] if root and root in MEDIA_DIRS else MEDIA_DIRS
+    files = []
+    for d in dirs:
+        files.extend(_scan_videos(d))
+    return files
+
+
+@router.get("/roots")
+def list_roots():
+    return MEDIA_DIRS
+```
+
+**Step 3: Create `server/routes/__init__.py`**
+
+```python
+# empty — package marker
+```
+
+**Step 4: Create stub routers** so app.py imports don't fail. Each file gets a minimal router — later tasks fill in the real endpoints.
+
+`server/routes/stream.py`:
+```python
+from fastapi import APIRouter
+router = APIRouter()
+```
+
+`server/routes/markers.py`:
+```python
+from fastapi import APIRouter
+router = APIRouter()
+```
+
+`server/routes/export.py`:
+```python
+from fastapi import APIRouter
+router = APIRouter()
+```
+
+`server/routes/hidden.py`:
+```python
+from fastapi import APIRouter
+router = APIRouter()
+```
+
+**Step 5: Commit**
+
+```bash
+git add server/
+git commit -m "feat: add FastAPI app with file listing endpoint"
+```
+
+---
+
+### Task 10: Create server/routes/stream.py — video serving + transcode cache
+
+**Files:**
+- Create: `server/cache.py`
+- Modify: `server/routes/stream.py` (replaces the stub router from Task 9)
+
+**Step 1: Create cache manager**
+
+`server/cache.py` handles:
+- Computing cache paths from source file hash + quality
+- Checking cache status
+- Launching background ffmpeg transcodes
+- Tracking in-progress jobs
+
+**Step 2: Create stream routes**
+
+```
+GET /api/video/{path}  — raw file, range requests
+GET /api/stream/{path}?quality=low — cached transcode, range requests (202 if not ready)
+GET /api/audio/{path}  — cached audio extraction, range requests (202 if not ready)
+GET /api/cache/status/{path} — cache status for all qualities
+```
+
+**Step 3: Commit**
+
+```bash
+git add server/cache.py server/routes/stream.py
+git commit -m "feat: add video streaming with transcode cache and audio extraction"
+```
+
+---
+
+### Task 11: Create server/routes/markers.py — DB endpoints
+
+**Files:**
+- Modify: `server/routes/markers.py` (replaces the stub router from Task 9)
+
+**Step 1: Create markers/profiles/labels routes**
+
+```
+GET  /api/markers/{filename}?profile=default
+GET  /api/profiles
+GET  /api/labels
+```
+
+Uses `ProcessedDB` singleton from `core.db`.
+
+**Step 2: Commit**
+
+```bash
+git add server/routes/markers.py
+git commit -m "feat: add markers, profiles, and labels API endpoints"
+```
+
+---
+
+### Task 12: Create server/routes/export.py + WebSocket
+
+**Files:**
+- Modify: `server/routes/export.py` (replaces the stub router from Task 9)
+- Create: `server/ws.py`
+
+**Step 1: Create export routes + WS**
+
+```
+POST   /api/export        — start export job
+GET    /api/export/{id}   — check job status
+DELETE /api/export/{path} — delete export from DB + disk
+WS     /ws/export         — real-time progress
+```
+
+Uses `ExportRunner` from `core.export`.
+
+**Step 2: Commit**
+
+```bash
+git add server/routes/export.py server/ws.py
+git commit -m "feat: add export endpoint with WebSocket progress"
+```
+
+---
+
+### Task 13: Create server/routes/hidden.py
+
+**Files:**
+- Modify: `server/routes/hidden.py` (replaces the stub router from Task 9)
+
+**Step 1: Create hidden file routes**
+
+```
+POST   /api/hidden/{filename}?profile=default
+DELETE /api/hidden/{filename}?profile=default
+GET    /api/hidden?profile=default
+```
+
+**Step 2: Commit**
+
+```bash
+git add server/routes/hidden.py
+git commit -m "feat: add hidden files API endpoints"
+```
+
+---
+
+### Task 14: Create Dockerfile + docker-compose.yml
+
+**Files:**
+- Create: `Dockerfile`
+- Create: `docker-compose.yml`
+
+**Step 1: Create Dockerfile**
+
+```dockerfile
+FROM python:3.12-slim
+RUN apt-get update && apt-get install -y ffmpeg && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY core/ core/
+COPY server/ server/
+# Note: ultralytics + opencv-python needed only if subject tracking is used.
+# Add them here if tracking is required on the server.
+RUN pip install --no-cache-dir fastapi uvicorn
+EXPOSE 8000
+CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
+```
+
+**Step 2: Create docker-compose.yml**
+
+```yaml
+services:
+  8cut:
+    build: .
+    ports:
+      - "8000:8000"
+    volumes:
+      - /path/to/videos:/videos:ro
+      - /path/to/exports:/exports
+      - 8cut-data:/data
+    environment:
+      MEDIA_DIRS: /videos
+      EXPORT_DIR: /exports
+      DB_PATH: /data/8cut.db
+      CACHE_DIR: /data/cache
+
+volumes:
+  8cut-data:
+```
+
+**Step 3: Commit**
+
+```bash
+git add Dockerfile docker-compose.yml
+git commit -m "feat: add Dockerfile and docker-compose for server deployment"
+```
@@ -0,0 +1,97 @@
+# Audio Similarity Scanning — Design
+
+**Goal:** Scan a video's audio track and highlight segments that match the sound profile of existing reference clips, so the user can quickly find similar moments without scrubbing manually.
+
+**Runs in:** Python/Qt client (`main.py`), not the server.
+
+---
+
+## Core Module: `core/audio_scan.py`
+
+New module alongside `core/tracking.py`. Two main functions:
+
+- `build_profile(clip_paths: list[str]) -> dict | None` — extracts MFCCs (20 coefficients) from each clip using `librosa`, returns a profile containing both the averaged vector and individual clip vectors, or `None` if no clip could be loaded.
+- `scan_video(video_path: str, profile: dict, mode: str, threshold: float, hop: float) -> list[tuple[float, float, float]]` — slides an 8s window across the video's audio, returns `(start_time, end_time, score)` tuples for segments above threshold.
+
+### Feature Extraction
+
+- Audio loaded via `librosa.load()` (handles video files directly, mono, 22050Hz).
+- MFCCs: `librosa.feature.mfcc(n_mfcc=20)`, averaged over time axis to produce a single vector per window/clip.
+- Similarity: cosine similarity (`numpy` dot product on L2-normalized vectors).
+
+### Matching Modes
+
+- **Average mode:** Compare each window to the mean of all reference MFCC vectors. Fast, good when references are homogeneous.
+- **Nearest mode:** Compare each window to every reference vector, take the max score. Better when references have variety within the style.
+
+### Parameters
+
+- `threshold` (float, 0.0–1.0): minimum cosine similarity to include a segment. Default 0.7.
+- `hop` (float, seconds): step size for the sliding window. Default 1.0s.
+- Window size fixed at 8s to match reference clip length.
+
+---
+
+## UI Integration in `main.py`
+
+### Controls
+
+Added near the existing tracking checkbox area:
+
+- **"Scan" button** — triggers audio scan on current video.
+- **Threshold slider** (0.0–1.0, step 0.05) — controls match strictness.
+- **Mode combobox** — "Average" / "Nearest".
+- **Reference source combobox** — "Current Profile" / "Custom Folder" (shows folder picker when "Custom Folder" selected).
+
+### Scan Workflow
+
+1. User clicks Scan.
+2. Reference clips collected: either all export `output_path` values from the current profile (via DB) or all audio/video files in a custom folder.
+3. Scan runs in a `QThread` so UI stays responsive.
+4. On completion, results sent to Timeline widget via signal.
+
+### Timeline Display
+
+- New `set_scan_regions(regions: list[tuple[float, float, float]])` method on Timeline.
+- Drawn as semi-transparent colored rectangles behind existing markers.
+- Color intensity proportional to score (brighter = higher match).
+- Cleared on file change or re-scan.
+
+### Keyboard Shortcut
+
+- `S` — jump cursor to the next scan region (similar to `M` for next marker).
+
+---
+
+## Data Flow
+
+```
+Reference clips (DB export paths or folder)
+    |
+librosa.load() each -> MFCC vectors (20-dim)
+    |
+Profile: { mean_vector, clip_vectors[] }
+    |
+Current video -> librosa.load() full audio (mono 22050Hz)
+    |
+Sliding 8s window (hop=1s) -> MFCC per window
+    |
+Cosine similarity vs profile -> score per position
+    |
+Threshold filter -> [(start, end, score), ...]
+    |
+Timeline: semi-transparent highlight regions
+```
+
+## Performance
+
+- 2-hour video at 22050 Hz mono ≈ 635 MB in memory (`librosa.load` returns float32: 7200 s × 22050 samples/s × 4 bytes).
+- MFCC extraction + sliding window: ~10-30s.
+- QThread keeps UI responsive.
+
+## What This Does NOT Do
+
+- No DB schema changes — scan results are ephemeral (visual only).
+- No auto-export — user decides what to cut.
+- No server integration — runs entirely in the Python client.
+- No GPU/ML model dependency — just librosa + numpy.
@@ -0,0 +1,739 @@
+# Audio Similarity Scanning — Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Scan a video's audio track to find segments matching a reference sound profile, displayed as highlighted regions on the timeline.
+
+**Architecture:** New `core/audio_scan.py` module extracts MFCC features from reference clips and slides an 8s window across the target video's audio, scoring each position via cosine similarity. A `ScanWorker` QThread runs the scan in the background, and results are drawn as semi-transparent rectangles on the existing Timeline widget.
+
+**Tech Stack:** Python 3, librosa 0.11, numpy, PyQt6
+
+---
+
+### Task 1: Core audio_scan module — build_profile
+
+**Files:**
+- Create: `core/audio_scan.py`
+- Create: `tests/test_audio_scan.py`
+
+**Step 1: Write the tests**
+
+```python
+# tests/test_audio_scan.py
+import tempfile, os
+import numpy as np
+from core.audio_scan import build_profile, _extract_mfcc
+
+
+def _make_wav(path: str, duration: float = 8.0, sr: int = 22050):
+    """Create a short sine-wave WAV file for testing."""
+    import soundfile as sf
+    t = np.linspace(0, duration, int(sr * duration), endpoint=False)
+    audio = 0.5 * np.sin(2 * np.pi * 440 * t)
+    sf.write(path, audio, sr)
+
+
+def test_extract_mfcc_returns_1d_vector():
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        _make_wav(f.name)
+    try:
+        vec = _extract_mfcc(f.name)
+        assert vec.shape == (20,)
+        assert not np.isnan(vec).any()
+    finally:
+        os.unlink(f.name)
+
+
+def test_build_profile_single_clip():
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        _make_wav(f.name)
+    try:
+        profile = build_profile([f.name])
+        assert "mean_vector" in profile
+        assert "clip_vectors" in profile
+        assert profile["mean_vector"].shape == (20,)
+        assert len(profile["clip_vectors"]) == 1
+    finally:
+        os.unlink(f.name)
+
+
+def test_build_profile_multiple_clips():
+    paths = []
+    try:
+        for i in range(3):
+            f = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
+            freq = 440 + i * 200
+            import soundfile as sf
+            t = np.linspace(0, 8.0, 22050 * 8, endpoint=False)
+            sf.write(f.name, 0.5 * np.sin(2 * np.pi * freq * t), 22050)
+            paths.append(f.name)
+            f.close()
+
+        profile = build_profile(paths)
+        assert len(profile["clip_vectors"]) == 3
+        assert profile["mean_vector"].shape == (20,)
+    finally:
+        for p in paths:
+            os.unlink(p)
+
+
+def test_build_profile_skips_missing_files():
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+        _make_wav(f.name)
+    try:
+        profile = build_profile([f.name, "/no/such/file.wav"])
+        assert len(profile["clip_vectors"]) == 1
+    finally:
+        os.unlink(f.name)
+
+
+def test_build_profile_empty_returns_none():
+    result = build_profile([])
+    assert result is None
+```
+
+**Step 2: Run tests to verify they fail**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/test_audio_scan.py -v`
+Expected: FAIL with `ModuleNotFoundError: No module named 'core.audio_scan'`
+
+**Step 3: Write the implementation**
+
+```python
+# core/audio_scan.py
+"""Audio similarity scanning — MFCC-based profile matching."""
+
+import numpy as np
+import librosa
+
+from .paths import _log
+
+_N_MFCC = 20
+_SR = 22050
+
+
+def _extract_mfcc(path: str, sr: int = _SR) -> np.ndarray:
+    """Load audio from a file and return a mean MFCC vector (20-dim)."""
+    y, _ = librosa.load(path, sr=sr, mono=True)
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=_N_MFCC)
+    return mfcc.mean(axis=1)  # average over time → (20,)
+
+
+def build_profile(clip_paths: list[str]) -> dict | None:
+    """Extract MFCCs from reference clips.
+
+    Returns dict with:
+      - mean_vector: averaged MFCC across all clips (20,)
+      - clip_vectors: list of individual MFCC vectors
+    Returns None if no clips could be loaded.
+    """
+    vectors = []
+    for p in clip_paths:
+        try:
+            vec = _extract_mfcc(p)
+            vectors.append(vec)
+        except Exception as e:
+            _log(f"audio_scan: skip {p}: {e}")
+    if not vectors:
+        return None
+    arr = np.stack(vectors)
+    return {
+        "mean_vector": arr.mean(axis=0),
+        "clip_vectors": vectors,
+    }
+```
+
+**Step 4: Run tests to verify they pass**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/test_audio_scan.py -v`
+Expected: all 5 PASS
+
+**Step 5: Commit**
+
+```bash
+git add core/audio_scan.py tests/test_audio_scan.py
+git commit -m "feat: add audio_scan module with build_profile"
+```
+
+---
+
+### Task 2: Core audio_scan module — scan_video
+
+**Files:**
+- Modify: `core/audio_scan.py`
+- Modify: `tests/test_audio_scan.py`
+
+**Step 1: Write the tests**
+
+Add to `tests/test_audio_scan.py`:
+
+```python
+from core.audio_scan import scan_video
+
+
+def test_scan_video_finds_matching_region():
+    """A video made of the same sine wave as the reference should match."""
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref:
+        _make_wav(ref.name, duration=8.0)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as vid:
+        _make_wav(vid.name, duration=20.0)
+    try:
+        profile = build_profile([ref.name])
+        regions = scan_video(vid.name, profile, mode="average", threshold=0.5, hop=1.0)
+        assert len(regions) > 0
+        for start, end, score in regions:
+            assert abs((end - start) - 8.0) < 1e-9
+            assert score >= 0.5
+            assert score >= 0.5
+    finally:
+        os.unlink(ref.name)
+        os.unlink(vid.name)
+
+
+def test_scan_video_nearest_mode():
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref:
+        _make_wav(ref.name, duration=8.0)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as vid:
+        _make_wav(vid.name, duration=20.0)
+    try:
+        profile = build_profile([ref.name])
+        regions = scan_video(vid.name, profile, mode="nearest", threshold=0.5, hop=1.0)
+        assert len(regions) > 0
+    finally:
+        os.unlink(ref.name)
+        os.unlink(vid.name)
+
+
+def test_scan_video_high_threshold_no_match():
+    """Different frequencies with very high threshold should not match."""
+    import soundfile as sf
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref:
+        t = np.linspace(0, 8.0, 22050 * 8, endpoint=False)
+        sf.write(ref.name, 0.5 * np.sin(2 * np.pi * 440 * t), 22050)
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as vid:
+        # White noise — very different from sine wave
+        sf.write(vid.name, np.random.randn(22050 * 20).astype(np.float32) * 0.1, 22050)
+    try:
+        profile = build_profile([ref.name])
+        regions = scan_video(vid.name, profile, mode="average", threshold=0.99, hop=1.0)
+        assert len(regions) == 0
+    finally:
+        os.unlink(ref.name)
+        os.unlink(vid.name)
+```
+
+**Step 2: Run tests to verify they fail**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/test_audio_scan.py::test_scan_video_finds_matching_region -v`
+Expected: FAIL with `ImportError: cannot import name 'scan_video'`
+
+**Step 3: Write the implementation**
+
+Add to `core/audio_scan.py`:
+
+```python
+def _cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
+    """Cosine similarity between two vectors.
+
+    Returns value in [-1, 1]. Negative means anti-correlated (very
+    dissimilar). For threshold filtering this is fine — negative scores
+    never exceed the threshold. Scores near 0 may be uncorrelated or
+    weakly anti-correlated.
+    """
+    na = np.linalg.norm(a)
+    nb = np.linalg.norm(b)
+    if na == 0 or nb == 0:
+        return 0.0
+    return float(np.dot(a, b) / (na * nb))
+
+
+def scan_video(
+    video_path: str,
+    profile: dict,
+    mode: str = "average",
+    threshold: float = 0.7,
+    hop: float = 1.0,
+    window: float = 8.0,
+    cancel_flag: object = None,
+) -> list[tuple[float, float, float]]:
+    """Slide a window across the video audio and score against the profile.
+
+    Args:
+        video_path: path to video/audio file
+        profile: dict from build_profile()
+        mode: "average" (compare to mean) or "nearest" (max over all clips)
+        threshold: minimum cosine similarity to include
+        hop: step size in seconds
+        window: window size in seconds (default 8s)
+        cancel_flag: object with _cancel bool attribute; checked each iteration
+
+    Returns:
+        list of (start_time, end_time, score) for regions above threshold
+    """
+    _log(f"audio_scan: loading {video_path}")
+    y, sr = librosa.load(video_path, sr=_SR, mono=True)
+    duration = len(y) / sr
+    _log(f"audio_scan: {duration:.1f}s loaded, scanning with hop={hop}s")
+
+    win_samples = int(window * sr)
+    hop_samples = int(hop * sr)
+
+    results = []
+    pos = 0
+    while pos + win_samples <= len(y):
+        if cancel_flag and getattr(cancel_flag, '_cancel', False):
+            _log("audio_scan: cancelled")
+            return results
+
+        chunk = y[pos : pos + win_samples]
+        mfcc = librosa.feature.mfcc(y=chunk, sr=sr, n_mfcc=_N_MFCC)
+        vec = mfcc.mean(axis=1)
+
+        if mode == "nearest":
+            score = max(
+                _cosine_similarity(vec, cv) for cv in profile["clip_vectors"]
+            )
+        else:  # average
+            score = _cosine_similarity(vec, profile["mean_vector"])
+
+        if score >= threshold:
+            start_t = pos / sr
+            results.append((start_t, start_t + window, score))
+
+        pos += hop_samples
+
+    _log(f"audio_scan: {len(results)} regions above threshold {threshold}")
+    return results
+```
+
+**Step 4: Run tests to verify they pass**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/test_audio_scan.py -v`
+Expected: all 8 PASS
+
+**Step 5: Commit**
+
+```bash
+git add core/audio_scan.py tests/test_audio_scan.py
+git commit -m "feat: add scan_video with average and nearest modes"
+```
+
+---
+
+### Task 3: Timeline — draw scan regions
+
+**Files:**
+- Modify: `main.py` (Timeline class, around lines 209-260 and 300-375)
+
+**Step 1: Add scan region storage to Timeline.__init__**
+
+In `main.py`, find the Timeline class `__init__` method (around line 198). After `self._markers` initialization (line 209), add:
+
+```python
+self._scan_regions: list[tuple[float, float, float]] = []  # (start, end, score)
+```
+
+**Step 2: Add set_scan_regions method**
+
+After the `set_markers` method (lines 249-252), add:
+
+```python
+def set_scan_regions(self, regions: list[tuple[float, float, float]]) -> None:
+    """regions: list of (start_time, end_time, score)"""
+    self._scan_regions = regions
+    self.update()
+
+def clear_scan_regions(self) -> None:
+    self._scan_regions = []
+    self.update()
+```
+
+**Step 3: Draw scan regions in paintEvent**
+
+In `paintEvent` (starts around line 282), find the marker drawing section (line 363, comment `# ── export markers`). BEFORE that section, add:
+
+```python
+# ── scan regions ──────────────────────────────────────────────
+if self._scan_regions and self._duration > 0:
+    for (start, end, score) in self._scan_regions:
+        x1 = int(start / self._duration * w)
+        x2 = int(end / self._duration * w)
+        alpha = int(40 + score * 80)  # 40–120 opacity
+        p.fillRect(x1, rh, x2 - x1, h - rh, QColor(100, 200, 255, alpha))
+```
+
+**Step 4: Verify manually**
+
+Run: `cd /media/p5/8-cut && python main.py`
+Expected: app starts without errors. No scan regions visible yet (none set).
+
+**Step 5: Commit**
+
+```bash
+git add main.py
+git commit -m "feat: timeline scan region rendering"
+```
+
+---
+
+### Task 4: ScanWorker QThread
+
+**Files:**
+- Modify: `main.py` (add ScanWorker class, after ExportWorker around line 165)
+
+**Step 1: Add the ScanWorker class**
+
+After the `ExportWorker` class (ends around line 165), add:
+
+```python
+class ScanWorker(QThread):
+    """Runs audio similarity scan off the main thread."""
+    finished = pyqtSignal(list)   # emits list of (start, end, score)
+    error = pyqtSignal(str)
+    progress = pyqtSignal(str)    # status message
+
+    def __init__(self, video_path: str, clip_paths: list[str],
+                 mode: str = "average", threshold: float = 0.7):
+        super().__init__()
+        self._video_path = video_path
+        self._clip_paths = clip_paths
+        self._mode = mode
+        self._threshold = threshold
+        self._cancel = False
+
+    def cancel(self) -> None:
+        self._cancel = True
+
+    def run(self):
+        from core.audio_scan import build_profile, scan_video
+        try:
+            self.progress.emit(f"Building profile from {len(self._clip_paths)} clips...")
+            profile = build_profile(self._clip_paths)
+            if self._cancel:
+                return
+            if profile is None:
+                self.error.emit("No valid reference clips found")
+                return
+            self.progress.emit("Scanning audio...")
+            regions = scan_video(
+                self._video_path, profile,
+                mode=self._mode, threshold=self._threshold,
+                cancel_flag=self,
+            )
+            if not self._cancel:
+                self.finished.emit(regions)
+        except Exception as e:
+            if not self._cancel:
+                self.error.emit(str(e))
+```
+
+**Step 2: Verify import works**
+
+Run: `cd /media/p5/8-cut && python -c "from main import ScanWorker; print('ok')"`
+Expected: `ok`
+
+**Step 3: Commit**
+
+```bash
+git add main.py
+git commit -m "feat: add ScanWorker QThread for background scanning"
+```
+
+---
+
+### Task 5: DB helper — get_all_export_paths
+
+**Files:**
+- Modify: `core/db.py`
+- Modify: `tests/test_audio_scan.py`
+
+**Step 1: Write the test**
+
+Add to `tests/test_audio_scan.py`:
+
+```python
+def test_db_get_all_export_paths():
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        from core.db import ProcessedDB
+        db = ProcessedDB(path)
+        db.add("a.mp4", 10.0, "/out/a_001.mp4", profile="test")
+        db.add("b.mp4", 20.0, "/out/b_001.mp4", profile="test")
+        db.add("c.mp4", 30.0, "/out/c_001.mp4", profile="other")
+        paths = db.get_all_export_paths("test")
+        assert set(paths) == {"/out/a_001.mp4", "/out/b_001.mp4"}
+    finally:
+        os.unlink(path)
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/test_audio_scan.py::test_db_get_all_export_paths -v`
+Expected: FAIL with `AttributeError: 'ProcessedDB' object has no attribute 'get_all_export_paths'`
+
+**Step 3: Write the implementation**
+
+Add to `core/db.py`, after the `get_markers` method. Note: no lock needed — follows
+the codebase convention where read-only methods don't acquire the lock.
+
+```python
+def get_all_export_paths(self, profile: str = "default") -> list[str]:
+    """Return all unique output_path values for a given profile."""
+    if not self._enabled:
+        return []
+    rows = self._con.execute(
+        "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
+        (profile,),
+    ).fetchall()
+    return [r[0] for r in rows]
+```
+
+**Step 4: Run test to verify it passes**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/test_audio_scan.py::test_db_get_all_export_paths -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add core/db.py tests/test_audio_scan.py
+git commit -m "feat: add get_all_export_paths to ProcessedDB"
+```
+
+---
+
+### Task 6: UI controls for audio scanning
+
+**Files:**
+- Modify: `main.py` (MainWindow class — control creation ~1490-1575, layout ~1620-1640)
+
+**Step 1: Add scan control widgets**
+
+In the MainWindow `__init__`, find the control creation section. After `self._chk_track` (around line 1501), add:
+
+```python
+# ── audio scan controls ──────────────────────────────────────
+self._btn_scan = QPushButton("Scan")
+self._btn_scan.setToolTip("Scan current video for audio segments matching reference clips")
+self._btn_scan.clicked.connect(self._start_scan)
+
+self._sld_threshold = QDoubleSpinBox()
+self._sld_threshold.setRange(0.0, 1.0)
+self._sld_threshold.setSingleStep(0.05)
+self._sld_threshold.setValue(0.7)
+self._sld_threshold.setPrefix("Thr: ")
+self._sld_threshold.setToolTip("Similarity threshold (0=match everything, 1=exact match)")
+
+self._cmb_scan_mode = QComboBox()
+self._cmb_scan_mode.addItems(["Average", "Nearest"])
+self._cmb_scan_mode.setToolTip("Average: compare to mean profile\nNearest: compare to closest clip")
+
+self._cmb_scan_ref = QComboBox()
+self._cmb_scan_ref.addItems(["Current Profile", "Custom Folder"])
+self._cmb_scan_ref.currentIndexChanged.connect(self._on_scan_ref_changed)
+self._scan_folder: str = ""
+
+self._scan_worker: ScanWorker | None = None
+```
+
+**Step 2: Add controls to settings_row layout**
+
+Find the `settings_row` assembly (around line 1620). Before `settings_row.addStretch()` (around line 1635), add:
+
+```python
+settings_row.addWidget(self._btn_scan)
+settings_row.addWidget(self._sld_threshold)
+settings_row.addWidget(self._cmb_scan_mode)
+settings_row.addWidget(self._cmb_scan_ref)
+```
+
+**Step 3: Add handler methods**
+
+Add these methods to MainWindow (after `_jump_to_next_marker` around line 2410):
+
+```python
+def _on_scan_ref_changed(self, index: int) -> None:
+    if index == 1:  # Custom Folder
+        folder = QFileDialog.getExistingDirectory(self, "Select reference clip folder")
+        if folder:
+            self._scan_folder = folder
+        else:
+            self._cmb_scan_ref.setCurrentIndex(0)
+
+def _cleanup_scan_worker(self) -> None:
+    """Disconnect signals and schedule deletion of old scan worker."""
+    if self._scan_worker is not None:
+        try:
+            self._scan_worker.finished.disconnect()
+            self._scan_worker.error.disconnect()
+            self._scan_worker.progress.disconnect()
+        except TypeError:
+            pass  # already disconnected
+        self._scan_worker.deleteLater()
+        self._scan_worker = None
+
+def _start_scan(self) -> None:
+    """Start a background audio scan of the loaded video.
+
+    Reference clips come either from the current profile's exports that
+    still exist on disk, or from the media files in the chosen custom
+    folder. The method returns early with a status message when no video is
+    loaded, a scan is already running, or no reference clips are found.
+    """
+    if not self._file_path:
+        self._show_status("No video loaded")
+        return
+    if self._scan_worker and self._scan_worker.isRunning():
+        self._show_status("Scan already running")
+        return
+
+    # Clean up previous worker
+    self._cleanup_scan_worker()
+
+    # Collect reference clip paths
+    if self._cmb_scan_ref.currentIndex() == 0:
+        # Current profile — all exports across all files in this profile
+        clip_paths = [p for p in self._db.get_all_export_paths(self._profile)
+                      if os.path.exists(p)]
+    else:
+        # Custom folder
+        if not self._scan_folder:
+            self._show_status("No reference folder selected")
+            return
+        # Only direct children of the folder are considered (no recursion).
+        exts = (".mp4", ".mkv", ".avi", ".mov", ".wav", ".mp3", ".flac")
+        clip_paths = [
+            os.path.join(self._scan_folder, f)
+            for f in sorted(os.listdir(self._scan_folder))
+            if f.lower().endswith(exts)
+        ]
+
+    if not clip_paths:
+        self._show_status("No reference clips found")
+        return
+
+    # The worker receives the lower-cased combo box text as the mode.
+    mode = self._cmb_scan_mode.currentText().lower()
+    threshold = self._sld_threshold.value()
+
+    # Disable the button for the duration of the scan; the done/error
+    # handlers re-enable it.
+    self._btn_scan.setEnabled(False)
+    self._scan_file_path = self._file_path  # remember which file we're scanning
+    self._show_status(f"Scanning with {len(clip_paths)} reference clips...")
+
+    # Connect all signals before starting so no emission is missed.
+    self._scan_worker = ScanWorker(self._file_path, clip_paths, mode, threshold)
+    self._scan_worker.finished.connect(self._on_scan_done)
+    self._scan_worker.error.connect(self._on_scan_error)
+    self._scan_worker.progress.connect(self._show_status)
+    self._scan_worker.start()
+
+def _on_scan_done(self, regions: list) -> None:
+    self._btn_scan.setEnabled(True)
+    # Ignore stale results if the user switched files during scan
+    if self._file_path != getattr(self, '_scan_file_path', None):
+        return
+    self._timeline.set_scan_regions(regions)
+    self._show_status(f"Scan complete: {len(regions)} matching regions")
+
+def _on_scan_error(self, msg: str) -> None:
+    """Re-enable the scan button and show the worker's error message."""
+    self._btn_scan.setEnabled(True)
+    self._show_status(f"Scan error: {msg}")
+```
+
+**Step 4: Verify manually**
+
+Run: `cd /media/p5/8-cut && python main.py`
+Expected: Scan button, threshold spinner, mode dropdown, and reference source dropdown visible in the settings row. Clicking Scan with no file loaded shows "No video loaded" in status.
+
+**Step 5: Commit**
+
+```bash
+git add main.py
+git commit -m "feat: add scan UI controls and start_scan handler"
+```
+
+---
+
+### Task 7: Keyboard shortcut — jump to next scan region
+
+**Files:**
+- Modify: `main.py`
+
+**Step 1: Add the keyboard shortcut**
+
+Find the shortcut definitions (around line 1728, where `QShortcut(QKeySequence("M"), ...)` is defined). Add after it:
+
+```python
+QShortcut(QKeySequence("S"), self, context=ctx).activated.connect(self._jump_to_next_scan_region)
+```
+
+**Step 2: Add the jump method**
+
+After `_on_scan_error` (or after `_jump_to_next_marker`), add:
+
+```python
+def _jump_to_next_scan_region(self) -> None:
+    regions = sorted(self._timeline._scan_regions, key=lambda r: r[0])
+    if not regions:
+        return
+    for (start, _end, _score) in regions:
+        if start > self._cursor + 0.1:
+            self._step_cursor(start - self._cursor)
+            return
+    # Wrap to first region
+    self._step_cursor(regions[0][0] - self._cursor)
+```
+
+**Step 3: Update help text**
+
+Find the help/shortcuts tooltip (around line 1757). Add a row:
+
+```python
+"<tr><td><b>S</b></td><td>Jump to next scan region</td></tr>"
+```
+
+**Step 4: Clear scan regions and cancel running scan on file change**
+
+Find `_load_file` method (around line 1931). After the existing marker/state resets, add:
+
+```python
+self._timeline.clear_scan_regions()
+if self._scan_worker and self._scan_worker.isRunning():
+    self._scan_worker.cancel()
+self._cleanup_scan_worker()
+self._btn_scan.setEnabled(True)
+```
+
+**Step 5: Verify manually**
+
+Run: `cd /media/p5/8-cut && python main.py`
+Expected: S key does nothing when no scan regions exist. After a scan, S jumps through matched regions.
+
+**Step 6: Commit**
+
+```bash
+git add main.py
+git commit -m "feat: add S shortcut and clear scan on file change"
+```
+
+---
+
+### Task 8: Final integration test
+
+**Step 1: End-to-end manual test**
+
+1. Open the app: `cd /media/p5/8-cut && python main.py`
+2. Load a video file
+3. Export a few clips (these become the reference)
+4. Set reference source to "Current Profile"
+5. Click "Scan"
+6. Verify: status shows progress messages, then "Scan complete: N matching regions"
+7. Verify: cyan-tinted regions appear on the timeline
+8. Press S to jump through scan regions
+9. Change threshold and re-scan — verify different number of regions
+10. Switch mode to "Nearest" and re-scan
+11. Switch reference to "Custom Folder", pick a folder with clips
+12. Re-scan and verify results
+
+**Step 2: Run all tests**
+
+Run: `cd /media/p5/8-cut && python -m pytest tests/ -v`
+Expected: all tests PASS
+
+**Step 3: Final commit**
+
+```bash
+git add -A
+git commit -m "feat: audio similarity scanning complete"
+```
@@ -1,4 +1,25 @@
+# Core GUI
 PyQt6>=6.4
 python-mpv>=1.0
-pytest>=7.0
+
+# Audio & ML
+librosa>=0.10
+numpy>=1.24
+scikit-learn>=1.3
+joblib>=1.3
+soundfile>=0.12
+
+# Deep learning (torch installed separately for CUDA support)
+# torch and torchaudio are installed via --index-url in setup_env.sh
+torchaudio>=2.0
+
+# Object detection
 ultralytics>=8.0
+
+# Server API
+fastapi>=0.100
+pydantic>=2.0
+uvicorn>=0.23
+
+# Dev
+pytest>=7.0
@@ -0,0 +1,29 @@
+from fastapi import FastAPI, WebSocket
+from fastapi.middleware.cors import CORSMiddleware
+
+from core.db import ProcessedDB
+from .config import DB_PATH
+from .routes import files, stream, markers, export, hidden
+from . import ws
+
+# ASGI application object; routers and the WebSocket endpoint attach to it below.
+app = FastAPI(title="8-cut Server")
+
+# NOTE(review): wildcard CORS accepts any origin, method and header —
+# confirm this is intended outside local/trusted networks.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Module-level database handle; route modules import it from here at call time.
+db = ProcessedDB(DB_PATH)
+
+# Every REST router is mounted under the /api prefix.
+app.include_router(files.router, prefix="/api")
+app.include_router(stream.router, prefix="/api")
+app.include_router(markers.router, prefix="/api")
+app.include_router(export.router, prefix="/api")
+app.include_router(hidden.router, prefix="/api")
+
+
+@app.websocket("/ws/export")
+async def export_ws(websocket: WebSocket):
+    """WebSocket endpoint at /ws/export; delegates the connection to ``ws.connect``."""
+    await ws.connect(websocket)
@@ -0,0 +1,171 @@
+import hashlib
+import os
+import subprocess
+import threading
+from enum import Enum
+
+from core.paths import _bin, _log
+from .config import CACHE_DIR, QUALITY_PRESETS
+
+
+class CacheStatus(str, Enum):
+    """State of a cached artifact; a ``str`` enum so members compare equal to their string values."""
+    READY = "ready"  # the cached file exists on disk
+    TRANSCODING = "transcoding"  # a worker thread for this artifact is alive
+    MISSING = "missing"  # no cached file and no live worker
+    ERROR = "error"  # NOTE(review): not returned by the status helpers in this module — confirm use
+
+
+_jobs_lock = threading.Lock()
+_active_jobs: dict[str, threading.Thread] = {}
+
+
+def _cache_key(source_path: str) -> str:
+    """Stable hash from absolute source path."""
+    return hashlib.sha256(source_path.encode()).hexdigest()[:16]
+
+
+def cache_path(source_path: str, quality: str) -> str:
+    """Return the cache location ``CACHE_DIR/<quality>/<key>.mp4`` for *source_path*."""
+    key = _cache_key(source_path)
+    return os.path.join(CACHE_DIR, quality, f"{key}.mp4")
+
+
+def audio_cache_path(source_path: str) -> str:
+    """Return the cache location ``CACHE_DIR/audio/<key>.wav`` for *source_path*."""
+    key = _cache_key(source_path)
+    return os.path.join(CACHE_DIR, "audio", f"{key}.wav")
+
+
+def get_status(source_path: str, quality: str) -> CacheStatus:
+    cp = cache_path(source_path, quality)
+    if os.path.isfile(cp):
+        return CacheStatus.READY
+    job_key = f"{source_path}:{quality}"
+    with _jobs_lock:
+        if job_key in _active_jobs and _active_jobs[job_key].is_alive():
+            return CacheStatus.TRANSCODING
+    return CacheStatus.MISSING
+
+
+def get_audio_status(source_path: str) -> CacheStatus:
+    ap = audio_cache_path(source_path)
+    if os.path.isfile(ap):
+        return CacheStatus.READY
+    job_key = f"{source_path}:audio"
+    with _jobs_lock:
+        if job_key in _active_jobs and _active_jobs[job_key].is_alive():
+            return CacheStatus.TRANSCODING
+    return CacheStatus.MISSING
+
+
+def get_all_statuses(source_path: str) -> dict:
+    result = {}
+    for q in QUALITY_PRESETS:
+        result[q] = get_status(source_path, q)
+    result["audio"] = get_audio_status(source_path)
+    return result
+
+
+def _transcode_worker(source_path: str, quality: str) -> None:
+    preset = QUALITY_PRESETS[quality]
+    out = cache_path(source_path, quality)
+    os.makedirs(os.path.dirname(out), exist_ok=True)
+    tmp = out + ".tmp.mp4"
+
+    cmd = [_bin("ffmpeg"), "-y", "-i", source_path, "-an"]
+
+    if preset["height"] > 0:
+        cmd += [
+            "-vf", f"scale=-2:{preset['height']}:flags=lanczos",
+        ]
+
+    cmd += [
+        "-c:v", "libx264",
+        "-preset", "fast",
+        "-b:v", preset["bitrate"],
+        "-movflags", "+faststart",
+        tmp,
+    ]
+
+    _log(f"Transcode start: {source_path} @ {quality}")
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=3600)
+        if result.returncode == 0:
+            os.rename(tmp, out)
+            _log(f"Transcode done: {out}")
+        else:
+            _log(f"Transcode failed: {result.stderr[-300:]}")
+            if os.path.exists(tmp):
+                os.unlink(tmp)
+    except Exception as e:
+        _log(f"Transcode error: {e}")
+        if os.path.exists(tmp):
+            os.unlink(tmp)
+
+
+def _audio_extract_worker(source_path: str) -> None:
+    out = audio_cache_path(source_path)
+    os.makedirs(os.path.dirname(out), exist_ok=True)
+    tmp = out + ".tmp.wav"
+
+    cmd = [
+        _bin("ffmpeg"), "-y",
+        "-i", source_path,
+        "-vn",
+        "-c:a", "pcm_s16le",
+        tmp,
+    ]
+
+    _log(f"Audio extract start: {source_path}")
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
+        if result.returncode == 0:
+            os.rename(tmp, out)
+            _log(f"Audio extract done: {out}")
+        else:
+            _log(f"Audio extract failed: {result.stderr[-300:]}")
+            if os.path.exists(tmp):
+                os.unlink(tmp)
+    except Exception as e:
+        _log(f"Audio extract error: {e}")
+        if os.path.exists(tmp):
+            os.unlink(tmp)
+
+
+def _prune_dead_jobs() -> None:
+    """Remove finished threads from _active_jobs. Must be called under _jobs_lock."""
+    dead = [k for k, t in _active_jobs.items() if not t.is_alive()]
+    for k in dead:
+        del _active_jobs[k]
+
+
+def ensure_transcode(source_path: str, quality: str) -> CacheStatus:
+    """Start transcode if not cached. Returns current status."""
+    status = get_status(source_path, quality)
+    if status != CacheStatus.MISSING:
+        return status
+
+    job_key = f"{source_path}:{quality}"
+    with _jobs_lock:
+        _prune_dead_jobs()
+        if job_key in _active_jobs and _active_jobs[job_key].is_alive():
+            return CacheStatus.TRANSCODING
+        t = threading.Thread(target=_transcode_worker, args=(source_path, quality), daemon=True)
+        _active_jobs[job_key] = t
+        t.start()
+    return CacheStatus.TRANSCODING
+
+
+def ensure_audio(source_path: str) -> CacheStatus:
+    """Start audio extraction if not cached. Returns current status."""
+    status = get_audio_status(source_path)
+    if status != CacheStatus.MISSING:
+        return status
+
+    job_key = f"{source_path}:audio"
+    with _jobs_lock:
+        _prune_dead_jobs()
+        if job_key in _active_jobs and _active_jobs[job_key].is_alive():
+            return CacheStatus.TRANSCODING
+        t = threading.Thread(target=_audio_extract_worker, args=(source_path,), daemon=True)
+        _active_jobs[job_key] = t
+        t.start()
+    return CacheStatus.TRANSCODING
@@ -0,0 +1,21 @@
+import os
+from pathlib import Path
+
+
+# Comma-separated list from $MEDIA_DIRS (blank entries dropped); defaults to
+# the user's home directory.
+MEDIA_DIRS: list[str] = [
+    d.strip() for d in os.environ.get("MEDIA_DIRS", str(Path.home())).split(",") if d.strip()
+]
+# Each setting below reads the environment variable of the same name and
+# falls back to the default shown.
+EXPORT_DIR: str = os.environ.get("EXPORT_DIR", str(Path.home() / "8cut-exports"))
+DB_PATH: str = os.environ.get("DB_PATH", str(Path.home() / ".8cut.db"))
+CACHE_DIR: str = os.environ.get("CACHE_DIR", str(Path.home() / ".8cut-cache"))
+HOST: str = os.environ.get("HOST", "0.0.0.0")
+PORT: int = int(os.environ.get("PORT", "8000"))
+
+# Lower-case file extensions treated as video files.
+VIDEO_EXTENSIONS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".ts", ".flv", ".wmv"}
+
+# Named quality presets: target frame height and video bitrate.
+QUALITY_PRESETS = {
+    "potato": {"height": 480, "bitrate": "500k"},
+    "low":    {"height": 720, "bitrate": "2M"},
+    "medium": {"height": 1080, "bitrate": "5M"},
+    "high":   {"height": 0, "bitrate": "10M"},  # 0 = original resolution
+}
@@ -0,0 +1,227 @@
+import os
+import re
+import shutil
+import threading
+import time
+import uuid
+
+from fastapi import APIRouter, HTTPException, Query
+from pydantic import BaseModel
+
+from core.export import ExportRunner
+from core.paths import build_export_path, build_sequence_dir
+from core.ffmpeg import _RATIOS, apply_keyframes_to_jobs
+from .. import ws as ws_module
+from ..config import EXPORT_DIR, MEDIA_DIRS
+
+router = APIRouter()
+
+_jobs: dict[str, dict] = {}
+_counter_lock = threading.Lock()
+
+_VALID_ENCODERS = {"libx264", "h264_nvenc", "h264_vaapi", "h264_qsv", "h264_amf", "h264_videotoolbox"}
+
+_MAX_FINISHED_JOBS = 200
+
+
+class CropKeyframe(BaseModel):
+    """One crop keyframe supplied with an export request."""
+    time: float  # position of the keyframe
+    center: float  # crop center at this keyframe
+    ratio: str | None = None  # optional ratio override for this keyframe
+    rand_portrait: bool = False
+    rand_square: bool = False
+
+
+class ExportRequest(BaseModel):
+    """Request body for ``POST /export``."""
+    input_path: str  # source file; must resolve inside a configured media directory
+    cursor: float  # start position of the first clip
+    name: str  # base name of the outputs; path separators and ".." are rejected
+    clips: int = 3  # number of clips to export
+    spread: float = 3.0  # offset between consecutive clip starts
+    short_side: int | None = None
+    portrait_ratio: str | None = None  # when set, must be a key of _RATIOS
+    crop_center: float = 0.5
+    format: str = "MP4"  # "WebP" / "WebP sequence" select image-sequence output
+    label: str = ""
+    category: str = ""
+    profile: str = "default"
+    folder_suffix: str = ""  # appended to the export dir name as "_<suffix>"
+    crop_keyframes: list[CropKeyframe] | None = None
+    rand_portrait: bool = False
+    rand_square: bool = False
+    encoder: str = "libx264"  # must be one of _VALID_ENCODERS
+
+
+def _next_counter(folder: str, basename: str) -> int:
+    """Scan folder for existing {basename}_NNN dirs and return max + 1."""
+    pattern = re.compile(rf'^{re.escape(basename)}_(\d{{3}})$')
+    highest = 0
+    if os.path.isdir(folder):
+        for entry in os.listdir(folder):
+            m = pattern.match(entry)
+            if m:
+                highest = max(highest, int(m.group(1)))
+    return highest + 1
+
+
+def _validate_input_path(path: str) -> str:
+    """Verify input_path falls under a configured MEDIA_DIR."""
+    real = os.path.realpath(path)
+    for root in MEDIA_DIRS:
+        root_real = os.path.realpath(root)
+        if real == root_real or real.startswith(root_real + os.sep):
+            return real
+    raise HTTPException(status_code=403, detail="input_path outside media directories")
+
+
+@router.post("/export")
+def start_export(req: ExportRequest):
+    from ..app import db
+
+    # Validate inputs
+    input_path = _validate_input_path(req.input_path)
+
+    if req.encoder not in _VALID_ENCODERS:
+        raise HTTPException(status_code=400, detail=f"invalid encoder: {req.encoder}")
+
+    if req.portrait_ratio is not None and req.portrait_ratio not in _RATIOS:
+        raise HTTPException(status_code=400, detail=f"invalid portrait_ratio: {req.portrait_ratio}")
+
+    if req.folder_suffix and ("/" in req.folder_suffix or "\\" in req.folder_suffix or ".." in req.folder_suffix):
+        raise HTTPException(status_code=400, detail="folder_suffix must not contain path separators")
+
+    if "/" in req.name or "\\" in req.name or ".." in req.name:
+        raise HTTPException(status_code=400, detail="name must not contain path separators")
+
+    job_id = str(uuid.uuid4())[:8]
+    folder = EXPORT_DIR
+    if req.folder_suffix:
+        folder = folder.rstrip(os.sep) + "_" + req.folder_suffix
+
+    image_sequence = req.format in ("WebP", "WebP sequence")
+
+    # Lock counter + directory creation to prevent race between concurrent exports
+    with _counter_lock:
+        counter = _next_counter(folder, req.name)
+        jobs = []
+        for i in range(req.clips):
+            start = req.cursor + i * req.spread
+            if image_sequence:
+                out = build_sequence_dir(folder, req.name, counter, sub=i if req.clips > 1 else None)
+            else:
+                out = build_export_path(folder, req.name, counter, sub=i if req.clips > 1 else None)
+            os.makedirs(os.path.dirname(out), exist_ok=True)
+            jobs.append((start, out, req.portrait_ratio, req.crop_center))
+
+    # Apply keyframes if provided — returns 6-tuples, strip back to 4
+    if req.crop_keyframes:
+        kf_tuples = [
+            (kf.time, kf.center, kf.ratio, kf.rand_portrait, kf.rand_square)
+            for kf in req.crop_keyframes
+        ]
+        widened = apply_keyframes_to_jobs(
+            jobs, kf_tuples,
+            req.crop_center, req.portrait_ratio,
+            req.rand_portrait, req.rand_square,
+        )
+        jobs = [(s, o, r, c) for s, o, r, c, _rp, _rs in widened]
+
+    completed = []
+
+    def on_clip_done(path: str):
+        completed.append(path)
+        # Record in DB so markers show up
+        db.add(
+            filename=os.path.basename(input_path),
+            start_time=req.cursor,
+            output_path=path,
+            label=req.label,
+            category=req.category,
+            short_side=req.short_side,
+            portrait_ratio=req.portrait_ratio or "",
+            crop_center=req.crop_center,
+            fmt=req.format,
+            clip_count=req.clips,
+            spread=req.spread,
+            profile=req.profile,
+        )
+        ws_module.broadcast({"type": "clip_done", "job_id": job_id, "path": path})
+
+    def on_all_done():
+        _jobs[job_id]["status"] = "done"
+        _jobs[job_id].pop("runner", None)
+        ws_module.broadcast({"type": "all_done", "job_id": job_id})
+
+    def on_error(msg: str):
+        _jobs[job_id]["status"] = "error"
+        _jobs[job_id]["error"] = msg
+        _jobs[job_id].pop("runner", None)
+        ws_module.broadcast({"type": "error", "job_id": job_id, "msg": msg})
+
+    runner = ExportRunner(
+        input_path=input_path,
+        jobs=jobs,
+        short_side=req.short_side,
+        image_sequence=image_sequence,
+        encoder=req.encoder,
+        on_clip_done=on_clip_done,
+        on_all_done=on_all_done,
+        on_error=on_error,
+    )
+
+    # Evict old finished jobs to prevent unbounded growth
+    finished = [k for k, v in _jobs.items() if v["status"] in ("done", "error")]
+    if len(finished) > _MAX_FINISHED_JOBS:
+        for k in finished[:len(finished) - _MAX_FINISHED_JOBS]:
+            del _jobs[k]
+
+    _jobs[job_id] = {
+        "status": "running",
+        "total": len(jobs),
+        "completed": completed,
+        "runner": runner,
+        "created_at": time.monotonic(),
+    }
+    runner.start()
+
+    return {"job_id": job_id}
+
+
+@router.get("/export/{job_id}")
+def get_export_status(job_id: str):
+    job = _jobs.get(job_id)
+    if job is None:
+        raise HTTPException(status_code=404, detail="job not found")
+    return {
+        "status": job["status"],
+        "total": job["total"],
+        "completed": len(job["completed"]),
+        "outputs": list(job["completed"]),
+        "error": job.get("error"),
+    }
+
+
+def _is_under_export_dir(real_path: str) -> bool:
+    """Check if path is under EXPORT_DIR or any EXPORT_DIR_suffix sibling."""
+    export_real = os.path.realpath(EXPORT_DIR).rstrip(os.sep)
+    # Walk up ancestors — must find EXPORT_DIR or EXPORT_DIR_suffix
+    d = os.path.dirname(real_path)
+    while d != os.path.dirname(d):
+        if d == export_real or d.startswith(export_real + "_"):
+            return True
+        d = os.path.dirname(d)
+    return False
+
+
+@router.delete("/export")
+def delete_export(output_path: str = Query(...)):
+    from ..app import db
+    real = os.path.realpath(output_path)
+    if not _is_under_export_dir(real):
+        raise HTTPException(status_code=403, detail="path outside export directory")
+    db.delete_by_output_path(real)
+    if os.path.isfile(real):
+        os.unlink(real)
+    elif os.path.isdir(real):
+        shutil.rmtree(real)
+    return {"deleted": real}
@@ -0,0 +1,56 @@
+import os
+
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import FileResponse
+
+from ..config import MEDIA_DIRS, VIDEO_EXTENSIONS
+
+router = APIRouter()
+
+
+def _scan_videos(root: str) -> list[dict]:
+    results = []
+    for dirpath, _, filenames in os.walk(root):
+        for f in sorted(filenames):
+            if os.path.splitext(f)[1].lower() in VIDEO_EXTENSIONS:
+                full = os.path.join(dirpath, f)
+                rel = os.path.relpath(full, root)
+                results.append({
+                    "name": f,
+                    "path": rel,
+                    "root": root,
+                    "size": os.path.getsize(full),
+                })
+    return results
+
+
+@router.get("/files")
+def list_files(root: str | None = Query(None)):
+    dirs = [root] if root and root in MEDIA_DIRS else MEDIA_DIRS
+    files = []
+    for d in dirs:
+        files.extend(_scan_videos(d))
+    return files
+
+
+@router.get("/roots")
+def list_roots():
+    """Return the configured media directories."""
+    return MEDIA_DIRS
+
+
+def _safe_resolve(path: str, root: str) -> str:
+    """Join path to root and verify it stays within the root directory."""
+    if root not in MEDIA_DIRS:
+        raise HTTPException(status_code=400, detail="invalid root")
+    full = os.path.realpath(os.path.join(root, path))
+    if not full.startswith(os.path.realpath(root) + os.sep):
+        raise HTTPException(status_code=403, detail="path outside media root")
+    return full
+
+
+@router.get("/video/{path:path}")
+def serve_video(path: str, root: str = Query(...)):
+    full = _safe_resolve(path, root)
+    if not os.path.isfile(full):
+        raise HTTPException(status_code=404, detail="not found")
+    return FileResponse(full, media_type="video/mp4")
@@ -0,0 +1,25 @@
+from fastapi import APIRouter, Query
+
+router = APIRouter()
+
+
+def _db():
+    """Return the shared database handle, importing it from the app module at call time."""
+    from ..app import db
+    return db
+
+
+@router.post("/hidden/{filename}")
+def hide_file(filename: str, profile: str = Query("default")):
+    """Mark *filename* as hidden for *profile* and echo the name back."""
+    _db().hide_file(filename, profile)
+    return {"hidden": filename}
+
+
+@router.delete("/hidden/{filename}")
+def unhide_file(filename: str, profile: str = Query("default")):
+    """Remove the hidden mark of *filename* for *profile* and echo the name back."""
+    _db().unhide_file(filename, profile)
+    return {"unhidden": filename}
+
+
+@router.get("/hidden")
+def get_hidden(profile: str = Query("default")):
+    """Return the hidden file names of *profile*, sorted."""
+    return sorted(_db().get_hidden_files(profile))
@@ -0,0 +1,27 @@
+from fastapi import APIRouter, Query
+
+router = APIRouter()
+
+
+def _db():
+    """Return the shared database handle, importing it from the app module at call time."""
+    from ..app import db
+    return db
+
+
+@router.get("/markers/{filename}")
+def get_markers(filename: str, profile: str = Query("default")):
+    markers = _db().get_markers(filename, profile)
+    return [
+        {"start_time": t, "marker_number": n, "output_path": p}
+        for t, n, p in markers
+    ]
+
+
+@router.get("/profiles")
+def get_profiles():
+    """Return the profiles reported by the database."""
+    return _db().get_profiles()
+
+
+@router.get("/labels")
+def get_labels():
+    """Return the labels reported by the database."""
+    return _db().get_labels()
@@ -0,0 +1,49 @@
+import os
+
+from fastapi import APIRouter, HTTPException, Query
+from fastapi.responses import FileResponse, JSONResponse
+
+from ..config import MEDIA_DIRS, QUALITY_PRESETS
+from .. import cache
+
+router = APIRouter()
+
+
+def _resolve_source(path: str, root: str) -> str:
+    """Join path to root, verify it stays within root, and exists."""
+    if root not in MEDIA_DIRS:
+        raise HTTPException(status_code=400, detail="invalid root")
+    full = os.path.realpath(os.path.join(root, path))
+    if not full.startswith(os.path.realpath(root) + os.sep):
+        raise HTTPException(status_code=403, detail="path outside media root")
+    if not os.path.isfile(full):
+        raise HTTPException(status_code=404, detail="not found")
+    return full
+
+
+@router.get("/stream/{path:path}")
+def stream_video(path: str, root: str = Query(...), quality: str = Query("low")):
+    if quality not in QUALITY_PRESETS:
+        raise HTTPException(status_code=400, detail=f"invalid quality: {quality}")
+    source = _resolve_source(path, root)
+
+    status = cache.ensure_transcode(source, quality)
+    if status == cache.CacheStatus.READY:
+        return FileResponse(cache.cache_path(source, quality), media_type="video/mp4")
+    return JSONResponse({"status": status, "quality": quality}, status_code=202)
+
+
+@router.get("/audio/{path:path}")
+def stream_audio(path: str, root: str = Query(...)):
+    source = _resolve_source(path, root)
+
+    status = cache.ensure_audio(source)
+    if status == cache.CacheStatus.READY:
+        return FileResponse(cache.audio_cache_path(source), media_type="audio/wav")
+    return JSONResponse({"status": status}, status_code=202)
+
+
+@router.get("/cache/status/{path:path}")
+def cache_status(path: str, root: str = Query(...)):
+    """Return the cache status of every quality preset and of the audio for a source file."""
+    source = _resolve_source(path, root)
+    return cache.get_all_statuses(source)
@@ -0,0 +1,43 @@
+import asyncio
+import json
+import threading
+
+from fastapi import WebSocket, WebSocketDisconnect
+
+_lock = threading.Lock()
+_connections: list[WebSocket] = []
+_loop: asyncio.AbstractEventLoop | None = None
+
+
+async def connect(ws: WebSocket):
+    global _loop
+    _loop = asyncio.get_running_loop()
+    await ws.accept()
+    with _lock:
+        _connections.append(ws)
+    try:
+        while True:
+            await ws.receive_text()  # keep alive
+    except (WebSocketDisconnect, Exception):
+        pass
+    finally:
+        with _lock:
+            if ws in _connections:
+                _connections.remove(ws)
+
+
+def broadcast(msg: dict):
+    """Send a message to all connected WebSocket clients.
+
+    Called from sync code (export callbacks running in background threads),
+    so we schedule sends on uvicorn's event loop.
+    """
+    if _loop is None:
+        return
+    data = json.dumps(msg)
+    with _lock:
+        for ws in list(_connections):
+            try:
+                asyncio.run_coroutine_threadsafe(ws.send_text(data), _loop)
+            except Exception:
+                pass
@@ -0,0 +1,108 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# ──────────────────────────────────────────────────────────────────────
+# 8-cut environment setup — supports conda (miniforge) or python venv
+#
+# Usage:
+#   ./setup_env.sh              # auto-detect (prefers conda if available)
+#   ./setup_env.sh --conda      # force conda
+#   ./setup_env.sh --venv       # force python venv
+# ──────────────────────────────────────────────────────────────────────
+
+ENV_NAME="8cut"
+PYTHON_VERSION="3.12"
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+VENV_DIR="$SCRIPT_DIR/.venv"
+
+# CUDA version for PyTorch index URL
+TORCH_INDEX="https://download.pytorch.org/whl/cu128"
+
+# ── Parse args ────────────────────────────────────────────────────────
+
+# MODE stays empty unless --conda or --venv is given; the last flag wins.
+MODE=""
+for arg in "$@"; do
+    case "$arg" in
+        --conda) MODE="conda" ;;
+        --venv)  MODE="venv"  ;;
+        *)       echo "Unknown arg: $arg"; exit 1 ;;
+    esac
+done
+
+# No flag given: prefer conda when it is on PATH, otherwise fall back to venv.
+if [ -z "$MODE" ]; then
+    if command -v conda &>/dev/null; then
+        MODE="conda"
+    else
+        MODE="venv"
+    fi
+    echo "Auto-detected mode: $MODE"
+fi
+
+# ── Conda setup ───────────────────────────────────────────────────────
+
+setup_conda() {
+    echo "==> Setting up conda environment: $ENV_NAME"
+
+    # Source conda shell hooks if not already active
+    if ! command -v conda &>/dev/null; then
+        echo "conda not found in PATH"
+        exit 1
+    fi
+    eval "$(conda shell.bash hook)"
+
+    if conda env list | grep -qw "$ENV_NAME"; then
+        echo "  Environment '$ENV_NAME' already exists, updating..."
+        conda activate "$ENV_NAME"
+    else
+        echo "  Creating environment '$ENV_NAME' with Python $PYTHON_VERSION..."
+        conda create -y -n "$ENV_NAME" python="$PYTHON_VERSION"
+        conda activate "$ENV_NAME"
+    fi
+
+    echo "  Installing PyTorch + torchaudio (CUDA 12.8)..."
+    pip install torch torchaudio --index-url "$TORCH_INDEX"
+
+    echo "  Installing project dependencies..."
+    pip install -r "$SCRIPT_DIR/requirements.txt"
+
+    echo ""
+    echo "Done! Activate with:"
+    echo "  conda activate $ENV_NAME"
+}
+
+# ── Venv setup ────────────────────────────────────────────────────────
+
+setup_venv() {
+    echo "==> Setting up Python venv at: $VENV_DIR"
+
+    if [ ! -d "$VENV_DIR" ]; then
+        python3 -m venv "$VENV_DIR"
+        echo "  Created venv"
+    else
+        echo "  Venv already exists, updating..."
+    fi
+
+    source "$VENV_DIR/bin/activate"
+
+    echo "  Installing PyTorch + torchaudio (CUDA 12.8)..."
+    pip install torch torchaudio --index-url "$TORCH_INDEX"
+
+    echo "  Installing project dependencies..."
+    pip install -r "$SCRIPT_DIR/requirements.txt"
+
+    echo ""
+    echo "Done! Activate with:"
+    echo "  source $VENV_DIR/bin/activate"
+}
+
+# ── Run ───────────────────────────────────────────────────────────────
+
+# Dispatch to the setup routine for the selected mode.
+case "$MODE" in
+    conda) setup_conda ;;
+    venv)  setup_venv  ;;
+esac
+
+# Print commands the user can run to check the installation.
+echo ""
+echo "Verify with:"
+echo "  python -c \"import torch; print('PyTorch', torch.__version__, 'CUDA', torch.version.cuda)\""
+echo "  python -c \"import librosa, torchaudio, sklearn; print('All imports OK')\""
@@ -0,0 +1,40 @@
+import tempfile, os
+import numpy as np
+from core.audio_scan import scan_video, load_classifier, default_model_path
+
+
+def test_scan_video_no_model_returns_empty():
+    """scan_video with no model should return empty list."""
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as vid:
+        import soundfile as sf
+        sf.write(vid.name, np.random.randn(16000 * 20).astype(np.float32) * 0.1, 16000)
+    try:
+        regions = scan_video(vid.name, model=None)
+        assert regions == []
+    finally:
+        os.unlink(vid.name)
+
+
+def test_load_classifier_missing_returns_none():
+    assert load_classifier("/no/such/model.joblib") is None
+
+
+def test_default_model_path_contains_profile():
+    path = default_model_path("test_profile")
+    assert "test_profile" in path
+    assert path.endswith(".joblib")
+
+
+def test_db_get_all_export_paths():
+    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+        path = f.name
+    try:
+        from core.db import ProcessedDB
+        db = ProcessedDB(path)
+        db.add("a.mp4", 10.0, "/out/a_001.mp4", profile="test")
+        db.add("b.mp4", 20.0, "/out/b_001.mp4", profile="test")
+        db.add("c.mp4", 30.0, "/out/c_001.mp4", profile="other")
+        paths = db.get_all_export_paths("test")
+        assert set(paths) == {"/out/a_001.mp4", "/out/b_001.mp4"}
+    finally:
+        os.unlink(path)
Author	SHA1	Message	Date
Ethanfel	12ed183f1b	feat: integrate training UI, BEATs model, and clean up legacy code - Remove legacy distance-mode scanning (build_profile, _similarity, etc.) and hand-crafted intensity features — pipeline is now embedding-only - Integrate Microsoft BEATs as embedding option alongside wav2vec2/HuBERT - Add TrainDialog with positive class selector, model picker, video dir fallback, and live training stats - Add TrainWorker QThread with cancel support and proper lifecycle cleanup - Add source_path column to DB for robust source video tracking - Add get_export_folders/get_training_data/get_training_stats to DB - Wire source_path in all export DB writes (_on_clip_done, _on_auto_clip_done) - Cancel scan/train workers in closeEvent to prevent use-after-free crashes - Add setup_env.sh supporting both conda and python venv (CUDA 12.8) - Update requirements.txt with all actual dependencies - Update 8cut_train.py with --positive flag for new DB-driven training Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-18 11:52:27 +02:00
Ethanfel	f2c38aee79	feat: rewrite audio scan with MFCC+delta+spectral contrast pipeline Root cause of poor discrimination: MFCC[0] (energy) dominated the feature vector, making cosine similarity see all audio as similar. Changes: - Skip MFCC[0], use 12 coefficients instead of 20 - Add delta MFCCs for temporal dynamics - Add 7-band spectral contrast for tonal vs noise quality - Switch from cosine similarity to euclidean-distance-based score - Pre-compute STFT once for whole file (10-20x faster) - Vectorized sliding window via cumulative sums (no Python loop) - Lower sample rate 22050→16000 Hz (faster, no quality loss) - 62-dim feature vector (was 40-dim mean+std of raw MFCCs) - Default threshold 0.05 (new similarity scale) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 15:28:44 +02:00
Ethanfel	8ab5bdba77	fix: use mean+std MFCC vectors (40-dim) for better discrimination Mean-only vectors were too similar across different audio segments, causing everything to match even at threshold 0.99. Adding std captures temporal dynamics and makes the similarity scores much more spread out. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 09:27:11 +02:00
Ethanfel	c6c5934fe8	fix: threshold step 0.05 → 0.01 for finer control Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 09:21:14 +02:00
Ethanfel	73d5367424	fix: three audio scan bugs — signal shadow, re-entrancy, S-key jump 1. Rename ScanWorker.finished → scan_done to stop shadowing QThread.finished. Previously, cancelled scans leaked the QThread because the custom signal was never emitted. 2. Block signals on combobox reset in _on_scan_ref_changed to prevent re-entrant call when user cancels folder dialog. 3. Merge overlapping scan regions into clusters before S-key navigation so it jumps to the next distinct match, not 1s forward through overlapping windows. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 09:12:24 +02:00
Ethanfel	1e2cebd424	fix: prevent deleteLater on still-running ScanWorker QThread When cancelling a scan during file change, connect finished signal to deleteLater instead of calling it immediately on a running thread. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 09:02:35 +02:00
Ethanfel	c439aca9b9	feat: add S shortcut and clear scan on file change Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:59:47 +02:00
Ethanfel	afda9b2d9f	feat: add scan UI controls and start_scan handler Add Scan button, threshold spinner, mode combobox, and reference source combobox to the settings row. Implement handler methods for starting scans, handling results/errors, cleanup of workers, and reference folder selection. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:57:56 +02:00
Ethanfel	fd42791c9f	feat: add get_all_export_paths to ProcessedDB Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:55:39 +02:00
Ethanfel	4cf54f2642	feat: add ScanWorker QThread for background scanning Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:54:20 +02:00
Ethanfel	e7f4de9ec1	feat: timeline scan region rendering Add scan region storage and rendering to TimelineWidget: - _scan_regions list in __init__ for (start, end, score) tuples - set_scan_regions() and clear_scan_regions() methods - paintEvent draws semi-transparent blue rectangles with score-based opacity Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:53:18 +02:00
Ethanfel	9cf9e3233f	feat: add scan_video with average and nearest modes Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:50:47 +02:00
Ethanfel	e17d8f67aa	feat: add audio_scan module with build_profile Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:48:18 +02:00
Ethanfel	b1980de6d1	fix: 9 bugs in audio scan implementation plan - Swap Task 5/6 order so get_all_export_paths exists before UI uses it - Remove cosine similarity clamping to preserve anti-correlation signal - Use os.path.exists instead of os.path.isfile (handles image sequences) - Add worker cleanup to disconnect stale signals before new scan - Remove lock from get_all_export_paths (matches read-only convention) - Always use get_all_export_paths for Current Profile (not current-file-first) - Filter export paths with os.path.exists for deleted files - Use abs() for float comparison in tests instead of == - Add cancel_flag to ScanWorker and scan_video for interruptible scans Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:43:53 +02:00
Ethanfel	85e0641440	docs: add audio scan implementation plan Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:36:56 +02:00
Ethanfel	834b89b682	docs: add audio similarity scanning design Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:33:25 +02:00
Ethanfel	a67e189aa0	fix: mpv loadfile index arg, cache polling, and sidebar CSS - Pass integer index (-1) to mpv loadfile command for newer mpv versions - Poll /api/cache/status instead of streaming endpoints to avoid downloading video bodies during readiness checks - Cancel previous polling when selecting a new file - Fix sidebar flex-shrink and file name text overflow Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-17 08:17:23 +02:00
Ethanfel	2b6c56cd15	fix: add CORS middleware to server for Tauri webview requests Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 20:49:53 +02:00
Ethanfel	0f6082061f	feat: add folder navigation to file browser Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 20:25:03 +02:00
Ethanfel	9662b815db	feat: add server URL input to profile bar Type URL + Enter or click Set. Persisted via localStorage. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 20:15:42 +02:00
Ethanfel	9776b83ac5	fix: client bug fixes from review - FileBrowser: reload hidden files when profile changes - WebSocket: wrap JSON.parse in try-catch - WebSocket: exponential backoff on reconnect (2s -> 30s max) - WebSocket: clean up connection on destroy Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 20:09:21 +02:00
Ethanfel	39f873bec2	fix: server bug fixes from review - DB: add threading.Lock on all write methods and multi-step reads - export.py: check audio extraction return code, raise on failure - routes/export: counter race condition fix with _counter_lock - routes/export: delete validation accepts EXPORT_DIR_suffix siblings - routes/export: evict old finished jobs to prevent unbounded growth - client plan: fix 10 bugs (mpv IPC, encodePath, input_path sep, etc.) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:53:38 +02:00
Ethanfel	409eb82e5c	feat: configure Linux packaging (deb + AppImage) Renamed to 8-cut, 1200x800 window, .deb builds at 3.9MB. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:52:40 +02:00
Ethanfel	297aafa51c	feat: add settings persistence via localStorage Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:50:01 +02:00
Ethanfel	b4cf972d59	feat: wire up main app layout with all components Sidebar file browser, canvas timeline, transport bar, export panel, profile bar, keyboard shortcuts, quality-reactive stream reload. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:48:54 +02:00
Ethanfel	5cc1e52e75	feat: add profile bar component Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:10:32 +02:00
Ethanfel	6bf0b0ae99	feat: add export panel component Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:10:03 +02:00
Ethanfel	b6fbda01dd	feat: add canvas-based timeline component Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:07:21 +02:00
Ethanfel	51d41f0a56	feat: add file browser component Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 19:06:41 +02:00
Ethanfel	16bd1a9ae0	feat: add mpv TypeScript bridge Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 18:46:35 +02:00
Ethanfel	2036c49b52	feat: add mpv sidecar IPC and Tauri commands Persistent BufReader + request_id matching for correct event handling. Audio-file passed during loadfile for frame-accurate sync. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 18:46:01 +02:00
Ethanfel	b12758c53c	feat: add WebSocket client for export progress Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 18:41:12 +02:00
Ethanfel	3d484952c2	feat: add Svelte stores for app state Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 18:40:43 +02:00
Ethanfel	12dae93671	feat: add server API client module Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 18:34:23 +02:00
Ethanfel	1e65fd6b0f	feat: scaffold Tauri + Svelte client SvelteKit in SPA mode with Tauri v2. Builds and produces .deb bundle. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 18:33:45 +02:00
Ethanfel	f7756320e5	docs: add Tauri + Svelte client implementation plan 15-task plan covering Rust install, Tauri scaffold, mpv sidecar, API client, stores, UI components, keyboard shortcuts, and packaging. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 17:02:56 +02:00
Ethanfel	cd0331d4ce	docs: add Tauri + Svelte client design Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 16:57:29 +02:00
Ethanfel	38c6174f83	ci: disable auto Docker build, manual dispatch only Build locally and push to ghcr.io instead — nvidia/cuda base is too large for GitHub runners. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 15:23:32 +02:00
Ethanfel	5b22bceed2	ci: add GitHub Actions workflow to build Docker image Docker Image / build (push) Has been cancelled Details Triggers on pushes to server branch or version tags when core/, server/, or Docker files change. Pushes to ghcr.io. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 14:48:22 +02:00
Ethanfel	80f21915e3	feat: switch to nvidia/cuda base image for NVENC hw encoding - Base: nvidia/cuda:12.6.3-runtime-ubuntu24.04 - ffmpeg from apt has NVENC support when GPU runtime is available - docker-compose reserves all GPUs via deploy.resources Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 14:47:19 +02:00
Ethanfel	b09ba3fa9e	fix: third-pass review bugs - Switch DELETE /export to query param (path param strips leading /) - Add CropKeyframe Pydantic model for typed keyframe validation - Convert keyframes to tuples before passing to apply_keyframes_to_jobs - Remove dead QFrame import from main.py Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 14:20:20 +02:00
Ethanfel	5b7a55a05d	fix: second-pass review bugs in server and core - ExportRunner: stop batch on first error (was continuing, overwriting error status with done) - Export route: validate input_path against MEDIA_DIRS - Export route: validate encoder, portrait_ratio, folder_suffix, name - Export route: fix format check for WebP sequence - Export route: add _ separator in folder_suffix (match GUI) - Export route: use realpath consistently in delete endpoint - Export route: drop runner ref on completion (prevent memory leak) - ProcessedDB: use cursor-level row_factory (thread-safe) - WebSocket: catch all exceptions in connect, cleanup in finally - Dockerfile: use uvicorn[standard] for websockets support Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 14:10:27 +02:00
Ethanfel	2200da491f	fix: address review bugs in server implementation - Fix keyframe 6-tuple → 4-tuple mismatch crashing ExportRunner - Fix ws.broadcast() using wrong event loop from background threads - Fix export counter hardcoded to 1, now auto-increments - Add path traversal protection to file/stream/delete endpoints - Use proper HTTP error codes (was returning 200 for errors) - Add thread safety to WebSocket connection list - Record exports to DB so markers appear - Move WS endpoint to /ws/export (was /api/ws/export) - Prune dead threads from cache job tracker Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:55:25 +02:00
Ethanfel	3d6469c60c	feat: add Dockerfile and docker-compose for server deployment Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:49:43 +02:00
Ethanfel	6a4ac8b8ed	feat: add hidden files API endpoints Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:49:30 +02:00
Ethanfel	1f6906c946	feat: add export endpoint with WebSocket progress Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:49:16 +02:00
Ethanfel	dfba88a601	feat: add markers/profiles/labels API endpoints Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:48:33 +02:00
Ethanfel	e94c088df0	feat: add video streaming with transcode cache and audio extraction Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:48:06 +02:00
Ethanfel	9569103edd	feat: add FastAPI app with file listing endpoint Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:47:23 +02:00
Ethanfel	079afeee7c	feat: create server/config with env var settings and quality presets Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:44:31 +02:00
Ethanfel	fbbfa6fdce	refactor: import shared logic from core/ instead of inline definitions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:43:44 +02:00
Ethanfel	56920a5247	feat: create core/tracking module with YOLO subject tracking Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:39:52 +02:00
Ethanfel	08c1dd8b33	feat: create core/export module with ExportRunner Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:39:24 +02:00
Ethanfel	2b63ad1857	feat: create core/annotations module Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:38:47 +02:00
Ethanfel	72f6a4e8f5	feat: create core/db module with ProcessedDB Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:38:20 +02:00
Ethanfel	799a2ab353	feat: create core/ffmpeg module with ffmpeg helpers Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:34:59 +02:00
Ethanfel	066f4431ba	feat: create core/paths module with shared path helpers Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:34:17 +02:00
Ethanfel	97f9ef7073	fix: correct bugs in server API implementation plan - Fix line range 38→36 for _frozen_path extraction - Clarify line ranges for ffmpeg vs annotation functions - Remove unused imports (_frozen_path, build_annotation_json_path) from main.py import list - Add step to clean up dead stdlib imports (re, json, sqlite3, tempfile, datetime) - Add explicit stub router code for stream, markers, export, hidden - Add server/__init__.py and server/routes/__init__.py content - Add _DBWorker and FrameGrabber to "keep in main.py" list - Note optional tracking deps in Dockerfile Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:33:36 +02:00
Ethanfel	592e40c1a6	docs: add server API implementation plan Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:24:03 +02:00
Ethanfel	73dd7a1569	docs: add server API design for remote editing via Tauri client Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:20:13 +02:00
Ethanfel	7abf0b4d4c	feat: autoclip, play/pause improvements, number key exports, focus fix - Autoclip (A): adjusts clip count to fit current pause position - Pause no longer resets playback position — stays where paused - Play resumes from pause point instead of restarting - Spread/clips changes update loop end without restarting playback - Number keys 1-9 export to subprofiles - Click-away clears focus from spinboxes so hotkeys work again - Lock mode: double-click marker jumps cursor to end of clip span Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-16 13:19:21 +02:00
Ethanfel	9e5bd4a8ec	feat: add subprofiles, live play loop update, fix lock mode scrub - Subprofiles: lightweight export variants that append a suffix to the export folder (e.g. _soft, _intense). Each gets its own export button in the transport row. Managed via "+" menu, persisted in QSettings. - Play loop now updates immediately when spread/clips spinboxes change. - Lock mode: ignore stale mpv position updates while user is dragging to prevent the play position from jumping back. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-15 23:58:26 +02:00
				`@@ -0,0 +1 @@`
				<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="26.6" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 308"><path fill="#FF3E00" d="M239.682 40.707C211.113-.182 154.69-12.301 113.895 13.69L42.247 59.356a82.198 82.198 0 0 0-37.135 55.056a86.566 86.566 0 0 0 8.536 55.576a82.425 82.425 0 0 0-12.296 30.719a87.596 87.596 0 0 0 14.964 66.244c28.574 40.893 84.997 53.007 125.787 27.016l71.648-45.664a82.182 82.182 0 0 0 37.135-55.057a86.601 86.601 0 0 0-8.53-55.577a82.409 82.409 0 0 0 12.29-30.718a87.573 87.573 0 0 0-14.963-66.244"></path><path fill="#FFF" d="M106.889 270.841c-23.102 6.007-47.497-3.036-61.103-22.648a52.685 52.685 0 0 1-9.003-39.85a49.978 49.978 0 0 1 1.713-6.693l1.35-4.115l3.671 2.697a92.447 92.447 0 0 0 28.036 14.007l2.663.808l-.245 2.659a16.067 16.067 0 0 0 2.89 10.656a17.143 17.143 0 0 0 18.397 6.828a15.786 15.786 0 0 0 4.403-1.935l71.67-45.672a14.922 14.922 0 0 0 6.734-9.977a15.923 15.923 0 0 0-2.713-12.011a17.156 17.156 0 0 0-18.404-6.832a15.78 15.78 0 0 0-4.396 1.933l-27.35 17.434a52.298 52.298 0 0 1-14.553 6.391c-23.101 6.007-47.497-3.036-61.101-22.649a52.681 52.681 0 0 1-9.004-39.849a49.428 49.428 0 0 1 22.34-33.114l71.664-45.677a52.218 52.218 0 0 1 14.563-6.398c23.101-6.007 47.497 3.036 61.101 22.648a52.685 52.685 0 0 1 9.004 39.85a50.559 50.559 0 0 1-1.713 6.692l-1.35 4.116l-3.67-2.693a92.373 92.373 0 0 0-28.037-14.013l-2.664-.809l.246-2.658a16.099 16.099 0 0 0-2.89-10.656a17.143 17.143 0 0 0-18.398-6.828a15.786 15.786 0 0 0-4.402 1.935l-71.67 45.674a14.898 14.898 0 0 0-6.73 9.975a15.9 15.9 0 0 0 2.709 12.012a17.156 17.156 0 0 0 18.404 6.832a15.841 15.841 0 0 0 4.402-1.935l27.345-17.427a52.147 52.147 0 0 1 14.552-6.397c23.101-6.006 47.497 3.037 61.102 22.65a52.681 52.681 0 0 1 9.003 39.848a49.453 49.453 0 0 1-22.34 33.12l-71.664 45.673a52.218 52.218 0 0 1-14.563 6.398"></path></svg>