perf: run waveform ffmpeg at low priority so it yields to mpv on load

The first load of a file decodes the whole audio track in a background thread; nice'ing it (os.nice(15)) reduces disk/CPU contention with mpv during the initial open. Result is cached, so subsequent loads are fast. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
feat: flag playlist files missing from disk (⚠ orange strikethrough)
2026-06-06 12:14:04 +02:00 · 2026-06-06 12:12:03 +02:00 · 2026-06-05 14:19:49 +02:00 · 2026-06-05 14:14:42 +02:00 · 2026-06-05 14:11:11 +02:00 · 2026-06-05 14:08:20 +02:00
5 changed files with 2707 additions and 429 deletions
@@ -3,6 +3,7 @@
 SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
 ENV_NAME="8cut"
 CONDA_PREFIX_BASE="/media/p5/miniforge3"
 export LD_PRELOAD=/usr/lib/libstdc++.so.6
 # 1. Try .venv in project dir
 if [ -f "$SCRIPT_DIR/.venv/bin/activate" ]; then
@@ -67,7 +67,7 @@ _EMBED_MODELS = {
    "EAT":                 768,
    "EAT_LARGE":          1024,
 }
-_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
+_DEFAULT_EMBED_MODEL = "EAT_LARGE"
 _BEATS_CHECKPOINT = os.path.join(
    _DL_CACHE_DIR, "huggingface", "hub",
@@ -674,9 +674,11 @@ def restore_model_version(version_path: str, profile_name: str = "default",
 def list_trained_models(profile_name: str = "default") -> list[str]:
-    """Return embedding model names that have a trained .joblib for *profile_name*.
+    """Return embedding model keys that have a trained .joblib for *profile_name*.
-    Looks for files matching ``{profile}_{MODEL}.joblib`` in the models dir.
+    Looks for files matching ``{profile}_{KEY}.joblib`` in the models dir.
    KEY is either a bare embed model name (e.g. ``EAT_LARGE``) or
    ``{MODEL}_{name}`` for user-named variants.
    """
    prefix = f"{profile_name}_"
    suffix = ".joblib"
@@ -685,13 +687,17 @@ def list_trained_models(profile_name: str = "default") -> list[str]:
        return result
    for fname in os.listdir(_MODEL_DIR):
        if fname.startswith(prefix) and fname.endswith(suffix):
-            model_name = fname[len(prefix):-len(suffix)]
+            key = fname[len(prefix):-len(suffix)]
-            if model_name in _EMBED_MODELS:
+            if key in _EMBED_MODELS:
-                result.append(model_name)
+                result.append(key)
            else:
                for m in _EMBED_MODELS:
                    if key.startswith(m + "_"):
                        result.append(key)
                        break
    # Also check legacy {profile}.joblib
    legacy = os.path.join(_MODEL_DIR, f"{profile_name}.joblib")
    if os.path.exists(legacy) and not result:
        # Legacy model — we don't know the embed model, but it's usable
        result.append("")
    return sorted(result)
@@ -1,4 +1,5 @@
 import os
 import re
 import sqlite3
 import threading
 from datetime import datetime, timezone
@@ -7,6 +8,12 @@ from pathlib import Path
 from .paths import _log
 def _extract_m_number(output_path: str) -> int | None:
    """Extract the manual export number from a path like clip_001_m3_0.mp4."""
    m = re.search(r'_m(\d+)[_.]', os.path.basename(output_path))
    return int(m.group(1)) if m else None
 class ProcessedDB:
    _SCHEMA_VERSION = 4  # bump when schema changes
@@ -46,6 +53,7 @@ class ProcessedDB:
                "  crop_center     REAL    NOT NULL DEFAULT 0.5,"
                "  format          TEXT    NOT NULL DEFAULT 'MP4',"
                "  clip_count      INTEGER NOT NULL DEFAULT 3,"
                "  clip_duration   REAL    NOT NULL DEFAULT 8.0,"
                "  spread          REAL    NOT NULL DEFAULT 3.0,"
                "  profile         TEXT    NOT NULL DEFAULT 'default',"
                "  source_path     TEXT    NOT NULL DEFAULT '',"
@@ -63,6 +71,7 @@ class ProcessedDB:
                "crop_center":    "REAL NOT NULL DEFAULT 0.5",
                "format":         "TEXT NOT NULL DEFAULT 'MP4'",
                "clip_count":     "INTEGER NOT NULL DEFAULT 3",
                "clip_duration":  "REAL NOT NULL DEFAULT 8.0",
                "spread":         "REAL NOT NULL DEFAULT 3.0",
                "profile":        "TEXT NOT NULL DEFAULT 'default'",
                "source_path":    "TEXT NOT NULL DEFAULT ''",
@@ -232,7 +241,8 @@ class ProcessedDB:
            label: str = "", category: str = "",
            short_side: int | None = None, portrait_ratio: str = "",
            crop_center: float = 0.5, fmt: str = "MP4",
-            clip_count: int = 3, spread: float = 3.0,
+            clip_count: int = 3, clip_duration: float = 8.0,
            spread: float = 3.0,
            profile: str = "default", source_path: str = "",
            scan_export: bool = False) -> None:
        if not self._enabled:
@@ -242,16 +252,60 @@ class ProcessedDB:
                "INSERT INTO processed"
                " (filename, start_time, output_path, label, category,"
                "  short_side, portrait_ratio, crop_center, format,"
-                "  clip_count, spread, profile, source_path, scan_export, processed_at)"
+                "  clip_count, clip_duration, spread, profile, source_path,"
-                " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                "  scan_export, processed_at)"
                " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (filename, start_time, output_path, label, category,
                 short_side, portrait_ratio, crop_center, fmt,
-                 clip_count, spread, profile, source_path,
+                 clip_count, clip_duration, spread, profile, source_path,
                 1 if scan_export else 0,
                 datetime.now(timezone.utc).isoformat()),
            )
            self._con.commit()
    def update_source_paths(self, new_dir: str,
                            playlist_paths: list[str] | None = None,
                            profile: str = "") -> int:
        """Re-resolve source_path for all rows whose current path is missing.
        Checks *new_dir* and *playlist_paths* by filename match.
        Returns the number of rows updated.
        """
        if not self._enabled:
            return 0
        lookup: dict[str, str] = {}
        if playlist_paths:
            for p in playlist_paths:
                lookup[os.path.basename(p)] = p
        if new_dir and os.path.isdir(new_dir):
            for f in os.listdir(new_dir):
                fp = os.path.join(new_dir, f)
                if os.path.isfile(fp):
                    lookup[f] = fp
        if not lookup:
            return 0
        query = "SELECT DISTINCT filename, source_path FROM processed"
        params: tuple = ()
        if profile:
            query += " WHERE profile = ?"
            params = (profile,)
        rows = self._con.execute(query, params).fetchall()
        updated = 0
        with self._lock:
            for fn, sp in rows:
                if sp and os.path.exists(sp):
                    continue
                new_path = lookup.get(fn)
                if new_path and os.path.isfile(new_path):
                    self._con.execute(
                        "UPDATE processed SET source_path = ? WHERE filename = ?",
                        (new_path, fn),
                    )
                    updated += 1
            if updated:
                self._con.commit()
        return updated
    def get_labels(self) -> list[str]:
        """Return distinct non-empty labels ordered by most recently used."""
        if not self._enabled:
@@ -278,19 +332,37 @@ class ProcessedDB:
        cur.row_factory = sqlite3.Row
        row = cur.execute(
            "SELECT label, category, short_side, portrait_ratio, crop_center, format,"
-            " clip_count, spread"
+            " clip_count, clip_duration, spread"
            " FROM processed WHERE output_path = ?",
            (output_path,),
        ).fetchone()
        return dict(row) if row else None
-    def delete_by_output_path(self, output_path: str) -> None:
+    def delete_by_output_path(self, output_path: str, profile: str = "") -> None:
        if not self._enabled:
            return
        with self._lock:
-            self._con.execute("DELETE FROM processed WHERE output_path = ?", (output_path,))
+            if profile:
                self._con.execute(
                    "DELETE FROM processed WHERE output_path = ? AND profile = ?",
                    (output_path, profile),
                )
            else:
                self._con.execute(
                    "DELETE FROM processed WHERE output_path = ?", (output_path,),
                )
            self._con.commit()
    def is_path_used_by_other_profiles(self, output_path: str, profile: str) -> bool:
        """Return True if *output_path* is referenced by any profile other than *profile*."""
        if not self._enabled:
            return False
        row = self._con.execute(
            "SELECT 1 FROM processed WHERE output_path = ? AND profile != ? LIMIT 1",
            (output_path, profile),
        ).fetchone()
        return row is not None
    def get_group(self, output_path: str, profile: str = "") -> list[str]:
        """Return all output_paths sharing the same (filename, start_time, profile) as *output_path*."""
        if not self._enabled:
@@ -336,29 +408,120 @@ class ProcessedDB:
            self._con.commit()
            return paths
-    def _get_markers_for(self, match: str, profile: str = "default") -> list[tuple[float, int, str]]:
+    def _get_markers_for(self, match: str, profile: str = "default",
                         export_folder: str = "") -> list[tuple[float, int, str, float]]:
        if export_folder:
            rows = self._con.execute(
-            "SELECT start_time, output_path FROM processed"
+                "SELECT start_time, output_path, clip_duration, clip_count, spread"
                " FROM processed"
                " WHERE filename = ? AND profile = ? AND scan_export = 0"
                "   AND output_path LIKE ?"
                " ORDER BY start_time",
                (match, profile, export_folder.rstrip("/") + "/%"),
            ).fetchall()
        else:
            rows = self._con.execute(
                "SELECT start_time, output_path, clip_duration, clip_count, spread"
                " FROM processed"
                " WHERE filename = ? AND profile = ? AND scan_export = 0"
                " ORDER BY start_time",
                (match, profile),
            ).fetchall()
-        # Deduplicate by start_time — batch exports share the same cursor.
+        seen_times: dict[float, tuple[float, int, str, float]] = {}
-        seen_times: dict[float, tuple[float, int, str]] = {}
+        seq = 0
-        n = 0
+        for t, p, dur, cnt, spr in rows:
        for t, p in rows:
            if t not in seen_times:
-                n += 1
+                seq += 1
-                seen_times[t] = (t, n, p)
+                num = _extract_m_number(p) or seq
                span = (dur or 8.0) + ((cnt or 1) - 1) * (spr or 3.0)
                seen_times[t] = (t, num, p, span)
        return list(seen_times.values())
-    def get_markers(self, filename: str, profile: str = "default") -> list[tuple[float, int, str]]:
+    def get_markers(self, filename: str, profile: str = "default",
-        """Return [(start_time, marker_number, output_path), ...] for exact
+                    export_folder: str = "") -> list[tuple[float, int, str, float]]:
-        filename match, sorted by start_time. Empty list if no match.
+        """Return [(start_time, marker_number, output_path, clip_span), ...]
-        Excludes scan exports (shown via scan panel instead)."""
+        for exact filename match, sorted by start_time. Empty list if no match.
        Excludes scan exports (shown via scan panel instead).
        If export_folder is set, only markers in that folder are returned."""
        if not self._enabled:
            return []
-        return self._get_markers_for(filename, profile)
+        return self._get_markers_for(filename, profile, export_folder)
    def get_other_folder_markers(self, filename: str, profile: str = "default",
                                export_folder: str = ""
                                ) -> dict[str, list[tuple[float, int, str, float]]]:
        """Return {folder_name: [(start_time, num, path, span), ...]} for
        markers NOT in export_folder, grouped by their base export folder."""
        if not self._enabled or not export_folder:
            return {}
        rows = self._con.execute(
            "SELECT start_time, output_path, clip_duration, clip_count, spread"
            " FROM processed"
            " WHERE filename = ? AND profile = ? AND scan_export = 0"
            "   AND output_path NOT LIKE ?"
            " ORDER BY start_time",
            (filename, profile, export_folder.rstrip("/") + "/%"),
        ).fetchall()
        by_folder: dict[str, list] = {}
        for t, p, dur, cnt, spr in rows:
            parts = p.split("/")
            for i, part in enumerate(parts):
                if part.startswith("vid_"):
                    folder = "/".join(parts[:i])
                    break
            else:
                folder = os.path.dirname(os.path.dirname(p))
            by_folder.setdefault(folder, []).append((t, p, dur, cnt, spr))
        result: dict[str, list[tuple[float, int, str, float]]] = {}
        for folder, folder_rows in by_folder.items():
            seen: dict[float, tuple[float, int, str, float]] = {}
            seq = 0
            for t, p, dur, cnt, spr in folder_rows:
                if t not in seen:
                    seq += 1
                    num = _extract_m_number(p) or seq
                    span = (dur or 8.0) + ((cnt or 1) - 1) * (spr or 3.0)
                    seen[t] = (t, num, p, span)
            name = os.path.basename(folder)
            if name.endswith("_disabled"):
                continue  # disabled clips are excluded from the timeline
            result[name] = list(seen.values())
        return result
    def get_manual_export_groups(self, filename: str, profile: str = "default"
                                ) -> list[dict]:
        """Return manual (non-scan) export groups for *filename*.
        Each group dict has:
          start_time, paths (list[str] sorted), clip_count, clip_duration,
          spread, short_side, portrait_ratio, crop_center, format, label,
          category
        """
        if not self._enabled:
            return []
        rows = self._con.execute(
            "SELECT start_time, output_path, clip_count, clip_duration, spread,"
            " short_side, portrait_ratio, crop_center, format, label, category"
            " FROM processed"
            " WHERE filename = ? AND profile = ? AND scan_export = 0"
            " ORDER BY start_time, output_path",
            (filename, profile),
        ).fetchall()
        groups: dict[float, dict] = {}
        for r in rows:
            t = r[0]
            if t not in groups:
                groups[t] = {
                    "start_time": t,
                    "paths": [],
                    "clip_count": r[2], "clip_duration": r[3],
                    "spread": r[4],
                    "short_side": r[5], "portrait_ratio": r[6],
                    "crop_center": r[7], "format": r[8],
                    "label": r[9], "category": r[10],
                }
            groups[t]["paths"].append(r[1])
        return list(groups.values())
    def get_clip_count(self, filename: str, profile: str = "default") -> int:
        """Return total number of exported clips (including scan exports)."""
@@ -370,15 +533,235 @@ class ProcessedDB:
        ).fetchone()
        return row[0] if row else 0
    def get_clip_counts_by_folder(self, filename: str,
                                  profile: str = "default") -> dict[str, int]:
        """Return per-export-folder clip counts for a single video.
        Folder name is the grandparent dir of each clip's output_path
        (e.g. ``mp4_doggy_clap``).
        """
        if not self._enabled:
            return {}
        rows = self._con.execute(
            "SELECT output_path FROM processed WHERE filename = ? AND profile = ?",
            (filename, profile),
        ).fetchall()
        counts: dict[str, int] = {}
        for (op,) in rows:
            folder = os.path.basename(os.path.dirname(os.path.dirname(op)))
            counts[folder] = counts.get(folder, 0) + 1
        return counts
    def get_all_folder_counts(self, profile: str = "default") -> dict[str, int]:
        """Return clip counts per export folder across all videos in *profile*.
        Includes ``_disabled`` folders so callers can offer enable/disable.
        """
        if not self._enabled:
            return {}
        rows = self._con.execute(
            "SELECT output_path FROM processed WHERE profile = ?",
            (profile,),
        ).fetchall()
        counts: dict[str, int] = {}
        for (op,) in rows:
            folder = os.path.basename(os.path.dirname(os.path.dirname(op)))
            counts[folder] = counts.get(folder, 0) + 1
        return counts
    def relocate_video_clips(self, filename: "str | None", profile: str,
                             src_folder_name: str,
                             dst_folder_name: str) -> int:
        """Move clips from one export folder to a sibling folder.

        Matches rows whose grandparent dir basename == *src_folder_name*
        (restricted to *filename* when given, else every video in *profile*),
        then moves each clip (and any ``.wav`` sidecar) on disk into a sibling
        folder named *dst_folder_name*, migrates its dataset.json annotation,
        and rewrites output_path in the DB.

        Args:
            filename: video whose clips are relocated, or None for all
                videos in *profile*.
            profile: profile whose rows are considered.
            src_folder_name: basename of the export folder to move out of.
            dst_folder_name: basename of the sibling folder to move into.

        Returns:
            The number of DB rows whose output_path was rewritten (0 when the
            DB is disabled or nothing matched).  This can exceed the number
            of files physically moved: rows are rewritten even when the clip
            is absent on disk or the destination file already exists.
        """
        if not self._enabled:
            return 0
        import shutil
        from .annotations import remove_clip_annotation, upsert_clip_annotation
        if filename is None:
            rows = self._con.execute(
                "SELECT id, output_path, label FROM processed WHERE profile = ?",
                (profile,),
            ).fetchall()
        else:
            rows = self._con.execute(
                "SELECT id, output_path, label FROM processed"
                " WHERE filename = ? AND profile = ?",
                (filename, profile),
            ).fetchall()
        # Plan everything first; nothing touches disk or DB in this loop
        # (only os.path.exists is consulted).
        moves: list[tuple[str, str]] = []        # (old_path, new_path)
        updates: list[tuple[str, int]] = []       # (new_path, id)
        ann: list[tuple[str, str, str, str, str]] = []  # old_fold,new_fold,old,new,label
        new_dirs: set[str] = set()
        old_vid_dirs: set[str] = set()
        for rid, op, label in rows:
            # Path layout used here: <export_folder>/<vid_dir>/<clip file>.
            vid_dir = os.path.dirname(op)
            export_folder = os.path.dirname(vid_dir)
            if os.path.basename(export_folder) != src_folder_name:
                continue
            new_export_folder = os.path.join(
                os.path.dirname(export_folder), dst_folder_name)
            new_vid_dir = os.path.join(new_export_folder, os.path.basename(vid_dir))
            new_op = os.path.join(new_vid_dir, os.path.basename(op))
            # The DB row is queued for rewrite whether or not the file exists.
            updates.append((new_op, rid))
            new_dirs.add(new_vid_dir)
            old_vid_dirs.add(vid_dir)
            if os.path.exists(op):
                moves.append((op, new_op))
            ann.append((export_folder, new_export_folder, op, new_op, label or ""))
        if not updates:
            return 0
        with self._lock:
            for d in sorted(new_dirs):
                os.makedirs(d, exist_ok=True)
            for old, new in moves:
                # Never overwrite an existing destination file.
                if os.path.exists(old) and not os.path.exists(new):
                    shutil.move(old, new)
                # Sidecar name is the full clip path with ".wav" appended.
                wav_old, wav_new = old + ".wav", new + ".wav"
                if os.path.exists(wav_old) and not os.path.exists(wav_new):
                    shutil.move(wav_old, wav_new)
            # NOTE(review): an exception from shutil.move above would skip
            # this DB rewrite while earlier files are already moved — confirm
            # whether callers need recovery for that case.
            self._con.executemany(
                "UPDATE processed SET output_path = ? WHERE id = ?", updates)
            self._con.commit()
        # Migrate dataset.json entries (best-effort, outside the DB lock).
        # Entries with an empty label are removed from the old folder and not
        # re-added to the new one.
        for old_fold, new_fold, old_op, new_op, label in ann:
            remove_clip_annotation(old_fold, old_op)
            if label:
                upsert_clip_annotation(new_fold, new_op, label)
        # Remove now-empty old vid dirs and their export folder if empty.
        for d in sorted(old_vid_dirs):
            try:
                if os.path.isdir(d) and not os.listdir(d):
                    os.rmdir(d)
                parent = os.path.dirname(d)
                if os.path.isdir(parent) and not os.listdir(parent):
                    os.rmdir(parent)
            except OSError:
                # Cleanup is optional; a directory that cannot be listed or
                # removed is simply left in place.
                pass
        _log(f"Relocated {len(updates)} clip(s) of {filename or 'all videos'}: "
             f"{src_folder_name} -> {dst_folder_name}")
        return len(updates)
    def get_profiles(self) -> list[str]:
-        """Return distinct profile names, ordered alphabetically."""
+        """Return distinct profile names across all tables, ordered alphabetically."""
        if not self._enabled:
            return []
        rows = self._con.execute(
-            "SELECT DISTINCT profile FROM processed ORDER BY profile"
+            "SELECT DISTINCT profile FROM processed"
            " UNION SELECT DISTINCT profile FROM scan_results"
            " UNION SELECT DISTINCT profile FROM hard_negatives"
            " ORDER BY profile"
        ).fetchall()
        return [r[0] for r in rows]
    def duplicate_profile(self, src: str, dst: str) -> int:
        """Copy all profile data from *src* to *dst*.
        Copies processed (exports), scan_results, hard_negatives, and
        hidden_files.  Returns total number of rows copied.
        """
        if not self._enabled or src == dst:
            return 0
        total = 0
        with self._lock:
            # processed (exports)
            rows = self._con.execute(
                "SELECT filename, start_time, output_path, label, category,"
                " short_side, portrait_ratio, crop_center, format,"
                " clip_count, clip_duration, spread, source_path, scan_export,"
                " processed_at"
                " FROM processed WHERE profile = ?", (src,),
            ).fetchall()
            for r in rows:
                self._con.execute(
                    "INSERT INTO processed"
                    " (filename, start_time, output_path, label, category,"
                    "  short_side, portrait_ratio, crop_center, format,"
                    "  clip_count, clip_duration, spread, profile,"
                    "  source_path, scan_export, processed_at)"
                    " VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
                    (*r[:12], dst, *r[12:]),
                )
            total += len(rows)
            # scan_results
            rows = self._con.execute(
                "SELECT filename, model, start_time, end_time, score,"
                " disabled, orig_start_time, orig_end_time, scan_timestamp"
                " FROM scan_results WHERE profile = ?", (src,),
            ).fetchall()
            for r in rows:
                self._con.execute(
                    "INSERT INTO scan_results"
                    " (filename, profile, model, start_time, end_time, score,"
                    "  disabled, orig_start_time, orig_end_time, scan_timestamp)"
                    " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                    (r[0], dst, r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]),
                )
            total += len(rows)
            # hard_negatives
            rows = self._con.execute(
                "SELECT filename, start_time, source_path, source_model"
                " FROM hard_negatives WHERE profile = ?", (src,),
            ).fetchall()
            for r in rows:
                self._con.execute(
                    "INSERT INTO hard_negatives"
                    " (filename, profile, start_time, source_path, source_model)"
                    " VALUES (?, ?, ?, ?, ?)",
                    (r[0], dst, r[1], r[2], r[3]),
                )
            total += len(rows)
            # hidden_files
            rows = self._con.execute(
                "SELECT filename FROM hidden_files WHERE profile = ?", (src,),
            ).fetchall()
            for r in rows:
                self._con.execute(
                    "INSERT OR IGNORE INTO hidden_files (filename, profile)"
                    " VALUES (?, ?)",
                    (r[0], dst),
                )
            total += len(rows)
            self._con.commit()
        return total
    def count_profile_rows(self, profile: str) -> int:
        """Return total number of rows across all tables for *profile*."""
        if not self._enabled:
            return 0
        n = 0
        for table in ("processed", "scan_results", "hard_negatives", "hidden_files"):
            row = self._con.execute(
                f"SELECT COUNT(*) FROM {table} WHERE profile = ?", (profile,),
            ).fetchone()
            n += row[0] if row else 0
        return n
    def delete_profile(self, profile: str) -> None:
        """Delete all rows for *profile* from every table."""
        if not self._enabled:
            return
        with self._lock:
            for table in ("processed", "scan_results", "hard_negatives", "hidden_files"):
                self._con.execute(
                    f"DELETE FROM {table} WHERE profile = ?", (profile,),
                )
            self._con.commit()
    def get_all_export_paths(self, profile: str = "default") -> list[str]:
        """Return all unique output_path values for a given profile."""
        if not self._enabled:
@@ -418,6 +801,32 @@ class ProcessedDB:
                pass
        return max_n
    def get_scan_export_rep_paths_in_range(self, filename: str, profile: str,
                                           start: float, end: float) -> list[str]:
        """Return one representative output_path per distinct scan-export
        start_time inside [start, end] for (filename, profile)."""
        if not self._enabled:
            return []
        rows = self._con.execute(
            "SELECT output_path FROM processed"
            " WHERE filename = ? AND profile = ? AND scan_export = 1"
            " AND start_time BETWEEN ? AND ?"
            " GROUP BY start_time",
            (filename, profile, start, end),
        ).fetchall()
        return [r[0] for r in rows]
    def get_scan_export_times(self, filename: str, profile: str) -> list[float]:
        """Return start_times of scan_export=1 rows for this file/profile."""
        if not self._enabled:
            return []
        rows = self._con.execute(
            "SELECT start_time FROM processed"
            " WHERE filename = ? AND profile = ? AND scan_export = 1",
            (filename, profile),
        ).fetchall()
        return [r[0] for r in rows]
    def delete_scan_exports(self, filename: str, profile: str) -> int:
        """Delete all scan_export entries for *filename* in *profile*.
@@ -504,13 +913,15 @@ class ProcessedDB:
        folder_names: set[str] = set()
        for (op,) in rows:
            grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
-            if grandparent:
+            if grandparent and not grandparent.endswith("_disabled"):
                folder_names.add(grandparent)
        return sorted(folder_names)
-    def get_training_data(self, profile: str, positive_folder: str,
+    def get_training_data(self, profile: str,
                          positive_folder: "str | list[str]",
                          negative_folder: str = "",
                          fallback_video_dir: str = "",
                          playlist_paths: list[str] | None = None,
                          include_scan_exports: bool = False,
                          use_hard_negatives: bool = True,
                          ) -> list[tuple[str, list[float], list[float], list[float]]]:
@@ -518,18 +929,20 @@ class ProcessedDB:
        Args:
            profile: profile name
-            positive_folder: export folder name for positive class (e.g. "mp4_Intense")
+            positive_folder: export folder name(s) for positive class
            negative_folder: export folder name for explicit negatives (optional)
            fallback_video_dir: if source_path is empty, try filename in this dir
            playlist_paths: loaded playlist paths to resolve filenames
            include_scan_exports: if True, include auto-exported scan clips
            use_hard_negatives: if False, skip hard negatives from scan feedback
        Returns:
            list of (source_video_path, positive_times, soft_times, negative_times)
-            per video.  Soft times = clips from any other non-negative folder.
+            per video.  Soft times = clips from any other non-positive/non-negative folder.
        """
        if not self._enabled:
            return []
        pos_folders = {positive_folder} if isinstance(positive_folder, str) else set(positive_folder)
        if include_scan_exports:
            rows = self._con.execute(
                "SELECT filename, start_time, output_path, source_path"
@@ -553,7 +966,9 @@ class ProcessedDB:
            if sp:
                source_by_filename[fn] = sp
            grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
-            if grandparent == positive_folder:
+            if grandparent.endswith("_disabled"):
                continue  # disabled clips are excluded from training entirely
            if grandparent in pos_folders:
                pos_by_video.setdefault(fn, set()).add(st)
            elif negative_folder and grandparent == negative_folder:
                neg_by_video.setdefault(fn, set()).add(st)
@@ -590,11 +1005,19 @@ class ProcessedDB:
                    result.append(t)
            return result
        # Build filename→path lookup from playlist
        playlist_lookup: dict[str, str] = {}
        if playlist_paths:
            for p in playlist_paths:
                playlist_lookup[os.path.basename(p)] = p
        # Include videos that have positives OR explicit negatives
        all_videos = set(pos_by_video) | set(neg_by_video)
        result = []
        for fn in all_videos:
            sp = source_by_filename.get(fn, "")
            if not sp or not os.path.exists(sp):
                sp = playlist_lookup.get(fn, "")
            if not sp or not os.path.exists(sp):
                if fallback_video_dir:
                    sp = os.path.join(fallback_video_dir, fn)
@@ -777,6 +1200,41 @@ class ProcessedDB:
            )
            self._con.commit()
    def insert_scan_result(self, filename: str, profile: str, model: str,
                           start: float, end: float, score: float,
                           disabled: bool, orig_start: float, orig_end: float,
                           scan_timestamp: str = "") -> int:
        """Insert a single scan result row; returns its new id."""
        if not self._enabled:
            return -1
        with self._lock:
            cur = self._con.execute(
                "INSERT INTO scan_results"
                " (filename, profile, model, start_time, end_time, score,"
                "  disabled, orig_start_time, orig_end_time, scan_timestamp)"
                " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (filename, profile, model, start, end, score,
                 1 if disabled else 0, orig_start, orig_end, scan_timestamp),
            )
            self._con.commit()
            return int(cur.lastrowid or -1)
    def update_scan_result_full(self, row_id: int, start: float, end: float,
                                score: float, orig_start: float,
                                orig_end: float) -> None:
        """Update bounds, score and orig_* fields — used after merging rows."""
        if not self._enabled:
            return
        with self._lock:
            self._con.execute(
                "UPDATE scan_results"
                " SET start_time = ?, end_time = ?, score = ?,"
                "     orig_start_time = ?, orig_end_time = ?"
                " WHERE id = ?",
                (start, end, score, orig_start, orig_end, row_id),
            )
            self._con.commit()
    def get_scan_models(self, filename: str, profile: str) -> list[str]:
        """Return model names that have scan results for this file."""
        if not self._enabled:
@@ -78,6 +78,7 @@ def build_ffmpeg_command(
    crop_center: float = 0.5,
    image_sequence: bool = False,
    encoder: str = "libx264",
    duration: float = 8.0,
 ) -> list[str]:
    # -ss before -i: fast input-seeking. Safe here because we always re-encode,
    # so there is no keyframe-alignment issue from pre-input seek.
@@ -96,7 +97,7 @@ def build_ffmpeg_command(
        "-threads", "0",
        "-ss", str(start),
        "-i", input_path,
-        "-t", "8",
+        "-t", str(duration),
    ]
    filters: list[str] = []
@@ -141,14 +142,15 @@ def build_ffmpeg_command(
    return cmd
-def build_audio_extract_command(input_path: str, start: float, sequence_dir: str) -> list[str]:
+def build_audio_extract_command(input_path: str, start: float, sequence_dir: str,
                                duration: float = 8.0) -> list[str]:
    """Return an ffmpeg command that extracts audio to <sequence_dir>.wav."""
    audio_path = sequence_dir + ".wav"
    return [
        _bin("ffmpeg"), "-y",
        "-ss", str(start),
        "-i", input_path,
-        "-t", "8",
+        "-t", str(duration),
        "-vn",
        "-c:a", "pcm_s16le",
        audio_path,
Author	SHA1	Message	Date
Ethanfel	1d49ce7cee	perf: run waveform ffmpeg at low priority so it yields to mpv on load The first load of a file decodes the whole audio track in a background thread; nice'ing it (os.nice(15)) reduces disk/CPU contention with mpv during the initial open. Result is cached, so subsequent loads are fast. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-06 12:14:04 +02:00
Ethanfel	109bc658c3	feat: flag playlist files missing from disk (⚠ orange strikethrough) Missing files are kept in the list instead of being silently dropped on load, and styled distinctly with a tooltip. add_files gains allow_missing; tab restore keeps missing entries so they're visible. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-06 12:12:03 +02:00
Ethanfel	ec7138f51b	feat: single Disable all / Enable all for every subcategory at once Replace the per-folder submenu/buttons with one batch action: "Disable all" moves every enabled subcategory (excluding the main folder and already-disabled ones) to _disabled in one click; "Enable all" restores them. Available in both the playlist right-click menu and the Sub button menu. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 14:19:49 +02:00
Ethanfel	68c633ab46	feat: add "Disable all in" / "Enable all in" to playlist right-click menu Folder-wide disable/enable is now reachable right next to the per-video "Disable in" submenu, listing every subcategory in the profile (not just ones tied to the loaded video). Backed by profile-wide subcategory counts pushed to the playlist in _refresh_playlist_checks. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 14:14:42 +02:00
Ethanfel	d0a94e7b68	fix: Sub menu lists all profile subcategories so Disable/Enable all is reachable Previously the Sub menu only showed folders from the current video's markers plus configured subprofiles, so subcategories without clips on the loaded video (or without a matching subprofile) never appeared. Now it also includes every subcategory that has clips anywhere in the profile (active or disabled). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 14:11:11 +02:00
Ethanfel	632c2dc076	feat: disable/enable all clips in a subcategory folder at once - Sub menu now has per-folder "Disable all" / "Enable all" buttons with live counts - relocate_video_clips accepts filename=None to move every video's clips in a folder - get_all_folder_counts returns profile-wide per-folder counts (incl _disabled) - Disable-all confirms before moving; both refresh markers + playlist counts Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 14:08:20 +02:00
Ethanfel	0f335c5e66	feat: tabbed file lists with editable labels - Wrap the playlist in a QTabWidget; each tab is its own file list - "+" corner button adds tabs; double-click a tab to rename inline; tabs are closable (last tab protected) and movable - self._playlist now resolves to the active tab's PlaylistWidget - Persist tabs (label + files + separators) per profile as JSON; falls back to legacy session_files/separators on first load - Filter box and playlist filters apply to the active tab; tab switches reapply filters and refresh marks - Profile switch/duplicate/delete now save/load/copy/remove per-profile tab state Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 13:30:18 +02:00
Ethanfel	f1f8fd5244	feat: playlist separator can be added above or below a file - Context menu offers both "Add/Remove separator above" and "below" - "Below" anchors to the next visible file, or a trailing line via end sentinel when clicking the last file - End sentinel preserved across rebuilds and persisted per profile Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 12:47:43 +02:00
Ethanfel	299779cf29	feat: disable videos per-subcategory, named models, multi-category training, playlist separators - Train dialog: multi-select positive subcategories via checkbox list, optional model name suffix ({profile}_{model}_{name}.joblib) - list_trained_models recognizes named model variants - Disable a video per-subcategory: moves its clips to a sibling {subcat}_disabled folder, rewrites DB output_path, migrates dataset.json, marks the name red - Disabled clips excluded from training, stats, timeline, and playlist counts - Playlist per-video count reflects only visible, non-disabled subcategories - Persist subcategory show/hide visibility per profile across restarts - Add/remove playlist separator rows (right-click) to mark batches, persisted per profile Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-06-05 12:45:03 +02:00
Ethanfel	56218c18f4	feat: speech detection, format export buttons, subcategory controls, crop overlay during playback - Add speech detection via faster-whisper with red waveform coloring for speech regions - Add format variant export buttons (P/S) next to Export and subprofile buttons when portrait/square enabled - Add force_ratio parameter to _on_export for deterministic format exports - Add subcategory show/hide with persistent checkbox menu (no longer closes on toggle) - Show crop overlay lines during video playback, not just when paused - Delete marker now also removes files from disk and cleans up annotations - Clear all markers also deletes files and DB entries - Add playlist text filter, clip spread tick lines on timeline - Fix LD_PRELOAD for GLIBCXX in conda launcher Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-14 18:23:43 +02:00
Ethanfel	2c45aff668	feat: add delete-from-disk option in playlist context menu Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-10 11:38:37 +02:00
Ethanfel	07e2f733b9	feat: bulk update source paths in train dialog Add ProcessedDB.update_source_paths() to re-resolve missing or stale source_path entries by matching filenames against a directory listing and the current playlist. Exposed as "Update paths" button in the train dialog next to the video dir field. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-09 13:47:48 +02:00
Ethanfel	8c5a4c4524	fix: marker labels show actual m-number from filename instead of time order Extract the manual export counter (m1, m2, ...) from the output path so timeline markers match their filenames. Falls back to sequential numbering for old-format paths without m-prefix. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-04 11:42:15 +02:00
Ethanfel	4e5b631efb	fix: right-click delete works on other-folder markers too The context menu hit test only searched the current folder's markers. Now also checks other-folder markers so the delete option appears for subprofile markers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-04 11:38:49 +02:00
Ethanfel	ec77b8224f	feat: show other-folder markers in distinct colors on timeline Subprofile/subfolder exports now appear as colored markers (yellow, green, blue, purple, orange) with their own numbering, separate from the main folder's red markers. Each folder gets its own color and independent sequence numbers. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-04 11:36:38 +02:00
Ethanfel	9becd5a06d	fix: filter timeline markers by current export folder Subprofile exports (folder_suffix) created markers that interleaved with main folder markers, shifting their numbering. Now get_markers and _get_markers_for accept an export_folder parameter and use SQL LIKE to only return markers whose output_path is in that folder. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-04 11:32:39 +02:00
Ethanfel	fae5560e2d	feat: overview scrollbar on timeline when zoomed in Thin 8px scrollbar appears above the ruler when the timeline is zoomed. Shows a draggable thumb representing the current view window. Click outside the thumb to jump, drag the thumb to pan. Ruler and track shift down to make room. Scrollbar hidden when not zoomed. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-03 11:47:37 +02:00
Ethanfel	07e3a1223c	fix: unpack 4-tuple markers in export overlap check The marker format was extended to include clip_span but the overlap check in _on_export still unpacked 3 values, causing a crash on export. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-03 11:43:15 +02:00
Ethanfel	3af6e05fb7	fix: use exact seeking instead of keyframe-based seeking mpv's "absolute" seek lands on the nearest keyframe before the target, causing playback to start ~3s before the marker. Switch to "absolute+exact" for both seek() and play_loop() so playback starts at the precise requested time. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-03 11:39:57 +02:00
Ethanfel	d787871735	fix: auto-pan timeline to follow playback position when zoomed in Revert span opacity back to 35 (was fine). The actual issue was the play position line disappearing when scrolled out of the zoomed view. Now set_play_position auto-pans the view window to keep the playback marker visible with a 10% margin. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-03 11:36:25 +02:00
Ethanfel	85c08d7c48	fix: seek to exact marker position on click, increase clip span visibility - _on_marker_clicked now explicitly sets cursor and seeks mpv to start_time instead of relying on the timeline's indirect seek chain - Doubled clip span area opacity (35 → 70) so spans are always visible - Trigger end-frame preview after config restoration on marker click Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-03 11:34:36 +02:00
Ethanfel	f6966a092a	feat: per-profile playlists, marker span display, precise marker seek - Per-profile playlist persistence (session_files/{profile} in QSettings) - Training data resolves source videos via playlist paths before fallback dir - Guard against deleted video files in _load_file - Fix marker double-click to seek to exact marker time instead of click pixel - Show manual clip spans as light amber areas on the timeline - Extend marker tuples with clip_span from DB (clip_duration + overlap) Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-05-02 17:11:50 +02:00
Ethanfel	7cee3ab768	fix: default embedding model to EAT_LARGE Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-28 15:49:51 +02:00
Ethanfel	47f910644d	feat: configurable clip duration, playback speed, Windows WId embedding Add clip duration spinner (2–30s, default 8s) replacing all hardcoded 8.0 references. Store clip_duration in DB for accurate re-export span calculations. Add x2/x4 playback speed toggle buttons. On Windows, mpv renders directly into the widget's native window handle (WId embedding) instead of slow FBO readback; crop overlays use a transparent child widget. Fix _poll_render crash when player is None after closeEvent. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-28 15:18:37 +02:00
Ethanfel	e972c7a2ae	feat: re-export rework, delete profile, shared path protection Re-export dialog now offers two modes: keep section length (adjust clip count) or keep clip count (adjust section length). Files shared with other profiles are preserved during re-export. Vid folder is resolved before DB deletions to reuse existing folders. Add delete profile option with confirmation dialog. Profile duplication now copies all tables including processed exports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-28 14:57:54 +02:00
Ethanfel	cb805c5bda	feat: add re-export button and duplicate profile option Re-export button (next to Spread spinner) re-exports all manual clips for the current file into the current folder with the new spread value. Old files are deleted from their original locations first. Duplicate profile option in the profile dropdown copies scan_results, hard_negatives, and hidden_files to a new profile name (exports are not copied since they reference file paths tied to the source profile). Also widened get_profiles() to include profiles that only have scan_results or hard_negatives, not just exports. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>	2026-04-28 08:24:13 +02:00
Ethanfel	bf14247b00	feat: auto-pan timeline to selected scan region when zoomed When a scan result row is clicked, if the active region falls outside the current zoomed view the view centers on the region (and widens if the region is larger than the current span). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 15:49:55 +02:00
Ethanfel	73396659dc	feat: add timeline zoom and pan for precise edge editing Ctrl+scroll zooms the timeline view around the mouse. Middle-mouse drag pans when zoomed. Scrolling all the way out clamps back to full view. While dragging a scan region edge with Shift, the view auto-pans when the mouse approaches the widget border so you can extend a region past the visible range. All paint and hit-test paths now route through _time_to_x / _pos_to_time helpers backed by a _view_start / _view_span window, so existing interactions (seek, marker click, edge resize, keyframe context menu) all adapt naturally to the zoom level. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 15:41:56 +02:00
Ethanfel	c8bc629419	feat: merge scan rows and strengthen Ctrl+Z undo Add "Merge N rows" context-menu option that combines selected scan rows into one (min start, max end, max score), with full undo support. Ctrl+Z is now an application-wide shortcut so it works regardless of which widget has focus. Negatives undo now respects the exported-green row color instead of reverting to default. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 15:20:06 +02:00
Ethanfel	de8840e1eb	feat: adapt export button for selection; show markers in review mode - Scan panel button now reads "Export Selected (N)" while rows are selected, mirroring the clip-count estimate used for full exports. Selection changes fire an explicit signal so the label refreshes. - Export markers remain visible on the timeline in scan/review mode. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 13:12:00 +02:00
Ethanfel	def966a913	feat: delete-export right-click and partial scan export on selection - Right-click on exported (green) rows shows "Delete export" to wipe associated clip files, annotations, DB rows and empty vid folders; scan panel, markers and playlist badge refresh afterwards. - Exporting with rows selected in the scan panel now runs a partial export: prior scan exports are preserved, and the area index for new clip filenames is offset past existing a-suffixes in the vid folder to avoid collisions. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 13:04:01 +02:00
Ethanfel	bc4ae21153	feat: color exported scan result rows green Scan panel rows whose range contains an exported clip's start time are colored green. Priority: disabled > negative > exported > default. Exported state refreshes automatically after an auto-export batch completes on the current file. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 12:50:12 +02:00
Ethanfel	a731fbfc32	feat: highlight active scan region on timeline when row clicked Draws a yellow outline around the scan region corresponding to the selected/clicked row, so overlapping regions can be distinguished. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>	2026-04-21 11:34:23 +02:00