feat: vid folder layout, changelog popup, shift-to-resize, DB migration

- Export layout changed from clip_NNN group dirs to vid_NNN per-video folders
- Automatic DB migration rewrites old paths and moves files on startup
- Per-video counter with DB cross-check to prevent overwrites
- Changelog popup on version bump with "don't show again" checkbox
- Scan region resize now requires Shift+drag to prevent accidental edits
- Recalculate vid folder and counter on file load
- Add EAT_LARGE embedding model variant
- Update tests for new flat export path structure

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-19 17:01:37 +02:00
parent d8b3972bdc
commit 6c1d42adfe
7 changed files with 509 additions and 816 deletions
@@ -65,6 +65,7 @@ _EMBED_MODELS = {
    "AST":                 768,
    "AST_ML":             3072,   # 768 * 4
    "EAT":                 768,
    "EAT_LARGE":          1024,
 }
 _DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
@@ -104,11 +105,13 @@ def _get_w2v_model(model_name: str | None = None):
            _ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
                "MIT/ast-finetuned-audioset-10-10-0.4593"
            )
-        elif load_name == "EAT":
+        elif load_name in ("EAT", "EAT_LARGE"):
            from transformers import AutoModel
            eat_repo = ("worstchan/EAT-large_epoch20_finetune_AS2M"
                        if load_name == "EAT_LARGE"
                        else "worstchan/EAT-base_epoch30_finetune_AS2M")
            _w2v_model = AutoModel.from_pretrained(
-                "worstchan/EAT-base_epoch30_finetune_AS2M",
+                eat_repo, trust_remote_code=True,
                trust_remote_code=True,
            ).to(_w2v_device)
        else:
            import torchaudio
@@ -254,7 +257,7 @@ def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
    model, device = _get_w2v_model(model_name)
    is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
    is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
-    is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
+    is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
    ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
    # Auto-size batches based on available GPU memory
    batch_size = 16
@@ -383,7 +386,7 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
    is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
    is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
-    is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
+    is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
    ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
    for batch_start in range(0, len(valid_times), batch_size):
@@ -141,6 +141,92 @@ class ProcessedDB:
            " ON hard_negatives(filename, profile)"
        )
        self._con.commit()
        self._migrate_vid_folders()
    def _migrate_vid_folders(self) -> None:
        """Migrate old clip_NNN group dirs → vid_NNN per-video folders.

        Old layout: export_folder/clip_NNN/clip_NNN_sub.mp4
        New layout: export_folder/vid_NNN/clip_NNN_sub.mp4

        Rewrites ``output_path`` in the ``processed`` table and moves the
        files on disk.  One ``vid_NNN`` name is used per
        ``(profile, export_folder, filename)``:

        * a video that already has rows inside a ``vid_NNN`` folder keeps
          that folder, so re-running on a partially migrated database never
          re-numbers it;
        * every other video gets the lowest free number within its
          ``(profile, export_folder)``, handed out in
          ``ORDER BY profile, filename, output_path`` order.

        A row whose file cannot be moved (``OSError``) is logged and left
        untouched in the DB, so it is retried the next time the migration
        runs instead of aborting the caller.  Rows whose source file is
        missing, or whose destination already exists, are only re-pointed.
        """
        # Cheap probe for any row that may still use the old layout.  In SQL
        # LIKE, "_" matches any single character, so this is broader than a
        # literal "clip_"; a false positive is harmless because rows already
        # in a vid_* folder are never rewritten below.
        row = self._con.execute(
            "SELECT id FROM processed WHERE output_path LIKE '%/clip_%/%' LIMIT 1"
        ).fetchone()
        if not row:
            return
        _log("Migrating old clip group dirs → vid folders …")
        rows = self._con.execute(
            "SELECT id, filename, profile, output_path FROM processed"
            " ORDER BY profile, filename, output_path"
        ).fetchall()

        # Pass 1: record vid folders that are already in use and collect the
        # rows that still need migrating.
        vid_map: dict[tuple, str] = {}
        used: dict[tuple, set[str]] = {}
        pending: list[tuple[int, tuple, str, str]] = []
        for rid, filename, profile, op in rows:
            if not op:
                # No path to derive a folder from; rewriting it would only
                # produce a bogus "vid_NNN/" entry.
                continue
            parent = os.path.dirname(op)
            parent_name = os.path.basename(parent)
            export_folder = os.path.dirname(parent)
            key = (profile, export_folder, filename)
            if parent_name.startswith("vid_"):
                vid_map.setdefault(key, parent_name)
                used.setdefault((profile, export_folder), set()).add(parent_name)
            else:
                pending.append((rid, key, parent, op))
        if not pending:
            return

        # Pass 2: give every not-yet-mapped video the next free vid_NNN.
        # NOTE(review): numbering is per (profile, export_folder), as before;
        # two profiles exporting into the same folder can therefore share a
        # vid_NNN directory — confirm whether that is intended.
        next_n: dict[tuple, int] = {}
        for _rid, key, _parent, _op in pending:
            if key in vid_map:
                continue
            counter_key = key[:2]
            taken = used.setdefault(counter_key, set())
            n = next_n.get(counter_key, 1)
            while f"vid_{n:03d}" in taken:
                n += 1
            vid_name = f"vid_{n:03d}"
            vid_map[key] = vid_name
            taken.add(vid_name)
            next_n[counter_key] = n + 1

        # Pass 3: move files, remembering which rows may be re-pointed.
        import shutil
        updates: list[tuple[str, int]] = []
        old_dirs: set[str] = set()
        moved = 0
        for rid, key, parent, op in pending:
            vid_dir = os.path.join(key[1], vid_map[key])
            new_path = os.path.join(vid_dir, os.path.basename(op))
            try:
                os.makedirs(vid_dir, exist_ok=True)
                if os.path.exists(op) and not os.path.exists(new_path):
                    shutil.move(op, new_path)
                    moved += 1
            except OSError as exc:
                # Keep the old path in the DB so the row still points at the
                # real file and is picked up again on the next run.
                _log(f"vid folder migration: could not move {op!r}: {exc}")
                continue
            updates.append((new_path, rid))
            old_dirs.add(parent)

        # Update DB
        if updates:
            self._con.executemany(
                "UPDATE processed SET output_path = ? WHERE id = ?", updates
            )
            self._con.commit()

        # Remove empty old group directories (best effort).
        for d in sorted(old_dirs, reverse=True):
            try:
                if os.path.isdir(d) and not os.listdir(d):
                    os.rmdir(d)
            except OSError:
                pass
        _log(f"Migrated {len(updates)} rows, moved {moved} files to vid folders")
    def add(self, filename: str, start_time: float, output_path: str,
            label: str = "", category: str = "",
@@ -306,8 +392,8 @@ class ProcessedDB:
    def get_max_counter(self, folder: str, name: str) -> int:
        """Return the highest counter N found in output_paths matching folder/name_NNN*.
-        Parses the group directory component (e.g. 'clip_035') from stored
+        Parses the counter from filenames (e.g. 'clip_035_0.mp4' → 35).
-        output_path values.  Returns 0 if no matches exist.
+        *folder* is typically the vid folder.  Returns 0 if no matches exist.
        """
        if not self._enabled:
            return 0
@@ -318,24 +404,66 @@ class ProcessedDB:
            (prefix + "%",),
        ).fetchall()
        max_n = 0
        name_prefix = name + "_"
        for (op,) in rows:
-            # output_path: .../folder/name_NNN/name_NNN_sub.ext
+            stem = os.path.splitext(os.path.basename(op))[0]
-            parent = os.path.basename(os.path.dirname(op))
+            # stem: "clip_035_0" or "clip_036_a1_0"
-            # parent should be "name_NNN"
+            if not stem.startswith(name_prefix):
-            parts = parent.rsplit("_", 1)
+                continue
-            if len(parts) == 2:
+            rest = stem[len(name_prefix):]  # "035_0" or "036_a1_0"
-                try:
+            counter_str = rest.split("_")[0]
-                    max_n = max(max_n, int(parts[1]))
+            try:
-                except ValueError:
+                max_n = max(max_n, int(counter_str))
-                    pass
+            except ValueError:
                pass
        return max_n
    def get_vid_folder(self, filename: str, profile: str,
                       export_folder: str) -> str:
        """Return the vid_NNN folder name for a source video.

        Looks at every stored ``output_path`` of ``(filename, profile)`` and
        returns the first parent directory named ``vid_*``.  All rows are
        inspected (not just one), so a single row outside a vid folder does
        not hide an existing assignment.

        If the video has no vid folder yet, returns the lowest ``vid_NNN``
        (counting from 1) that is used neither by any ``output_path`` of the
        profile nor by a ``vid_*`` directory on disk inside *export_folder*
        (orphan folders without DB rows).

        Returns ``"vid_001"`` when the database is disabled.
        """
        if not self._enabled:
            return "vid_001"

        # 1) Re-use the folder this video already lives in, if any.
        own_rows = self._con.execute(
            "SELECT output_path FROM processed"
            " WHERE filename = ? AND profile = ?",
            (filename, profile),
        ).fetchall()
        for (op,) in own_rows:
            parent = os.path.basename(os.path.dirname(op))
            if parent.startswith("vid_"):
                return parent

        # 2) Collect all existing vid_NNN names from DB + disk.
        existing: set[str] = set()
        rows = self._con.execute(
            "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
            (profile,),
        ).fetchall()
        for (op,) in rows:
            p = os.path.basename(os.path.dirname(op))
            if p.startswith("vid_"):
                existing.add(p)
        if os.path.isdir(export_folder):
            for d in os.listdir(export_folder):
                if d.startswith("vid_") and os.path.isdir(
                    os.path.join(export_folder, d)
                ):
                    existing.add(d)

        # 3) First free number wins.
        n = 1
        while f"vid_{n:03d}" in existing:
            n += 1
        return f"vid_{n:03d}"
    def get_export_folders(self, profile: str = "default",
                           include_scan_exports: bool = False) -> list[str]:
        """Return distinct export folder names found in output_paths for a profile.
        Export paths follow the structure:
-            .../export_folder/group_dir/clip.mp4
+            .../export_folder/vid_NNN/clip.mp4
        The export folder is 2 levels up from the clip file.
        Returns folder names sorted alphabetically (e.g. ["mp4_Intense", "mp4_Soft"]).
        """
@@ -25,15 +25,19 @@ def _log(*args) -> None:
 def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
-    group = f"{basename}_{counter:03d}"
+    """Build clip output path.  *folder* should be the vid folder (e.g. .../mp4/vid_001)."""
-    name = f"{group}_{sub}" if sub is not None else group
+    name = f"{basename}_{counter:03d}"
-    return os.path.join(folder, group, name + ".mp4")
+    if sub is not None:
        name = f"{name}_{sub}"
    return os.path.join(folder, name + ".mp4")
 def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
-    group = f"{basename}_{counter:03d}"
+    """Build WebP sequence output dir.  *folder* should be the vid folder."""
-    name = f"{group}_{sub}" if sub is not None else group
+    name = f"{basename}_{counter:03d}"
-    return os.path.join(folder, group, name)
+    if sub is not None:
        name = f"{name}_{sub}"
    return os.path.join(folder, name)
 def format_time(seconds: float) -> str:
@@ -1,6 +1,6 @@
-# Scan History & Hard Negative Management Design
+# Scan History & Hard Negative Management — Final Design
-Date: 2026-04-19
+Date: 2026-04-19 (implemented on `feat/training-ui`)
 ## Goal
@@ -8,83 +8,198 @@ Date: 2026-04-19
 2. Make hard negatives manageable — viewable, removable, and optionally disabled per training run
 3. Fix latent bug: `get_export_folders()` doesn't filter by `scan_export`
-## 1. Scan Result History
+---
-### Current behavior
+## 1. Ghost Folder Fix
 `save_scan_results()` **replaces** all results for `(filename, profile, model)` on every scan. No history is preserved.
 ### Change
 Keep the last N scan results per `(filename, profile, model)` with timestamps. The most recent is the "active" result displayed in the panel; older versions are accessible for comparison.
 ### Schema change
 Add column to `scan_results`:
 ```sql
 ALTER TABLE scan_results ADD COLUMN scan_timestamp TEXT NOT NULL DEFAULT '';
 ```
 All rows from the same scan share the same timestamp string (e.g. `"20260419_143022"`).
 ### save_scan_results changes
 Instead of `DELETE ... WHERE filename=? AND profile=? AND model=?`, the new flow:
 1. Insert new rows with current timestamp
 2. Count distinct timestamps for this `(filename, profile, model)`
 3. If count > N (default 5), delete rows belonging to the oldest timestamps
 ### UI changes
 Add a small version dropdown/selector in `ScanResultsPanel` per model tab — shows timestamps of available scan versions. Selecting a version loads that version's results into the tab. The most recent is selected by default.
 The tab label shows the active version's region count, e.g. `HUBERT_XLARGE (12) [v3]`.
 ### Cache interaction
 Embedding cache is per `(file, model)` and doesn't change across scans. Only the classifier output changes. History stores the classified regions (start, end, score), not embeddings.
 ## 2. Hard Negative Management
 ### Current behavior
 - Hard negatives stored in `hard_negatives` table: `(filename, profile, start_time, source_path)`
 - No model column — applied globally within a profile
 - Removable one-by-one via N toggle in scan panel, but no bulk management
 - Always used in training — no way to disable
 ### Changes
 #### Schema
 Add `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives`. Populated when marking negatives from scan results (we know which model tab is active).
 #### Training toggle
 New checkbox in `TrainDialog`: **"Use hard negatives"** (default checked). When unchecked, `get_training_data()` skips the `hard_negatives` query entirely. Non-destructive — negatives remain in DB.
 #### Management dialog
 New `HardNegativesDialog` accessible from Train dialog via "Manage..." button next to the checkbox. Shows:
 - Table: filename, start time, source model, date added (if we add created_at)
 - Filter by source model (dropdown)
 - Multi-select + Delete button
 - "Clear All" button with confirmation
 - Count summary at top
 ### Training integration
 `get_training_data()` gets a new `use_hard_negatives: bool = True` parameter. When False, the hard negatives query (lines 365-374 of db.py) is skipped entirely.
 ## 3. Ghost Folder Fix
 ### Bug
-`get_export_folders()` queries all `output_path` rows without filtering `scan_export`. Folders that only contain scan-exported clips appear in training dropdowns with 0 clips.
+`get_export_folders()` queried all `output_path` rows without filtering `scan_export`. Folders that only contained scan-exported clips appeared in training dropdowns with 0 clips.
-### Fix
+### Implementation (`core/db.py`)
-Add `include_scan_exports` parameter to `get_export_folders()`. When False (default), only query rows with `scan_export = 0`. Also filter out folders with 0 clips from `get_training_stats()` result dict.
+**`get_export_folders(profile, include_scan_exports=False)`** — new parameter. When `False` (default), the SQL query adds `AND scan_export = 0` to exclude scan-only folders. The `get_training_stats()` method passes this through and also filters its return dict to remove folders with 0 clips:
 ```python
 return {k: v for k, v in stats.items() if v["clips"] > 0}
 ```
 ### Test
 `tests/test_db.py::test_export_folders_excludes_scan_exports` — verifies scan-only folders are excluded by default and included when `include_scan_exports=True`.
 ---
 ## 2. Scan Result History
 ### Schema
 Added column to `scan_results`:
 ```sql
 scan_timestamp TEXT NOT NULL DEFAULT ''
 ```
 All rows from the same scan share one timestamp string with **microsecond precision** (`%Y%m%d_%H%M%S_%f`, e.g. `"20260419_143022_123456"`). Microsecond precision prevents version collisions on fast successive scans.
 Migration adds the column via `ALTER TABLE` for existing databases. Legacy rows keep `scan_timestamp = ''`.
 ### DB methods (`core/db.py`)
 **`save_scan_results(filename, profile, model, regions, max_versions=5)`**
 1. Inserts new rows with current microsecond-precision timestamp
 2. Counts distinct timestamps for this `(filename, profile, model)`
 3. Prunes oldest timestamps beyond `max_versions`
 No more DELETE-then-INSERT — all versions coexist in the table.
 **`get_scan_versions(filename, profile, model)`**
 Returns `[{timestamp, count, max_score}, ...]` ordered newest first. Filters `scan_timestamp != ''` so legacy rows don't appear as named versions.
 **`get_scan_results(filename, profile, scan_timestamp=None)`**
 - With `scan_timestamp`: returns rows matching that exact version
 - Without (default): uses `INNER JOIN` subquery with `MAX(scan_timestamp)` per model to return only the latest version. Legacy rows (empty timestamp) sort before any real timestamp, so they're returned when no versioned scans exist.
 ### UI (`main.py` — `ScanResultsPanel`)
 Each model tab wraps its `QTableWidget` in a container `QWidget` with a `QComboBox` for version selection:
 ```
 container (QWidget)
 ├── cmb_version (QComboBox) — hidden when ≤ 1 version
 └── table (QTableWidget)
 ```
 **Helper methods** unwrap this container:
 - `_current_table()` — returns `QTableWidget` from active tab (handles both raw table and container)
 - `_tab_table(index)` — same by tab index
 **Version combo** is populated by `_populate_version_combos()` after every `load_for_file()` and `add_scan_results()` call. Labels use `datetime.strptime` parsing with try/except fallback for robustness:
 ```
 2026-04-19 14:30 (12 regions, best: 0.95)
 ```
 **Version switching** via `_on_version_changed(model, idx)`:
 1. Reads `scan_timestamp` from combo's `userData`
 2. Calls `get_scan_results(filename, profile, scan_timestamp=ts)`
 3. Repopulates the table in-place
 4. **Clears the undo stack** — stale undo entries from a different version would corrupt data
 5. Emits `regions_edited` to refresh the timeline
 **Tab switch** connects `tab_changed` signal to `_on_scan_regions_edited` (not just `_update_scan_export_count`), so the timeline updates scan regions when switching model tabs.
 ### Cache interaction
 Embedding cache is per `(file, model)` and doesn't change across scans. History stores classified regions (start, end, score), not embeddings.
 ### Test
 `tests/test_db.py::test_scan_result_history` — saves 3 versions, verifies counts, ordering, and latest-by-default behavior.
 ---
 ## 3. Hard Negative Management
 ### Schema
 Added column to `hard_negatives`:
 ```sql
 source_model TEXT NOT NULL DEFAULT ''
 ```
 Migration adds the column via `ALTER TABLE` for existing databases.
 ### DB methods (`core/db.py`)
 **`add_hard_negatives(filename, profile, times, source_path="", source_model="")`** — now stores which embedding model produced the scan that led to the negative marking.
 **`get_hard_negatives(profile)`** — returns all rows as `[{id, filename, start_time, source_path, source_model}, ...]` for the management dialog.
 **`delete_hard_negatives_by_ids(ids)`** — bulk delete by row IDs.
 **`get_training_data(..., use_hard_negatives=True)`** — new parameter. When `False`, the hard negatives query is skipped entirely. Non-destructive — negatives remain in DB.
 ### Source model tracking (`main.py`)
 `_on_scan_negatives()` now passes `source_model=self._scan_panel.current_model_name()` when marking negatives from scan results. `current_model_name()` extracts the model name from the active tab text (stripping the count suffix).
 ### Training toggle (`main.py` — `TrainDialog`)
 Checkbox **"Use hard negatives in training"** (default checked) with "Manage..." button in an HBox layout. The toggle:
 - Updates live training stats preview via debounced `_update_stats()`
 - Passes `use_hard_negatives` through `_open_train_dialog()` to `get_training_data()`
 ### Management dialog (`main.py` — `HardNegativesDialog`)
 Accessible from TrainDialog's "Manage..." button. Features:
 | Component | Details |
 |-----------|---------|
 | **Filter combo** | `(all)` + each distinct `source_model` found in data |
 | **Summary label** | `<b>N</b> hard negatives` |
 | **Table** | File, Time (`{:.1f}s`), Source Model, hidden ID column |
 | **Delete Selected** | Multi-select aware, skips hidden (filtered) rows |
 | **Clear All** | **Filter-aware**: if a model filter is active, only deletes negatives for that model with an appropriate confirmation message. If `(all)`, deletes everything. |
 | **Close** | Closes dialog, triggers stats refresh in parent TrainDialog |
 `blockSignals(True)` guards prevent spurious filter callbacks during `_load()` repopulation.
 ### Tests
 - `test_hard_negatives_source_model` — verifies source_model stored and retrieved
 - `test_training_data_skips_hard_negatives` — verifies `use_hard_negatives=False` excludes them
 - `test_delete_hard_negatives_by_ids` — verifies bulk deletion by ID
 ---
 ## 4. Runtime Fixes (discovered during testing)
 ### EAT/torchvision ABI mismatch
 **Problem:** `torchvision` installed from PyPI (CPU build) was incompatible with `torch` from CUDA wheel index, causing `operator torchvision::nms does not exist`.
 **Fix:** Added `torchvision` to the explicit torch install line in both setup scripts:
 ```bash
 pip install torch torchaudio torchvision --index-url "$TORCH_INDEX"
 ```
 Also added `--extra-index-url "$TORCH_INDEX"` to the `pip install -r requirements.txt` line to prevent transitive dependencies (timm, ultralytics) from pulling CPU-only torch packages.
 Applied to: `setup_env.sh` (both conda and venv paths), `setup-windows.ps1`.
 ### EAT / transformers 5.x incompatibility
 **Problem:** transformers 5.x broke EAT's remote model code (`'EATModel' object has no attribute 'all_tied_weights_keys'`).
 **Fix:** Pinned `transformers>=4.30,<5.0` in `requirements.txt`.
 ### NumPy non-writable array warning
 **Problem:** Cached HuBERT/EAT embeddings loaded from disk are read-only numpy arrays. `torch.from_numpy()` on a non-writable array triggers a `UserWarning`, because PyTorch does not support non-writable tensors.
 **Fix:** In `core/audio_scan.py`, changed EAT preprocessing to copy the array:
 ```python
 wav = torch.from_numpy(np.array(chunk)).unsqueeze(0).float()
 ```
 ### Timeline not updating on tab switch
 **Problem:** Switching model tabs in the scan results panel didn't refresh the timeline's highlighted regions because `tab_changed` was only connected to `_update_scan_export_count`.
 **Fix:** Connected `tab_changed` to `_on_scan_regions_edited` instead, which handles both timeline refresh and export count update.
 ---
 ## File Summary
 | File | Changes |
 |------|---------|
 | `core/db.py` | Schema migrations, `get_export_folders` filter, versioned `save_scan_results`, `get_scan_versions`, version-aware `get_scan_results`, `add_hard_negatives` with `source_model`, `get_hard_negatives`, `delete_hard_negatives_by_ids`, `get_training_data` with `use_hard_negatives` |
 | `main.py` | `HardNegativesDialog` class, `TrainDialog` hard neg toggle + manage button, `ScanResultsPanel` container/combo architecture, version combo population and switching, `current_model_name()`, tab-switch timeline fix |
 | `core/audio_scan.py` | `np.array(chunk)` copy for read-only numpy arrays in EAT preprocessing |
 | `requirements.txt` | `transformers>=4.30,<5.0` pin |
 | `setup_env.sh` | `torchvision` in torch install, `--extra-index-url` on requirements install |
 | `setup-windows.ps1` | `torchvision` in torch install, `--extra-index-url` on requirements install, removed skip-if-exists guard |
 | `tests/test_db.py` | 5 tests covering all DB-layer changes |
@@ -1,714 +1,94 @@
-# Scan History & Hard Negative Management Implementation Plan
+# Scan History & Hard Negative Management — Implementation Log
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+> All tasks complete. See the design doc for the final specification.
-**Goal:** Add scan result versioning, hard negative management dialog with training toggle, and fix ghost folder bug.
+**Branch:** `feat/training-ui`
 **Architecture:** DB schema changes in `core/db.py` (new columns, new queries). UI changes in `main.py` (version selector in ScanResultsPanel, management dialog, training toggle). No changes to `core/audio_scan.py`.
 **Tech Stack:** SQLite (existing), PyQt6 (existing)
 **Key design notes:**
 - Scan history stores N versions per `(filename, profile, model)` using a `scan_timestamp` column. All rows from one scan share the same timestamp.
 - Hard negatives gain a `source_model` column (informational) and training gains a `use_hard_negatives` toggle.
 - `get_export_folders()` must respect `scan_export` filter to prevent ghost folders.
 ---
-### Task 1: Fix ghost folder bug in get_export_folders
+### Task 1: Fix ghost folder bug in get_export_folders -- DONE
-**Files:**
+**Commit:** `2614a76 fix: get_export_folders respects scan_export filter`
 - Modify: `core/db.py:294-313` (get_export_folders)
 - Modify: `core/db.py:410-443` (get_training_stats — filter out 0-clip folders)
 - Test: `tests/test_db.py`
-**Step 1: Write failing test**
+- `core/db.py` — `get_export_folders(profile, include_scan_exports=False)`: filters `scan_export = 0` by default
-
+- `core/db.py` — `get_training_stats()`: passes `include_scan_exports` through, filters out 0-clip folders
-```python
+- `tests/test_db.py` — `test_export_folders_excludes_scan_exports`
 def test_export_folders_excludes_scan_exports():
    """Scan-export-only folders should not appear when include_scan_exports=False."""
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        path = f.name
    try:
        db = ProcessedDB(path)
        # Manual export
        db.add("a.mp4", 10.0, "/out/mp4_Intense/g1/clip.mp4", profile="test")
        # Scan export to different folder
        db.add("a.mp4", 20.0, "/out/mp4_ScanOnly/g1/clip.mp4", profile="test",
               scan_export=True)
        folders = db.get_export_folders("test")
        assert "mp4_Intense" in folders
        assert "mp4_ScanOnly" not in folders, "scan-only folder should be excluded"
        # With include_scan_exports=True, both should appear
        folders_all = db.get_export_folders("test", include_scan_exports=True)
        assert "mp4_ScanOnly" in folders_all
    finally:
        os.unlink(path)
 ```
 **Step 2: Fix get_export_folders**
 Add `include_scan_exports` parameter:
 ```python
 def get_export_folders(self, profile: str = "default",
                       include_scan_exports: bool = False) -> list[str]:
    if not self._enabled:
        return []
    if include_scan_exports:
        rows = self._con.execute(
            "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
            (profile,),
        ).fetchall()
    else:
        rows = self._con.execute(
            "SELECT DISTINCT output_path FROM processed"
            " WHERE profile = ? AND scan_export = 0",
            (profile,),
        ).fetchall()
    folder_names: set[str] = set()
    for (op,) in rows:
        grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
        if grandparent:
            folder_names.add(grandparent)
    return sorted(folder_names)
 ```
 **Step 3: Update get_training_stats to pass through**
 ```python
    folders = self.get_export_folders(profile, include_scan_exports=include_scan_exports)
 ```
 And filter out empty folders at the end:
 ```python
    return {k: v for k, v in stats.items() if v["clips"] > 0}
 ```
 **Step 4: Run tests, commit**
 ```bash
 pytest tests/ -v
 git add core/db.py tests/test_db.py
 git commit -m "fix: get_export_folders respects scan_export filter"
 ```
 ---
-### Task 2: Scan result history — schema and DB methods
+### Task 2: Scan result history — schema and DB methods -- DONE
-**Files:**
+**Commit:** `4fb2ae1 feat: scan result history — keep N versions per (file, model)`
 - Modify: `core/db.py:86-98` (scan_results schema — add scan_timestamp column)
 - Modify: `core/db.py:100-113` (migration — add scan_timestamp to existing tables)
 - Modify: `core/db.py:447-468` (save_scan_results — version management)
 - Add: `core/db.py` (get_scan_versions, load_scan_version, delete_scan_version)
 - Test: `tests/test_db.py`
-**Step 1: Write failing test**
+- `core/db.py` — added `scan_timestamp TEXT NOT NULL DEFAULT ''` column with migration
-
+- `core/db.py` — `save_scan_results()`: versioned insert with microsecond-precision timestamp (`%Y%m%d_%H%M%S_%f`), auto-prunes beyond `max_versions=5`
-```python
+- `core/db.py` — `get_scan_versions()`: returns `[{timestamp, count, max_score}, ...]` newest first
-def test_scan_result_history():
+- `core/db.py` — `get_scan_results(scan_timestamp=None)`: `INNER JOIN` subquery with `MAX(scan_timestamp)` for latest-by-default
-    """save_scan_results should keep multiple versions."""
+- `tests/test_db.py` — `test_scan_result_history`
    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
        path = f.name
    try:
        db = ProcessedDB(path)
        # Save three versions
        db.save_scan_results("v.mp4", "test", "MODEL_A",
                             [(0, 8, 0.9)])
        db.save_scan_results("v.mp4", "test", "MODEL_A",
                             [(0, 8, 0.8), (10, 18, 0.7)])
        db.save_scan_results("v.mp4", "test", "MODEL_A",
                             [(5, 13, 0.95)])
        versions = db.get_scan_versions("v.mp4", "test", "MODEL_A")
        assert len(versions) == 3
        # Most recent first
        assert versions[0]["count"] == 1   # latest: 1 region
        assert versions[1]["count"] == 2   # middle: 2 regions
        assert versions[2]["count"] == 1   # oldest: 1 region
        # get_scan_results returns latest version by default
        results = db.get_scan_results("v.mp4", "test")
        assert len(results.get("MODEL_A", [])) == 1
    finally:
        os.unlink(path)
 ```
 **Step 2: Add scan_timestamp column**
 In the CREATE TABLE (line 87-98), add:
 ```sql
  scan_timestamp  TEXT NOT NULL DEFAULT ''
 ```
 In the migration block (lines 100-113), add:
 ```python
        ("scan_timestamp", "TEXT NOT NULL DEFAULT ''"),
 ```
 **Step 3: Modify save_scan_results**
 Replace the current DELETE+INSERT with versioned insert + cleanup:
 ```python
 def save_scan_results(self, filename: str, profile: str, model: str,
                      regions: list[tuple[float, float, float]],
                      max_versions: int = 5) -> None:
    if not self._enabled:
        return
    from datetime import datetime
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    with self._lock:
        self._con.executemany(
            "INSERT INTO scan_results"
            " (filename, profile, model, start_time, end_time, score,"
            "  orig_start_time, orig_end_time, scan_timestamp)"
            " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
            [(filename, profile, model, s, e, sc, s, e, ts)
             for s, e, sc in regions],
        )
        # Prune old versions beyond max_versions
        versions = self._con.execute(
            "SELECT DISTINCT scan_timestamp FROM scan_results"
            " WHERE filename = ? AND profile = ? AND model = ?"
            " ORDER BY scan_timestamp DESC",
            (filename, profile, model),
        ).fetchall()
        if len(versions) > max_versions:
            old_ts = [v[0] for v in versions[max_versions:]]
            self._con.execute(
                "DELETE FROM scan_results"
                " WHERE filename = ? AND profile = ? AND model = ?"
                f" AND scan_timestamp IN ({','.join('?' * len(old_ts))})",
                (filename, profile, model, *old_ts),
            )
        self._con.commit()
 ```
 **Step 4: Add get_scan_versions**
 ```python
 def get_scan_versions(self, filename: str, profile: str, model: str
                       ) -> list[dict]:
     """Summarise the stored scan versions for (filename, profile, model).
     Every distinct non-empty ``scan_timestamp`` counts as one version. The
     result is a list of ``{"timestamp", "count", "max_score"}`` dicts sorted
     by timestamp in descending order (newest first), or ``[]`` when the
     database is disabled.
     """
     if not self._enabled:
         return []
     query = (
         "SELECT scan_timestamp, COUNT(*), MAX(score)"
         " FROM scan_results"
         " WHERE filename = ? AND profile = ? AND model = ?"
         "   AND scan_timestamp != ''"
         " GROUP BY scan_timestamp"
         " ORDER BY scan_timestamp DESC"
     )
     cursor = self._con.execute(query, (filename, profile, model))
     # Each result row is (scan_timestamp, row count, best score)
     keys = ("timestamp", "count", "max_score")
     return [dict(zip(keys, record)) for record in cursor.fetchall()]
 ```
 **Step 5: Modify get_scan_results to support version selection**
 Add an optional `scan_timestamp` parameter. When it is `None` (the default), the latest version of each model is returned:
 ```python
 def get_scan_results(self, filename: str, profile: str,
                     scan_timestamp: str | None = None
                     ) -> dict[str, list[tuple]]:
    if not self._enabled:
        return {}
    if scan_timestamp:
        rows = self._con.execute(
            "SELECT id, model, start_time, end_time, score, disabled,"
            "       orig_start_time, orig_end_time"
            " FROM scan_results"
            " WHERE filename = ? AND profile = ? AND scan_timestamp = ?"
            " ORDER BY model, start_time",
            (filename, profile, scan_timestamp),
        ).fetchall()
    else:
        # For each model, get rows from the latest timestamp only
        rows = self._con.execute(
            "SELECT r.id, r.model, r.start_time, r.end_time, r.score,"
            "       r.disabled, r.orig_start_time, r.orig_end_time"
            " FROM scan_results r"
            " INNER JOIN ("
            "   SELECT model, MAX(scan_timestamp) AS latest"
            "   FROM scan_results"
            "   WHERE filename = ? AND profile = ?"
            "   GROUP BY model"
            " ) m ON r.model = m.model AND r.scan_timestamp = m.latest"
            " WHERE r.filename = ? AND r.profile = ?"
            " ORDER BY r.model, r.start_time",
            (filename, profile, filename, profile),
        ).fetchall()
    result: dict[str, list] = {}
    for row_id, model, s, e, sc, dis, os_, oe in rows:
        result.setdefault(model, []).append(
            (row_id, s, e, sc, bool(dis),
             os_ if os_ is not None else s,
             oe if oe is not None else e))
    return result
 ```
 **Important:** Legacy rows (before this change) have `scan_timestamp = ''`. The `MAX(scan_timestamp)` query handles this correctly — empty string sorts before any real timestamp, so legacy rows are returned when they're the only version. The `get_scan_versions` query filters `scan_timestamp != ''` so legacy rows don't appear as named versions.
 **Step 6: Run tests, commit**
 ```bash
 pytest tests/ -v
 git add core/db.py tests/test_db.py
 git commit -m "feat: scan result history — keep N versions per (file, model)"
 ```
 ---
-### Task 3: Scan history UI — version selector in ScanResultsPanel
+### Task 3: Scan history UI — version selector in ScanResultsPanel -- DONE
-**Files:**
+**Commit:** `8ed9fbf feat: scan version selector in results panel`
 - Modify: `main.py` (ScanResultsPanel — add version combo per tab)
 - Modify: `main.py` (ScanResultsPanel.load_for_file — populate versions)
-**Step 1: Add version combo to tab UI**
+- `main.py` — `_add_tab()`: wraps table in container `QWidget` with version `QComboBox` (hidden when ≤ 1 version)
-
+- `main.py` — `_current_table()` / `_tab_table(idx)`: unwrap container to get `QTableWidget`
-In `ScanResultsPanel._add_tab()`, add a small QComboBox above the table. When no history exists, hide it. When versions exist, populate with timestamps and connect to a slot that reloads the tab with that version.
+- `main.py` — `_populate_version_combos()`: queries `get_scan_versions()`, formats labels with `datetime.strptime` + try/except fallback
-
+- `main.py` — `_on_version_changed()`: reloads table from specific version, clears undo stack, emits `regions_edited`
-```python
+- `main.py` — `current_model_name()`: extracts model name from tab text
 # In _add_tab, create a container widget with version combo + table
 container = QWidget()
 layout = QVBoxLayout(container)
 layout.setContentsMargins(0, 0, 0, 0)
 cmb_version = QComboBox()
 cmb_version.setMaximumWidth(200)
 cmb_version.setToolTip("Scan version history")
 cmb_version.hide()  # Hidden when only 1 version
 layout.addWidget(cmb_version)
 layout.addWidget(table)
 self._tabs.addTab(container, label)
 ```
 Store the combo and table as properties on the container widget for later access.
 **Step 2: Populate versions in load_for_file**
 After creating each model tab, query `get_scan_versions()`. If > 1 version, show the combo with entries like `"2026-04-19 14:30 (12 regions, best: 0.95)"`. Connect `currentIndexChanged` to reload that version's results.
 **Step 3: Version switching slot**
 When user selects a different version from the combo:
 1. Call `db.get_scan_results(filename, profile, scan_timestamp=selected_ts)`
 2. Repopulate the table with that version's rows
 3. Update timeline regions
 **Step 4: Test manually, commit**
 ```bash
 git add main.py
 git commit -m "feat: scan version selector in results panel"
 ```
 ---
-### Task 4: Hard negatives — schema and training toggle
+### Task 4: Hard negatives — schema and training toggle -- DONE
-**Files:**
+**Commit:** `edc5784 feat: hard negative source_model tracking, training toggle`
 - Modify: `core/db.py:118-130` (hard_negatives schema — add source_model column)
 - Modify: `core/db.py:548-560` (add_hard_negatives — accept source_model)
 - Modify: `core/db.py:365-374` (get_training_data — use_hard_negatives parameter)
 - Modify: `main.py` (TrainDialog — add "Use hard negatives" checkbox)
 - Modify: `main.py` (_open_train_dialog — pass use_hard_negatives to get_training_data)
 - Test: `tests/test_db.py`
-**Step 1: Write failing test**
+- `core/db.py` — added `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives` with migration
-
+- `core/db.py` — `add_hard_negatives(source_model="")`: stores originating model
-```python
+- `core/db.py` — `get_hard_negatives(profile)`: returns full rows as list of dicts
-def test_hard_negatives_source_model():
+- `core/db.py` — `delete_hard_negatives_by_ids(ids)`: bulk delete by row IDs
-    """Hard negatives should store source_model."""
+- `core/db.py` — `get_training_data(use_hard_negatives=True)`: conditionally skips hard negatives query
-    with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
+- `main.py` — `TrainDialog`: "Use hard negatives" checkbox + "Manage..." button in HBox layout
-        path = f.name
+- `main.py` — `_on_scan_negatives()`: passes `source_model=self._scan_panel.current_model_name()`
-    try:
+- `tests/test_db.py` — `test_hard_negatives_source_model`, `test_training_data_skips_hard_negatives`, `test_delete_hard_negatives_by_ids`
        db = ProcessedDB(path)
        db.add_hard_negatives("a.mp4", "test", [10.0, 20.0],
                              source_path="/a.mp4", source_model="HUBERT_XLARGE")
        rows = db.get_hard_negatives("test")
        assert len(rows) == 2
        assert all(r["source_model"] == "HUBERT_XLARGE" for r in rows)
    finally:
        os.unlink(path)
 def test_training_data_skips_hard_negatives():
     """get_training_data with use_hard_negatives=False should skip them."""
     with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as tmp:
         db_path = tmp.name
     try:
         db = ProcessedDB(db_path)
         db.add("a.mp4", 10.0, "/out/folder/g/clip.mp4", profile="test",
                source_path="/videos/a.mp4")
         db.add_hard_negatives("a.mp4", "test", [500.0], source_path="/videos/a.mp4")
         # Query once with and once without the hard negatives
         data_with = db.get_training_data("test", "folder", use_hard_negatives=True)
         data_without = db.get_training_data("test", "folder", use_hard_negatives=False)
         # The video must be found when hard negatives are enabled
         assert len(data_with) >= 1
         # Element 3 of every video tuple is counted as its negatives
         neg_with = sum(len(video[3]) for video in data_with)
         neg_without = sum(len(video[3]) for video in data_without)
         # Equality is allowed: whether the counts differ depends on margin
         assert neg_with >= neg_without
     finally:
         os.unlink(db_path)
 ```
 **Step 2: Add source_model column to hard_negatives**
 In CREATE TABLE (lines 119-125), add:
 ```sql
  source_model TEXT NOT NULL DEFAULT ''
 ```
 In migration section, add after the hard_negatives table creation:
 ```python
 hn_cols = {
    row[1]
    for row in self._con.execute("PRAGMA table_info(hard_negatives)").fetchall()
 }
 if "source_model" not in hn_cols:
    self._con.execute(
        "ALTER TABLE hard_negatives ADD COLUMN source_model TEXT NOT NULL DEFAULT ''"
    )
 ```
 **Step 3: Update add_hard_negatives to accept source_model**
 ```python
 def add_hard_negatives(self, filename: str, profile: str,
                        times: list[float], source_path: str = "",
                        source_model: str = "") -> None:
     """Insert one ``hard_negatives`` row per entry of ``times``.
     All rows share ``filename``, ``profile``, ``source_path`` and
     ``source_model``; each entry of ``times`` becomes a row's ``start_time``.
     Does nothing when the database is disabled or ``times`` is empty.
     """
     if not (self._enabled and times):
         return
     with self._lock:
         # One batched insert, committed while the lock is still held
         self._con.executemany(
             "INSERT INTO hard_negatives"
             " (filename, profile, start_time, source_path, source_model)"
             " VALUES (?, ?, ?, ?, ?)",
             [(filename, profile, start, source_path, source_model)
              for start in times],
         )
         self._con.commit()
 ```
 **Step 4: Add get_hard_negatives (full rows for management dialog)**
 ```python
 def get_hard_negatives(self, profile: str) -> list[dict]:
     """Fetch every hard negative stored for ``profile``.
     Each row becomes a dict with the keys ``id``, ``filename``,
     ``start_time``, ``source_path`` and ``source_model``; rows are ordered by
     filename, then start time. Returns ``[]`` when the database is disabled.
     """
     if not self._enabled:
         return []
     columns = ("id", "filename", "start_time", "source_path", "source_model")
     cursor = self._con.execute(
         "SELECT id, filename, start_time, source_path, source_model"
         " FROM hard_negatives WHERE profile = ?"
         " ORDER BY filename, start_time",
         (profile,),
     )
     # The key order above mirrors the SELECT column order
     return [dict(zip(columns, record)) for record in cursor.fetchall()]
 ```
 **Step 5: Add delete_hard_negatives_by_ids**
 ```python
 def delete_hard_negatives_by_ids(self, ids: list[int]) -> None:
     """Delete the hard negatives whose row IDs are listed in ``ids``.
     The IDs are deleted in batches because SQLite caps the number of ``?``
     placeholders per statement (999 by default in builds older than 3.32),
     so one huge ``IN (...)`` list can fail with "too many SQL variables".
     All batches run in one transaction: it is committed once at the end and
     rolled back if any batch raises, so the delete stays all-or-nothing.
     Does nothing when the database is disabled or ``ids`` is empty.
     """
     if not self._enabled or not ids:
         return
     batch_size = 500  # comfortably below SQLite's historical 999 limit
     pending = list(ids)
     with self._lock:
         try:
             for offset in range(0, len(pending), batch_size):
                 batch = pending[offset:offset + batch_size]
                 placeholders = ",".join("?" * len(batch))
                 self._con.execute(
                     f"DELETE FROM hard_negatives WHERE id IN ({placeholders})",
                     batch,
                 )
         except Exception:
             # Undo the batches that already ran before re-raising
             self._con.rollback()
             raise
         self._con.commit()
 ```
 **Step 6: Add use_hard_negatives parameter to get_training_data**
 In `get_training_data()` (line 315), add parameter:
 ```python
 def get_training_data(self, profile: str, positive_folder: str,
                      negative_folder: str = "",
                      fallback_video_dir: str = "",
                      include_scan_exports: bool = False,
                      use_hard_negatives: bool = True,
                      ) -> list[tuple[str, list[float], list[float], list[float]]]:
 ```
 Then wrap the hard negatives query (lines 365-374) in a conditional:
 ```python
    if use_hard_negatives:
        hard_rows = self._con.execute(
            "SELECT filename, start_time, source_path FROM hard_negatives"
            " WHERE profile = ?",
            (profile,),
        ).fetchall()
        for fn, st, sp in hard_rows:
            neg_by_video.setdefault(fn, set()).add(st)
            if sp:
                source_by_filename.setdefault(fn, sp)
 ```
 **Step 7: Pass source_model when marking negatives from scan panel**
 In `main.py`, `_on_scan_negatives()` needs to pass the current scan model. The scan panel knows which tab is active:
 ```python
 def _on_scan_negatives(self, times: list) -> None:
     """Store ``times`` as hard negatives of the currently loaded file.
     The rows are filed under the file's base name and the active profile and
     are tagged with the full source path plus the model name reported by the
     scan panel. Does nothing when no file path is set.
     """
     path = self._file_path
     if not path:
         return
     self._db.add_hard_negatives(
         os.path.basename(path), self._profile, times,
         source_path=path,
         # The scan panel reports the model of its currently active tab
         source_model=self._scan_panel.current_model_name(),
     )
 ```
 Add `current_model_name()` to ScanResultsPanel:
 ```python
 def current_model_name(self) -> str:
     """Return the model name shown on the active tab, or "" without one.
     The tab text is cut at the first " (" so a trailing count suffix such as
     " (12)" is not part of the returned name.
     """
     active = self._tabs.currentIndex()
     if active < 0:
         return ""
     label = self._tabs.tabText(active)
     model, _sep, _suffix = label.partition(" (")  # strip count suffix
     return model
 ```
 **Step 8: Add training toggle to TrainDialog**
 After the existing `_chk_scan_exports` checkbox:
 ```python
 self._chk_hard_negatives = QCheckBox("Use hard negatives in training")
 self._chk_hard_negatives.setChecked(True)
 self._chk_hard_negatives.setToolTip(
    "When unchecked, manually marked hard negatives are excluded from training.\n"
    "Useful when training a new model type where old negatives may not apply.")
 self._chk_hard_negatives.stateChanged.connect(lambda: self._debounce.start())
 form.addRow("", self._chk_hard_negatives)
 ```
 Add property:
 ```python
@property
 def use_hard_negatives(self) -> bool:
     """Return whether the ``_chk_hard_negatives`` checkbox is currently checked."""
     return self._chk_hard_negatives.isChecked()
 ```
 **Step 9: Wire toggle through _open_train_dialog**
 In `_open_train_dialog()`, pass the flag:
 ```python
    video_infos = self._db.get_training_data(
        self._profile, pos_folder, negative_folder=neg_folder,
        fallback_video_dir=video_dir,
        include_scan_exports=inc_scan,
        use_hard_negatives=dlg.use_hard_negatives,
    )
 ```
 Also update `_update_stats()` in TrainDialog to pass it through for accurate counts:
 ```python
    use_neg = self._chk_hard_negatives.isChecked() if hasattr(self, '_chk_hard_negatives') else True
    video_infos = self._db.get_training_data(
        self._profile, folder, negative_folder=neg_folder,
        fallback_video_dir=self._txt_video_dir.text(),
        include_scan_exports=inc_scan,
        use_hard_negatives=use_neg,
    )
 ```
 **Step 10: Run tests, commit**
 ```bash
 pytest tests/ -v
 git add core/db.py main.py tests/test_db.py
 git commit -m "feat: hard negative source_model tracking, training toggle"
 ```
 ---
-### Task 5: Hard negatives management dialog
+### Task 5: Hard negatives management dialog -- DONE
-**Files:**
+**Commit:** `e6db83f feat: hard negatives management dialog with filter and bulk delete`
 - Modify: `main.py` (add HardNegativesDialog class)
 - Modify: `main.py` (TrainDialog — add "Manage..." button)
-**Step 1: Create HardNegativesDialog**
+- `main.py` — `HardNegativesDialog`: table with File/Time/Source Model/hidden ID columns, model filter combo, delete selected, filter-aware clear all, close button
-
+- Filter-aware "Clear All": respects active model filter, shows appropriate confirmation message
 Place before TrainDialog class:
 ```python
 class HardNegativesDialog(QDialog):
     """View and manage the hard negative training examples of one profile.
     The table can be narrowed with the source-model filter. "Delete Selected"
     and "Clear All" both act only on rows that pass the active filter, and
     the chosen filter is kept when the table is reloaded after a deletion.
     """
     def __init__(self, db: ProcessedDB, profile: str, parent=None):
         """Build the filter row, summary label, table and buttons, then load rows."""
         super().__init__(parent)
         self.setWindowTitle("Hard Negatives")
         self.setMinimumSize(600, 400)
         self._db = db
         self._profile = profile
         layout = QVBoxLayout(self)
         # Filter row
         filter_row = QHBoxLayout()
         filter_row.addWidget(QLabel("Filter model:"))
         self._cmb_filter = QComboBox()
         self._cmb_filter.addItem("(all)")
         self._cmb_filter.currentIndexChanged.connect(self._apply_filter)
         filter_row.addWidget(self._cmb_filter, 1)
         layout.addLayout(filter_row)
         # Summary
         self._lbl_summary = QLabel()
         layout.addWidget(self._lbl_summary)
         # Table
         self._table = QTableWidget(0, 4)
         self._table.setHorizontalHeaderLabels(
             ["File", "Time", "Source Model", "ID"])
         self._table.horizontalHeader().setSectionResizeMode(
             0, QHeaderView.ResizeMode.Stretch)
         self._table.setEditTriggers(QTableWidget.EditTrigger.NoEditTriggers)
         self._table.setSelectionBehavior(QTableWidget.SelectionBehavior.SelectRows)
         self._table.setColumnHidden(3, True)  # hide ID column
         layout.addWidget(self._table)
         # Buttons
         btn_row = QHBoxLayout()
         btn_delete = QPushButton("Delete Selected")
         btn_delete.clicked.connect(self._delete_selected)
         btn_row.addWidget(btn_delete)
         btn_clear = QPushButton("Clear All")
         btn_clear.clicked.connect(self._clear_all)
         btn_row.addWidget(btn_clear)
         btn_row.addStretch()
         btn_close = QPushButton("Close")
         btn_close.clicked.connect(self.close)
         btn_row.addWidget(btn_close)
         layout.addLayout(btn_row)
         self._load()
     def _load(self):
         """Reload all rows from the DB and rebuild the filter, keeping its selection."""
         previous_filter = self._cmb_filter.currentText()
         rows = self._db.get_hard_negatives(self._profile)
         models = sorted({r["source_model"] for r in rows if r["source_model"]})
         # Rebuild the combo silently so the rebuild itself triggers no filtering
         self._cmb_filter.blockSignals(True)
         self._cmb_filter.clear()
         self._cmb_filter.addItem("(all)")
         for m in models:
             self._cmb_filter.addItem(m)
         # Re-select the previously active model if it still has rows;
         # otherwise the combo stays on "(all)"
         restored = self._cmb_filter.findText(previous_filter)
         if restored >= 0:
             self._cmb_filter.setCurrentIndex(restored)
         self._cmb_filter.blockSignals(False)
         self._table.setRowCount(len(rows))
         for i, r in enumerate(rows):
             self._table.setItem(i, 0, QTableWidgetItem(r["filename"]))
             self._table.setItem(i, 1, QTableWidgetItem(f'{r["start_time"]:.1f}s'))
             self._table.setItem(i, 2, QTableWidgetItem(r["source_model"]))
             self._table.setItem(i, 3, QTableWidgetItem(str(r["id"])))
         self._lbl_summary.setText(f"<b>{len(rows)}</b> hard negatives")
         # Signals were blocked above, so apply the (restored) filter explicitly
         self._apply_filter()
     def _apply_filter(self):
         """Hide every table row whose source model differs from the chosen filter."""
         model = self._cmb_filter.currentText()
         show_all = model == "(all)"
         for row in range(self._table.rowCount()):
             hidden = not show_all and self._table.item(row, 2).text() != model
             self._table.setRowHidden(row, hidden)
     def _delete_selected(self):
         """Delete the selected rows that are visible under the current filter."""
         selected_rows = {item.row() for item in self._table.selectedItems()}
         # Column 3 is the hidden ID column
         ids = [int(self._table.item(row, 3).text())
                for row in sorted(selected_rows)
                if not self._table.isRowHidden(row)]
         if ids:
             self._db.delete_hard_negatives_by_ids(ids)
             self._load()
     def _clear_all(self):
         """Delete every hard negative matching the active filter, after confirmation."""
         model = self._cmb_filter.currentText()
         if model == "(all)":
             question = f"Delete all hard negatives for profile '{self._profile}'?"
         else:
             question = (f"Delete all hard negatives from model '{model}'"
                         f" for profile '{self._profile}'?")
         reply = QMessageBox.question(
             self, "Clear All",
             question,
             QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
         )
         if reply != QMessageBox.StandardButton.Yes:
             return
         rows = self._db.get_hard_negatives(self._profile)
         if model != "(all)":
             # Respect the filter: only remove negatives from the chosen model
             rows = [r for r in rows if r["source_model"] == model]
         self._db.delete_hard_negatives_by_ids([r["id"] for r in rows])
         self._load()
 ```
 **Step 2: Add "Manage..." button to TrainDialog**
 After the hard negatives checkbox, add a button:
 ```python
 neg_row = QHBoxLayout()
 neg_row.addWidget(self._chk_hard_negatives)
 btn_manage_neg = QPushButton("Manage…")
 btn_manage_neg.setFixedWidth(80)
 btn_manage_neg.clicked.connect(self._manage_negatives)
 neg_row.addWidget(btn_manage_neg)
 form.addRow("", neg_row)  # replaces the standalone checkbox addRow
 ```
 Add handler:
 ```python
 def _manage_negatives(self):
     """Run the hard-negatives manager dialog, then trigger a stats refresh."""
     HardNegativesDialog(self._db, self._profile, parent=self).exec()
     # Negatives may have been deleted in the dialog, so refresh the stats
     self._debounce.start()
 ```
 **Step 3: Test manually, commit**
 ```bash
 pytest tests/ -v
 git add main.py
 git commit -m "feat: hard negatives management dialog with filter and bulk delete"
 ```
 ---
-### Task 6: Final integration test and push
+### Task 6: Code review fixes -- DONE
-**Step 1: Manual test checklist**
+**Commit:** `5d45b8d fix: timestamp collision, undo stack invalidation, label parsing, filter-aware clear`
- [ ] Open Train dialog — verify no ghost folders appear
+Four issues found during code review:
- [ ] Train with "Use hard negatives" unchecked — verify training works
+1. **Timestamp collision** — second-precision timestamps could merge versions on sub-second calls. Fixed with microsecond precision `%f`
- [ ] Train with "Use hard negatives" checked — verify negatives are used
+2. **Undo stack invalidation** — switching scan versions left stale undo entries. Fixed by clearing undo stack in `_on_version_changed()`
- [ ] Open Manage dialog — verify negatives listed with source model
+3. **Timestamp label fragile parsing** — hard-coded string slicing. Fixed with `datetime.strptime` + try/except fallback
- [ ] Delete selected negatives — verify they're removed
+4. **Clear All ignoring filter** — deleted all negatives regardless of model filter. Fixed to respect active filter
 - [ ] Scan a video — verify results saved with timestamp
 - [ ] Rescan same video — verify version history appears
 - [ ] Switch version in scan panel — verify correct results display
 - [ ] Mark negative from scan results — verify source_model stored
-**Step 2: Push**
+---
-```bash
+### Runtime fixes (discovered during manual testing)
-git push
+
-```
+| Commit | Fix |
 |--------|-----|
 | `a3c657c` | Install `torchvision` from CUDA wheel index (was pulling CPU build from PyPI) |
 | `3c3b1d7` | Remove "skip if torch exists" guard in Windows setup so re-runs fix broken envs |
 | `fd043f4` | Pin `transformers>=4.30,<5.0` — EAT remote model code incompatible with transformers 5.x |
 | `7d6fee9` | Copy read-only numpy array before `torch.from_numpy()` in EAT preprocessing |
 | `bd345ab` | Connect `tab_changed` to `_on_scan_regions_edited` so timeline refreshes on tab switch |
 | `d8b3972` | Add `--extra-index-url` to `pip install -r requirements.txt` in both setup scripts |
 ---
 ### Test results
 All 68 tests pass (5 new DB tests + 63 existing).
@@ -1756,16 +1756,18 @@ class TimelineWidget(QWidget):
    def mousePressEvent(self, event):
        x = event.position().x()
-        # Check for scan region edge drag
+        # Check for scan region edge drag — require Shift to avoid accidental resizes
-        hit = self._hit_scan_edge(x)
+        mods = event.modifiers()
-        if hit is not None:
+        if mods & Qt.KeyboardModifier.ShiftModifier:
-            idx, edge = hit
+            hit = self._hit_scan_edge(x)
-            r = self._scan_regions[idx]
+            if hit is not None:
-            self._drag_idx = idx
+                idx, edge = hit
-            self._drag_edge = edge
+                r = self._scan_regions[idx]
-            self._drag_start_val = r[0]
+                self._drag_idx = idx
-            self._drag_end_val = r[1]
+                self._drag_edge = edge
-            return
+                self._drag_start_val = r[0]
                self._drag_end_val = r[1]
                return
        self._seek(x)
    def mouseDoubleClickEvent(self, event):
@@ -1801,9 +1803,9 @@ class TimelineWidget(QWidget):
            self.update()
            return
-        # Hover cursor: resize arrow near edges, normal otherwise
+        # Hover cursor: resize arrow near edges (only with Shift held)
-        hit = self._hit_scan_edge(x)
+        mods = event.modifiers()
-        if hit is not None:
+        if (mods & Qt.KeyboardModifier.ShiftModifier) and self._hit_scan_edge(x):
            self.setCursor(Qt.CursorShape.SizeHorCursor)
        else:
            self.unsetCursor()
@@ -3224,6 +3226,67 @@ class MainWindow(QMainWindow):
                    self._playlist._select(0)
                _log(f"Resumed session: {len(valid)} file(s)")
        self._show_changelog()
    # ── Changelog ────────────────────────────────────────────
     # Version string compared against the stored "last_seen_version" setting
     # to decide whether the changelog popup is shown.
     APP_VERSION = "1.0"
     # Release notes as (version, [rich-text items]) pairs. Keep the newest
     # version first: _show_changelog() walks this list from the top and stops
     # at the version the user has already seen. Items are inserted into
     # <li> elements as-is, so they may contain HTML markup.
     CHANGELOG: list[tuple[str, list[str]]] = [
         ("1.0", [
             "<b>New export layout</b> — clips are now stored in per-video "
             "<code>vid_NNN/</code> folders instead of per-clip "
             "<code>clip_NNN/</code> group dirs. "
             "Each source video gets its own folder with flat clip files inside "
             "(e.g. <code>mp4/vid_001/clip_001_0.mp4</code>). "
             "Old databases are migrated automatically on startup: "
             "DB paths are rewritten and files are moved to the new layout.",
             "<b>Counter is now per-video</b> — clip numbering restarts in each "
             "vid folder, and the DB is cross-checked to prevent overwrites "
             "even if the export folder is temporarily empty.",
             "<b>Audio detection models</b> — three new embedding models for "
             "audio scanning: <b>AST</b> (Audio Spectrogram Transformer), "
             "<b>EAT</b> (Efficient Audio Transformer), and <b>multi-layer "
             "HuBERT/Wav2Vec2</b> extraction. Classifier probabilities are now "
             "calibrated with isotonic regression for more meaningful scores.",
             "<b>Scan result history</b> — scan results are versioned per "
             "(file, model); switch between past scan versions from a dropdown.",
             "<b>Hard negatives</b> — management dialog to review, filter, and "
             "bulk-delete hard negatives; source model is tracked per negative.",
             "<b>Scan workflow</b> — disable/resize scan regions, undo edits, "
             "interruptible Scan All with resume, audio prefetch, review mode.",
             "<b>Dataset statistics</b> — dialog showing per-video clip breakdown "
             "and class balance.",
             "<b>Waveform overlay</b> on timeline.",
         ]),
     ]
    def _show_changelog(self) -> None:
        last = self._settings.value("last_seen_version", "")
        if last == self.APP_VERSION:
            return
        # Collect entries newer than last seen
        lines: list[str] = []
        for ver, items in self.CHANGELOG:
            if ver == last:
                break
            lines.append(f"<h3>v{ver}</h3><ul>")
            for item in items:
                lines.append(f"<li>{item}</li>")
            lines.append("</ul>")
        if not lines:
            self._settings.setValue("last_seen_version", self.APP_VERSION)
            return
        msg = QMessageBox(self)
        msg.setWindowTitle("What's new")
        msg.setIcon(QMessageBox.Icon.Information)
        msg.setTextFormat(Qt.TextFormat.RichText)
        msg.setText("".join(lines))
        cb = QCheckBox("Don't show again for this version")
        msg.setCheckBox(cb)
        msg.exec()
        if cb.isChecked():
            self._settings.setValue("last_seen_version", self.APP_VERSION)
    def _show_shortcuts(self) -> None:
        text = (
            "<table cellpadding='4' style='font-size:13px'>"
@@ -3248,7 +3311,7 @@ class MainWindow(QMainWindow):
            "<tr><td><b>Double-click marker</b></td><td>Enter overwrite mode (locked: jump to end of clip span)</td></tr>"
            "<tr><td><b>Right-click marker</b></td><td>Delete clip group</td></tr>"
            "<tr><td><b>Click video / crop bar</b></td><td>Reposition portrait crop</td></tr>"
-            "<tr><td><b>Drag scan region edge</b></td><td>Resize scan region</td></tr>"
+            "<tr><td><b>Shift+drag scan region edge</b></td><td>Resize scan region</td></tr>"
            "</table>"
        )
        QMessageBox.information(self, "Keyboard shortcuts", text)
@@ -5,21 +5,21 @@ from main import ProcessedDB
 def test_build_export_path_first():
-    assert build_export_path("/out", "clip", 1) == "/out/clip_001/clip_001.mp4"
+    assert build_export_path("/out", "clip", 1) == "/out/clip_001.mp4"
 def test_build_export_path_counter():
-    assert build_export_path("/out", "clip", 42) == "/out/clip_042/clip_042.mp4"
+    assert build_export_path("/out", "clip", 42) == "/out/clip_042.mp4"
 def test_build_export_path_deep_counter():
-    assert build_export_path("/out", "shot", 999) == "/out/shot_999/shot_999.mp4"
+    assert build_export_path("/out", "shot", 999) == "/out/shot_999.mp4"
 def test_build_export_path_sub():
-    assert build_export_path("/out", "clip", 1, sub=0) == "/out/clip_001/clip_001_0.mp4"
+    assert build_export_path("/out", "clip", 1, sub=0) == "/out/clip_001_0.mp4"
-    assert build_export_path("/out", "clip", 1, sub=2) == "/out/clip_001/clip_001_2.mp4"
+    assert build_export_path("/out", "clip", 1, sub=2) == "/out/clip_001_2.mp4"
 def test_build_sequence_dir_sub():
-    assert build_sequence_dir("/out", "clip", 1, sub=0) == "/out/clip_001/clip_001_0"
+    assert build_sequence_dir("/out", "clip", 1, sub=0) == "/out/clip_001_0"
-    assert build_sequence_dir("/out", "clip", 1, sub=1) == "/out/clip_001/clip_001_1"
+    assert build_sequence_dir("/out", "clip", 1, sub=1) == "/out/clip_001_1"
 def test_format_time_seconds():
    assert format_time(0.0) == "0:00.0"
@@ -178,10 +178,10 @@ def test_audio_extract_timing():
 def test_build_sequence_dir_basic():
-    assert build_sequence_dir("/out", "clip", 1) == "/out/clip_001/clip_001"
+    assert build_sequence_dir("/out", "clip", 1) == "/out/clip_001"
 def test_build_sequence_dir_counter():
-    assert build_sequence_dir("/out", "clip", 42) == "/out/clip_042/clip_042"
+    assert build_sequence_dir("/out", "clip", 42) == "/out/clip_042"
 def test_ffmpeg_command_image_sequence():
    cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/seq_001", image_sequence=True)
@@ -265,13 +265,13 @@ def test_db_get_group_returns_all_sub_clips():
        path = f.name
    try:
        db = ProcessedDB(path)
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_2.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_2.mp4")
-        group = db.get_group("/out/clip_001/clip_001_0.mp4")
+        group = db.get_group("/out/vid_001/clip_001_0.mp4")
        assert len(group) == 3
-        assert "/out/clip_001/clip_001_0.mp4" in group
+        assert "/out/vid_001/clip_001_0.mp4" in group
-        assert "/out/clip_001/clip_001_2.mp4" in group
+        assert "/out/vid_001/clip_001_2.mp4" in group
    finally:
        os.unlink(path)
@@ -281,10 +281,10 @@ def test_db_get_group_isolates_by_start_time():
        path = f.name
    try:
        db = ProcessedDB(path)
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
-        db.add("video.mp4", 30.0, "/out/clip_002/clip_002_0.mp4")
+        db.add("video.mp4", 30.0, "/out/vid_001/clip_002_0.mp4")
-        group = db.get_group("/out/clip_001/clip_001_0.mp4")
+        group = db.get_group("/out/vid_001/clip_001_0.mp4")
        assert len(group) == 2
    finally:
        os.unlink(path)
@@ -295,10 +295,10 @@ def test_db_delete_group_removes_all():
        path = f.name
    try:
        db = ProcessedDB(path)
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
-        db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
+        db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
-        db.add("video.mp4", 30.0, "/out/clip_002/clip_002_0.mp4")
+        db.add("video.mp4", 30.0, "/out/vid_001/clip_002_0.mp4")
-        deleted = db.delete_group("/out/clip_001/clip_001_0.mp4")
+        deleted = db.delete_group("/out/vid_001/clip_001_0.mp4")
        assert len(deleted) == 2
        # clip_002 should still exist
        markers = db.get_markers("video.mp4")