diff --git a/core/audio_scan.py b/core/audio_scan.py
index bffd23d..e3a35b7 100644
--- a/core/audio_scan.py
+++ b/core/audio_scan.py
@@ -65,6 +65,7 @@ _EMBED_MODELS = {
"AST": 768,
"AST_ML": 3072, # 768 * 4
"EAT": 768,
+ "EAT_LARGE": 1024,
}
_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
@@ -104,11 +105,13 @@ def _get_w2v_model(model_name: str | None = None):
_ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
"MIT/ast-finetuned-audioset-10-10-0.4593"
)
- elif load_name == "EAT":
+ elif load_name in ("EAT", "EAT_LARGE"):
from transformers import AutoModel
+ eat_repo = ("worstchan/EAT-large_epoch20_finetune_AS2M"
+ if load_name == "EAT_LARGE"
+ else "worstchan/EAT-base_epoch30_finetune_AS2M")
_w2v_model = AutoModel.from_pretrained(
- "worstchan/EAT-base_epoch30_finetune_AS2M",
- trust_remote_code=True,
+ eat_repo, trust_remote_code=True,
).to(_w2v_device)
else:
import torchaudio
@@ -254,7 +257,7 @@ def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
model, device = _get_w2v_model(model_name)
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
- is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
+ is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
# Auto-size batches based on available GPU memory
batch_size = 16
@@ -383,7 +386,7 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
- is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
+ is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
for batch_start in range(0, len(valid_times), batch_size):
diff --git a/core/db.py b/core/db.py
index 1e2590e..52a1c6f 100644
--- a/core/db.py
+++ b/core/db.py
@@ -141,6 +141,92 @@ class ProcessedDB:
" ON hard_negatives(filename, profile)"
)
self._con.commit()
+ self._migrate_vid_folders()
+
+    def _migrate_vid_folders(self) -> None:
+        """Migrate old clip_NNN group dirs → vid_NNN per-video folders.
+
+        Old layout: export_folder/clip_NNN/clip_NNN_sub.mp4
+        New layout: export_folder/vid_NNN/clip_NNN_sub.mp4
+
+        Moves files on disk and rewrites ``output_path`` in the DB.
+
+        Only rows that really use the old layout are touched: the parent
+        directory must be named ``<name>_<digits>`` and the file must be named
+        after that directory.  Rows with an empty path or any other layout are
+        left alone.
+
+        ``vid_NNN`` names already present in the DB are reserved per export
+        folder, and a video that already owns a vid folder keeps it, so a
+        partially migrated database never merges two videos into one folder.
+        Numbering is derived from the DB only (not from disk), so re-running
+        after an interrupted migration assigns the same names again.
+
+        A row's path is rewritten only when its file was moved successfully or
+        no longer exists at the old path.  If the move fails or the target
+        already exists, the row keeps its old path and the problem is logged.
+        """
+        import re
+        import shutil
+
+        # Cheap probe so already-migrated databases do not load every row.
+        # "!_" escapes the LIKE single-character wildcard; the second pattern
+        # covers backslash-separated paths.  Like the original probe, it only
+        # looks for the default "clip" basename.
+        probe = self._con.execute(
+            "SELECT id FROM processed"
+            " WHERE output_path LIKE '%/clip!_%/%' ESCAPE '!'"
+            " OR output_path LIKE '%\\clip!_%\\%' ESCAPE '!'"
+            " LIMIT 1"
+        ).fetchone()
+        if not probe:
+            return
+
+        _log("Migrating old clip group dirs → vid folders …")
+        rows = self._con.execute(
+            "SELECT id, filename, profile, output_path FROM processed"
+            " ORDER BY profile, filename, output_path"
+        ).fetchall()
+
+        group_dir_re = re.compile(r".+_\d+")
+        # (profile, export_folder, filename) -> vid_NNN
+        vid_map: dict[tuple, str] = {}
+        # export_folder -> vid_NNN names already taken there (any profile)
+        used_names: dict[str, set[str]] = {}
+        # Rows still in the old layout: (row id, key, old path, old group dir)
+        legacy: list[tuple] = []
+
+        for rid, filename, profile, op in rows:
+            if not op:
+                continue
+            parent = os.path.dirname(op)
+            parent_name = os.path.basename(parent)
+            export_folder = os.path.dirname(parent)
+            key = (profile, export_folder, filename)
+            if parent_name.startswith("vid_"):
+                # Already migrated: remember the name so it is reused for this
+                # video and never handed to another one.
+                vid_map.setdefault(key, parent_name)
+                used_names.setdefault(export_folder, set()).add(parent_name)
+                continue
+            base = os.path.basename(op)
+            if group_dir_re.fullmatch(parent_name) and (
+                base == parent_name
+                or base.startswith((parent_name + "_", parent_name + "."))
+            ):
+                legacy.append((rid, key, op, parent))
+
+        if not legacy:
+            return
+
+        updates: list[tuple[str, int]] = []
+        old_dirs: set[str] = set()
+        moved = 0
+
+        for rid, key, op, parent in legacy:
+            export_folder = key[1]
+            vid_name = vid_map.get(key)
+            if vid_name is None:
+                # First clip of this video: take the lowest free number.
+                names = used_names.setdefault(export_folder, set())
+                n = 1
+                while f"vid_{n:03d}" in names:
+                    n += 1
+                vid_name = f"vid_{n:03d}"
+                names.add(vid_name)
+                vid_map[key] = vid_name
+            new_path = os.path.join(export_folder, vid_name, os.path.basename(op))
+
+            if os.path.exists(op):
+                if os.path.exists(new_path):
+                    # Never overwrite; keep the row pointing at the real file.
+                    _log(f"Migration skipped, target exists: {op} → {new_path}")
+                    continue
+                try:
+                    os.makedirs(os.path.dirname(new_path), exist_ok=True)
+                    shutil.move(op, new_path)
+                except OSError as exc:
+                    _log(f"Migration failed for {op}: {exc}")
+                    continue
+                moved += 1
+            updates.append((new_path, rid))
+            old_dirs.add(parent)
+
+        if updates:
+            self._con.executemany(
+                "UPDATE processed SET output_path = ? WHERE id = ?", updates
+            )
+            self._con.commit()
+
+        # Remove old group directories that are now empty (best effort).
+        for d in sorted(old_dirs, reverse=True):
+            try:
+                if os.path.isdir(d) and not os.listdir(d):
+                    os.rmdir(d)
+            except OSError:
+                pass
+
+        _log(f"Migrated {len(updates)} rows, moved {moved} files to vid folders")
def add(self, filename: str, start_time: float, output_path: str,
label: str = "", category: str = "",
@@ -306,8 +392,8 @@ class ProcessedDB:
def get_max_counter(self, folder: str, name: str) -> int:
"""Return the highest counter N found in output_paths matching folder/name_NNN*.
- Parses the group directory component (e.g. 'clip_035') from stored
- output_path values. Returns 0 if no matches exist.
+ Parses the counter from filenames (e.g. 'clip_035_0.mp4' → 35).
+ *folder* is typically the vid folder. Returns 0 if no matches exist.
"""
if not self._enabled:
return 0
@@ -318,24 +404,66 @@ class ProcessedDB:
(prefix + "%",),
).fetchall()
max_n = 0
+ name_prefix = name + "_"
for (op,) in rows:
- # output_path: .../folder/name_NNN/name_NNN_sub.ext
- parent = os.path.basename(os.path.dirname(op))
- # parent should be "name_NNN"
- parts = parent.rsplit("_", 1)
- if len(parts) == 2:
- try:
- max_n = max(max_n, int(parts[1]))
- except ValueError:
- pass
+ stem = os.path.splitext(os.path.basename(op))[0]
+ # stem: "clip_035_0" or "clip_036_a1_0"
+ if not stem.startswith(name_prefix):
+ continue
+ rest = stem[len(name_prefix):] # "035_0" or "036_a1_0"
+ counter_str = rest.split("_")[0]
+ try:
+ max_n = max(max_n, int(counter_str))
+ except ValueError:
+ pass
return max_n
+    def get_vid_folder(self, filename: str, profile: str,
+                       export_folder: str) -> str:
+        """Return the vid_NNN folder name for a source video.
+
+        Resolution order:
+
+        1. A ``vid_NNN`` folder this video already uses inside
+           *export_folder* (per the DB ``output_path`` values).
+        2. A ``vid_NNN`` folder this video uses in another export folder,
+           unless a different video already owns that name inside
+           *export_folder*.
+        3. The lowest number not used by any ``vid_NNN`` name in the
+           profile's DB rows or by a ``vid_*`` directory on disk in
+           *export_folder* (orphan folders).
+
+        All rows of the profile are inspected, so a legacy-layout row for the
+        same video cannot hide its existing vid folder.
+
+        Returns ``"vid_001"`` when the DB is disabled.
+        """
+        if not self._enabled:
+            return "vid_001"
+
+        def _norm(path: str) -> str:
+            return os.path.normcase(os.path.normpath(path))
+
+        target = _norm(export_folder)
+        own_here: set[str] = set()       # this video, this export folder
+        own_elsewhere: set[str] = set()  # this video, other export folders
+        taken_here: set[str] = set()     # other videos, this export folder
+        existing: set[str] = set()       # every vid name in the profile
+
+        rows = self._con.execute(
+            "SELECT DISTINCT filename, output_path FROM processed"
+            " WHERE profile = ?",
+            (profile,),
+        ).fetchall()
+        for fname, op in rows:
+            if not op:
+                continue
+            parent = os.path.dirname(op)
+            vid_name = os.path.basename(parent)
+            if not vid_name.startswith("vid_"):
+                continue
+            existing.add(vid_name)
+            in_target = _norm(os.path.dirname(parent)) == target
+            if fname == filename:
+                (own_here if in_target else own_elsewhere).add(vid_name)
+            elif in_target:
+                taken_here.add(vid_name)
+
+        if own_here:
+            return min(own_here)
+        for vid_name in sorted(own_elsewhere):
+            if vid_name not in taken_here:
+                return vid_name
+
+        # New video: also reserve orphan vid folders that exist only on disk.
+        if os.path.isdir(export_folder):
+            for d in os.listdir(export_folder):
+                if d.startswith("vid_") and os.path.isdir(
+                    os.path.join(export_folder, d)
+                ):
+                    existing.add(d)
+        n = 1
+        while f"vid_{n:03d}" in existing:
+            n += 1
+        return f"vid_{n:03d}"
+
def get_export_folders(self, profile: str = "default",
include_scan_exports: bool = False) -> list[str]:
"""Return distinct export folder names found in output_paths for a profile.
Export paths follow the structure:
- .../export_folder/group_dir/clip.mp4
+ .../export_folder/vid_NNN/clip.mp4
The export folder is 2 levels up from the clip file.
Returns folder names sorted alphabetically (e.g. ["mp4_Intense", "mp4_Soft"]).
"""
diff --git a/core/paths.py b/core/paths.py
index d4de4a2..030835b 100644
--- a/core/paths.py
+++ b/core/paths.py
@@ -25,15 +25,19 @@ def _log(*args) -> None:
def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
-    group = f"{basename}_{counter:03d}"
-    name = f"{group}_{sub}" if sub is not None else group
-    return os.path.join(folder, group, name + ".mp4")
+    """Return the ``.mp4`` output path of a clip placed directly in *folder*.
+
+    *folder* should be the vid folder (e.g. .../mp4/vid_001).  The file is
+    named ``<basename>_<counter, zero-padded to 3 digits>``, followed by
+    ``_<sub>`` when *sub* is given.
+    """
+    sub_suffix = "" if sub is None else f"_{sub}"
+    file_name = f"{basename}_{counter:03d}{sub_suffix}.mp4"
+    return os.path.join(folder, file_name)
def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
-    group = f"{basename}_{counter:03d}"
-    name = f"{group}_{sub}" if sub is not None else group
-    return os.path.join(folder, group, name)
+    """Return the output directory of a WebP sequence placed in *folder*.
+
+    *folder* should be the vid folder.  The directory is named
+    ``<basename>_<counter, zero-padded to 3 digits>``, followed by ``_<sub>``
+    when *sub* is given.
+    """
+    sub_suffix = "" if sub is None else f"_{sub}"
+    dir_name = f"{basename}_{counter:03d}{sub_suffix}"
+    return os.path.join(folder, dir_name)
def format_time(seconds: float) -> str:
diff --git a/docs/plans/2026-04-19-scan-history-negatives-design.md b/docs/plans/2026-04-19-scan-history-negatives-design.md
index 5b7410b..9ac679d 100644
--- a/docs/plans/2026-04-19-scan-history-negatives-design.md
+++ b/docs/plans/2026-04-19-scan-history-negatives-design.md
@@ -1,6 +1,6 @@
-# Scan History & Hard Negative Management Design
+# Scan History & Hard Negative Management — Final Design
-Date: 2026-04-19
+Date: 2026-04-19 (implemented on `feat/training-ui`)
## Goal
@@ -8,83 +8,198 @@ Date: 2026-04-19
2. Make hard negatives manageable — viewable, removable, and optionally disabled per training run
3. Fix latent bug: `get_export_folders()` doesn't filter by `scan_export`
-## 1. Scan Result History
+---
-### Current behavior
-
-`save_scan_results()` **replaces** all results for `(filename, profile, model)` on every scan. No history is preserved.
-
-### Change
-
-Keep the last N scan results per `(filename, profile, model)` with timestamps. The most recent is the "active" result displayed in the panel; older versions are accessible for comparison.
-
-### Schema change
-
-Add column to `scan_results`:
-
-```sql
-ALTER TABLE scan_results ADD COLUMN scan_timestamp TEXT NOT NULL DEFAULT '';
-```
-
-All rows from the same scan share the same timestamp string (e.g. `"20260419_143022"`).
-
-### save_scan_results changes
-
-Instead of `DELETE ... WHERE filename=? AND profile=? AND model=?`, the new flow:
-
-1. Insert new rows with current timestamp
-2. Count distinct timestamps for this `(filename, profile, model)`
-3. If count > N (default 5), delete rows belonging to the oldest timestamps
-
-### UI changes
-
-Add a small version dropdown/selector in `ScanResultsPanel` per model tab — shows timestamps of available scan versions. Selecting a version loads that version's results into the tab. The most recent is selected by default.
-
-The tab label shows the active version's region count, e.g. `HUBERT_XLARGE (12) [v3]`.
-
-### Cache interaction
-
-Embedding cache is per `(file, model)` and doesn't change across scans. Only the classifier output changes. History stores the classified regions (start, end, score), not embeddings.
-
-## 2. Hard Negative Management
-
-### Current behavior
-
-- Hard negatives stored in `hard_negatives` table: `(filename, profile, start_time, source_path)`
-- No model column — applied globally within a profile
-- Removable one-by-one via N toggle in scan panel, but no bulk management
-- Always used in training — no way to disable
-
-### Changes
-
-#### Schema
-
-Add `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives`. Populated when marking negatives from scan results (we know which model tab is active).
-
-#### Training toggle
-
-New checkbox in `TrainDialog`: **"Use hard negatives"** (default checked). When unchecked, `get_training_data()` skips the `hard_negatives` query entirely. Non-destructive — negatives remain in DB.
-
-#### Management dialog
-
-New `HardNegativesDialog` accessible from Train dialog via "Manage..." button next to the checkbox. Shows:
-
-- Table: filename, start time, source model, date added (if we add created_at)
-- Filter by source model (dropdown)
-- Multi-select + Delete button
-- "Clear All" button with confirmation
-- Count summary at top
-
-### Training integration
-
-`get_training_data()` gets a new `use_hard_negatives: bool = True` parameter. When False, the hard negatives query (lines 365-374 of db.py) is skipped entirely.
-
-## 3. Ghost Folder Fix
+## 1. Ghost Folder Fix
### Bug
-`get_export_folders()` queries all `output_path` rows without filtering `scan_export`. Folders that only contain scan-exported clips appear in training dropdowns with 0 clips.
+`get_export_folders()` queried all `output_path` rows without filtering `scan_export`. Folders that only contained scan-exported clips appeared in training dropdowns with 0 clips.
-### Fix
+### Implementation (`core/db.py`)
-Add `include_scan_exports` parameter to `get_export_folders()`. When False (default), only query rows with `scan_export = 0`. Also filter out folders with 0 clips from `get_training_stats()` result dict.
+**`get_export_folders(profile, include_scan_exports=False)`** — new parameter. When `False` (default), the SQL query adds `AND scan_export = 0` to exclude scan-only folders. The `get_training_stats()` method passes this through and also filters its return dict to remove folders with 0 clips:
+
+```python
+return {k: v for k, v in stats.items() if v["clips"] > 0}
+```
+
+### Test
+
+`tests/test_db.py::test_export_folders_excludes_scan_exports` — verifies scan-only folders are excluded by default and included when `include_scan_exports=True`.
+
+---
+
+## 2. Scan Result History
+
+### Schema
+
+Added column to `scan_results`:
+
+```sql
+scan_timestamp TEXT NOT NULL DEFAULT ''
+```
+
+All rows from the same scan share one timestamp string with **microsecond precision** (`%Y%m%d_%H%M%S_%f`, e.g. `"20260419_143022_123456"`). Microsecond precision prevents version collisions on fast successive scans.
+
+Migration adds the column via `ALTER TABLE` for existing databases. Legacy rows keep `scan_timestamp = ''`.
+
+### DB methods (`core/db.py`)
+
+**`save_scan_results(filename, profile, model, regions, max_versions=5)`**
+1. Inserts new rows with current microsecond-precision timestamp
+2. Counts distinct timestamps for this `(filename, profile, model)`
+3. Prunes oldest timestamps beyond `max_versions`
+
+No more DELETE-then-INSERT — all versions coexist in the table.
+
+**`get_scan_versions(filename, profile, model)`**
+Returns `[{timestamp, count, max_score}, ...]` ordered newest first. Filters `scan_timestamp != ''` so legacy rows don't appear as named versions.
+
+**`get_scan_results(filename, profile, scan_timestamp=None)`**
+- With `scan_timestamp`: returns rows matching that exact version
+- Without (default): uses `INNER JOIN` subquery with `MAX(scan_timestamp)` per model to return only the latest version. Legacy rows (empty timestamp) sort before any real timestamp, so they're returned when no versioned scans exist.
+
+### UI (`main.py` — `ScanResultsPanel`)
+
+Each model tab wraps its `QTableWidget` in a container `QWidget` with a `QComboBox` for version selection:
+
+```
+container (QWidget)
+├── cmb_version (QComboBox) — hidden when ≤ 1 version
+└── table (QTableWidget)
+```
+
+**Helper methods** unwrap this container:
+- `_current_table()` — returns `QTableWidget` from active tab (handles both raw table and container)
+- `_tab_table(index)` — same by tab index
+
+**Version combo** is populated by `_populate_version_combos()` after every `load_for_file()` and `add_scan_results()` call. Labels use `datetime.strptime` parsing with try/except fallback for robustness:
+
+```
+2026-04-19 14:30 (12 regions, best: 0.95)
+```
+
+**Version switching** via `_on_version_changed(model, idx)`:
+1. Reads `scan_timestamp` from combo's `userData`
+2. Calls `get_scan_results(filename, profile, scan_timestamp=ts)`
+3. Repopulates the table in-place
+4. **Clears the undo stack** — stale undo entries from a different version would corrupt data
+5. Emits `regions_edited` to refresh the timeline
+
+**Tab switch** connects `tab_changed` signal to `_on_scan_regions_edited` (not just `_update_scan_export_count`), so the timeline updates scan regions when switching model tabs.
+
+### Cache interaction
+
+Embedding cache is per `(file, model)` and doesn't change across scans. History stores classified regions (start, end, score), not embeddings.
+
+### Test
+
+`tests/test_db.py::test_scan_result_history` — saves 3 versions, verifies counts, ordering, and latest-by-default behavior.
+
+---
+
+## 3. Hard Negative Management
+
+### Schema
+
+Added column to `hard_negatives`:
+
+```sql
+source_model TEXT NOT NULL DEFAULT ''
+```
+
+Migration adds the column via `ALTER TABLE` for existing databases.
+
+### DB methods (`core/db.py`)
+
+**`add_hard_negatives(filename, profile, times, source_path="", source_model="")`** — now stores which embedding model produced the scan that led to the negative marking.
+
+**`get_hard_negatives(profile)`** — returns all rows as `[{id, filename, start_time, source_path, source_model}, ...]` for the management dialog.
+
+**`delete_hard_negatives_by_ids(ids)`** — bulk delete by row IDs.
+
+**`get_training_data(..., use_hard_negatives=True)`** — new parameter. When `False`, the hard negatives query is skipped entirely. Non-destructive — negatives remain in DB.
+
+### Source model tracking (`main.py`)
+
+`_on_scan_negatives()` now passes `source_model=self._scan_panel.current_model_name()` when marking negatives from scan results. `current_model_name()` extracts the model name from the active tab text (stripping the count suffix).
+
+### Training toggle (`main.py` — `TrainDialog`)
+
+Checkbox **"Use hard negatives in training"** (default checked) with "Manage..." button in an HBox layout. The toggle:
+- Updates live training stats preview via debounced `_update_stats()`
+- Passes `use_hard_negatives` through `_open_train_dialog()` to `get_training_data()`
+
+### Management dialog (`main.py` — `HardNegativesDialog`)
+
+Accessible from TrainDialog's "Manage..." button. Features:
+
+| Component | Details |
+|-----------|---------|
+| **Filter combo** | `(all)` + each distinct `source_model` found in data |
+| **Summary label** | `N hard negatives` |
+| **Table** | File, Time (`{:.1f}s`), Source Model, hidden ID column |
+| **Delete Selected** | Multi-select aware, skips hidden (filtered) rows |
+| **Clear All** | **Filter-aware**: if a model filter is active, only deletes negatives for that model with an appropriate confirmation message. If `(all)`, deletes everything. |
+| **Close** | Closes dialog, triggers stats refresh in parent TrainDialog |
+
+`blockSignals(True)` guards prevent spurious filter callbacks during `_load()` repopulation.
+
+### Tests
+
+- `test_hard_negatives_source_model` — verifies source_model stored and retrieved
+- `test_training_data_skips_hard_negatives` — verifies `use_hard_negatives=False` excludes them
+- `test_delete_hard_negatives_by_ids` — verifies bulk deletion by ID
+
+---
+
+## 4. Runtime Fixes (discovered during testing)
+
+### EAT/torchvision ABI mismatch
+
+**Problem:** `torchvision` installed from PyPI (CPU build) was incompatible with `torch` from CUDA wheel index, causing `operator torchvision::nms does not exist`.
+
+**Fix:** Added `torchvision` to the explicit torch install line in both setup scripts:
+```bash
+pip install torch torchaudio torchvision --index-url "$TORCH_INDEX"
+```
+
+Also added `--extra-index-url "$TORCH_INDEX"` to the `pip install -r requirements.txt` line to prevent transitive dependencies (timm, ultralytics) from pulling CPU-only torch packages.
+
+Applied to: `setup_env.sh` (both conda and venv paths), `setup-windows.ps1`.
+
+### EAT / transformers 5.x incompatibility
+
+**Problem:** transformers 5.x broke EAT's remote model code (`'EATModel' object has no attribute 'all_tied_weights_keys'`).
+
+**Fix:** Pinned `transformers>=4.30,<5.0` in `requirements.txt`.
+
+### NumPy non-writable array warning
+
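+**Problem:** Cached HuBERT/EAT embeddings loaded from disk are read-only numpy arrays. `torch.from_numpy()` on a non-writable array emits a `UserWarning` ("The given NumPy array is not writable…"), because the resulting tensor shares memory with the array and writing to it would be undefined behavior.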
+
+**Fix:** In `core/audio_scan.py`, changed EAT preprocessing to copy the array:
+```python
+wav = torch.from_numpy(np.array(chunk)).unsqueeze(0).float()
+```
+
+### Timeline not updating on tab switch
+
+**Problem:** Switching model tabs in the scan results panel didn't refresh the timeline's highlighted regions because `tab_changed` was only connected to `_update_scan_export_count`.
+
+**Fix:** Connected `tab_changed` to `_on_scan_regions_edited` instead, which handles both timeline refresh and export count update.
+
+---
+
+## File Summary
+
+| File | Changes |
+|------|---------|
+| `core/db.py` | Schema migrations, `get_export_folders` filter, versioned `save_scan_results`, `get_scan_versions`, version-aware `get_scan_results`, `add_hard_negatives` with `source_model`, `get_hard_negatives`, `delete_hard_negatives_by_ids`, `get_training_data` with `use_hard_negatives` |
+| `main.py` | `HardNegativesDialog` class, `TrainDialog` hard neg toggle + manage button, `ScanResultsPanel` container/combo architecture, version combo population and switching, `current_model_name()`, tab-switch timeline fix |
+| `core/audio_scan.py` | `np.array(chunk)` copy for read-only numpy arrays in EAT preprocessing |
+| `requirements.txt` | `transformers>=4.30,<5.0` pin |
+| `setup_env.sh` | `torchvision` in torch install, `--extra-index-url` on requirements install |
+| `setup-windows.ps1` | `torchvision` in torch install, `--extra-index-url` on requirements install, removed skip-if-exists guard |
+| `tests/test_db.py` | 5 tests covering all DB-layer changes |
diff --git a/docs/plans/2026-04-19-scan-history-negatives-implementation.md b/docs/plans/2026-04-19-scan-history-negatives-implementation.md
index 0c33946..af457f0 100644
--- a/docs/plans/2026-04-19-scan-history-negatives-implementation.md
+++ b/docs/plans/2026-04-19-scan-history-negatives-implementation.md
@@ -1,714 +1,94 @@
-# Scan History & Hard Negative Management Implementation Plan
+# Scan History & Hard Negative Management — Implementation Log
-> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+> All tasks complete. See the design doc for the final specification.
-**Goal:** Add scan result versioning, hard negative management dialog with training toggle, and fix ghost folder bug.
-
-**Architecture:** DB schema changes in `core/db.py` (new columns, new queries). UI changes in `main.py` (version selector in ScanResultsPanel, management dialog, training toggle). No changes to `core/audio_scan.py`.
-
-**Tech Stack:** SQLite (existing), PyQt6 (existing)
-
-**Key design notes:**
-- Scan history stores N versions per `(filename, profile, model)` using a `scan_timestamp` column. All rows from one scan share the same timestamp.
-- Hard negatives gain a `source_model` column (informational) and training gains a `use_hard_negatives` toggle.
-- `get_export_folders()` must respect `scan_export` filter to prevent ghost folders.
+**Branch:** `feat/training-ui`
---
-### Task 1: Fix ghost folder bug in get_export_folders
+### Task 1: Fix ghost folder bug in get_export_folders -- DONE
-**Files:**
-- Modify: `core/db.py:294-313` (get_export_folders)
-- Modify: `core/db.py:410-443` (get_training_stats — filter out 0-clip folders)
-- Test: `tests/test_db.py`
+**Commit:** `2614a76 fix: get_export_folders respects scan_export filter`
-**Step 1: Write failing test**
-
-```python
-def test_export_folders_excludes_scan_exports():
- """Scan-export-only folders should not appear when include_scan_exports=False."""
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- path = f.name
- try:
- db = ProcessedDB(path)
- # Manual export
- db.add("a.mp4", 10.0, "/out/mp4_Intense/g1/clip.mp4", profile="test")
- # Scan export to different folder
- db.add("a.mp4", 20.0, "/out/mp4_ScanOnly/g1/clip.mp4", profile="test",
- scan_export=True)
- folders = db.get_export_folders("test")
- assert "mp4_Intense" in folders
- assert "mp4_ScanOnly" not in folders, "scan-only folder should be excluded"
- # With include_scan_exports=True, both should appear
- folders_all = db.get_export_folders("test", include_scan_exports=True)
- assert "mp4_ScanOnly" in folders_all
- finally:
- os.unlink(path)
-```
-
-**Step 2: Fix get_export_folders**
-
-Add `include_scan_exports` parameter:
-
-```python
-def get_export_folders(self, profile: str = "default",
- include_scan_exports: bool = False) -> list[str]:
- if not self._enabled:
- return []
- if include_scan_exports:
- rows = self._con.execute(
- "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
- (profile,),
- ).fetchall()
- else:
- rows = self._con.execute(
- "SELECT DISTINCT output_path FROM processed"
- " WHERE profile = ? AND scan_export = 0",
- (profile,),
- ).fetchall()
- folder_names: set[str] = set()
- for (op,) in rows:
- grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
- if grandparent:
- folder_names.add(grandparent)
- return sorted(folder_names)
-```
-
-**Step 3: Update get_training_stats to pass through**
-
-```python
- folders = self.get_export_folders(profile, include_scan_exports=include_scan_exports)
-```
-
-And filter out empty folders at the end:
-
-```python
- return {k: v for k, v in stats.items() if v["clips"] > 0}
-```
-
-**Step 4: Run tests, commit**
-
-```bash
-pytest tests/ -v
-git add core/db.py tests/test_db.py
-git commit -m "fix: get_export_folders respects scan_export filter"
-```
+- `core/db.py` — `get_export_folders(profile, include_scan_exports=False)`: filters `scan_export = 0` by default
+- `core/db.py` — `get_training_stats()`: passes `include_scan_exports` through, filters out 0-clip folders
+- `tests/test_db.py` — `test_export_folders_excludes_scan_exports`
---
-### Task 2: Scan result history — schema and DB methods
+### Task 2: Scan result history — schema and DB methods -- DONE
-**Files:**
-- Modify: `core/db.py:86-98` (scan_results schema — add scan_timestamp column)
-- Modify: `core/db.py:100-113` (migration — add scan_timestamp to existing tables)
-- Modify: `core/db.py:447-468` (save_scan_results — version management)
-- Add: `core/db.py` (get_scan_versions, load_scan_version, delete_scan_version)
-- Test: `tests/test_db.py`
+**Commit:** `4fb2ae1 feat: scan result history — keep N versions per (file, model)`
-**Step 1: Write failing test**
-
-```python
-def test_scan_result_history():
- """save_scan_results should keep multiple versions."""
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- path = f.name
- try:
- db = ProcessedDB(path)
- # Save three versions
- db.save_scan_results("v.mp4", "test", "MODEL_A",
- [(0, 8, 0.9)])
- db.save_scan_results("v.mp4", "test", "MODEL_A",
- [(0, 8, 0.8), (10, 18, 0.7)])
- db.save_scan_results("v.mp4", "test", "MODEL_A",
- [(5, 13, 0.95)])
- versions = db.get_scan_versions("v.mp4", "test", "MODEL_A")
- assert len(versions) == 3
- # Most recent first
- assert versions[0]["count"] == 1 # latest: 1 region
- assert versions[1]["count"] == 2 # middle: 2 regions
- assert versions[2]["count"] == 1 # oldest: 1 region
- # get_scan_results returns latest version by default
- results = db.get_scan_results("v.mp4", "test")
- assert len(results.get("MODEL_A", [])) == 1
- finally:
- os.unlink(path)
-```
-
-**Step 2: Add scan_timestamp column**
-
-In the CREATE TABLE (line 87-98), add:
-
-```sql
- scan_timestamp TEXT NOT NULL DEFAULT ''
-```
-
-In the migration block (lines 100-113), add:
-
-```python
- ("scan_timestamp", "TEXT NOT NULL DEFAULT ''"),
-```
-
-**Step 3: Modify save_scan_results**
-
-Replace the current DELETE+INSERT with versioned insert + cleanup:
-
-```python
-def save_scan_results(self, filename: str, profile: str, model: str,
- regions: list[tuple[float, float, float]],
- max_versions: int = 5) -> None:
- if not self._enabled:
- return
- from datetime import datetime
- ts = datetime.now().strftime("%Y%m%d_%H%M%S")
- with self._lock:
- self._con.executemany(
- "INSERT INTO scan_results"
- " (filename, profile, model, start_time, end_time, score,"
- " orig_start_time, orig_end_time, scan_timestamp)"
- " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
- [(filename, profile, model, s, e, sc, s, e, ts)
- for s, e, sc in regions],
- )
- # Prune old versions beyond max_versions
- versions = self._con.execute(
- "SELECT DISTINCT scan_timestamp FROM scan_results"
- " WHERE filename = ? AND profile = ? AND model = ?"
- " ORDER BY scan_timestamp DESC",
- (filename, profile, model),
- ).fetchall()
- if len(versions) > max_versions:
- old_ts = [v[0] for v in versions[max_versions:]]
- self._con.execute(
- "DELETE FROM scan_results"
- " WHERE filename = ? AND profile = ? AND model = ?"
- f" AND scan_timestamp IN ({','.join('?' * len(old_ts))})",
- (filename, profile, model, *old_ts),
- )
- self._con.commit()
-```
-
-**Step 4: Add get_scan_versions**
-
-```python
-def get_scan_versions(self, filename: str, profile: str, model: str
- ) -> list[dict]:
- """Return list of scan versions for (filename, profile, model).
-
- Returns [{timestamp, count, max_score}, ...] ordered newest first.
- """
- if not self._enabled:
- return []
- rows = self._con.execute(
- "SELECT scan_timestamp, COUNT(*), MAX(score)"
- " FROM scan_results"
- " WHERE filename = ? AND profile = ? AND model = ?"
- " AND scan_timestamp != ''"
- " GROUP BY scan_timestamp"
- " ORDER BY scan_timestamp DESC",
- (filename, profile, model),
- ).fetchall()
- return [{"timestamp": ts, "count": cnt, "max_score": sc}
- for ts, cnt, sc in rows]
-```
-
-**Step 5: Modify get_scan_results to support version selection**
-
-Add optional `scan_timestamp` parameter. When None (default), returns latest version:
-
-```python
-def get_scan_results(self, filename: str, profile: str,
- scan_timestamp: str | None = None
- ) -> dict[str, list[tuple]]:
- if not self._enabled:
- return {}
- if scan_timestamp:
- rows = self._con.execute(
- "SELECT id, model, start_time, end_time, score, disabled,"
- " orig_start_time, orig_end_time"
- " FROM scan_results"
- " WHERE filename = ? AND profile = ? AND scan_timestamp = ?"
- " ORDER BY model, start_time",
- (filename, profile, scan_timestamp),
- ).fetchall()
- else:
- # For each model, get rows from the latest timestamp only
- rows = self._con.execute(
- "SELECT r.id, r.model, r.start_time, r.end_time, r.score,"
- " r.disabled, r.orig_start_time, r.orig_end_time"
- " FROM scan_results r"
- " INNER JOIN ("
- " SELECT model, MAX(scan_timestamp) AS latest"
- " FROM scan_results"
- " WHERE filename = ? AND profile = ?"
- " GROUP BY model"
- " ) m ON r.model = m.model AND r.scan_timestamp = m.latest"
- " WHERE r.filename = ? AND r.profile = ?"
- " ORDER BY r.model, r.start_time",
- (filename, profile, filename, profile),
- ).fetchall()
- result: dict[str, list] = {}
- for row_id, model, s, e, sc, dis, os_, oe in rows:
- result.setdefault(model, []).append(
- (row_id, s, e, sc, bool(dis),
- os_ if os_ is not None else s,
- oe if oe is not None else e))
- return result
-```
-
-**Important:** Legacy rows (before this change) have `scan_timestamp = ''`. The `MAX(scan_timestamp)` query handles this correctly — empty string sorts before any real timestamp, so legacy rows are returned when they're the only version. The `get_scan_versions` query filters `scan_timestamp != ''` so legacy rows don't appear as named versions.
-
-**Step 6: Run tests, commit**
-
-```bash
-pytest tests/ -v
-git add core/db.py tests/test_db.py
-git commit -m "feat: scan result history — keep N versions per (file, model)"
-```
+- `core/db.py` — added `scan_timestamp TEXT NOT NULL DEFAULT ''` column with migration
+- `core/db.py` — `save_scan_results()`: versioned insert with microsecond-precision timestamp (`%Y%m%d_%H%M%S_%f`), auto-prunes beyond `max_versions=5`
+- `core/db.py` — `get_scan_versions()`: returns `[{timestamp, count, max_score}, ...]` newest first
+- `core/db.py` — `get_scan_results(scan_timestamp=None)`: `INNER JOIN` subquery with `MAX(scan_timestamp)` for latest-by-default
+- `tests/test_db.py` — `test_scan_result_history`
---
-### Task 3: Scan history UI — version selector in ScanResultsPanel
+### Task 3: Scan history UI — version selector in ScanResultsPanel -- DONE
-**Files:**
-- Modify: `main.py` (ScanResultsPanel — add version combo per tab)
-- Modify: `main.py` (ScanResultsPanel.load_for_file — populate versions)
+**Commit:** `8ed9fbf feat: scan version selector in results panel`
-**Step 1: Add version combo to tab UI**
-
-In `ScanResultsPanel._add_tab()`, add a small QComboBox above the table. When no history exists, hide it. When versions exist, populate with timestamps and connect to a slot that reloads the tab with that version.
-
-```python
-# In _add_tab, create a container widget with version combo + table
-container = QWidget()
-layout = QVBoxLayout(container)
-layout.setContentsMargins(0, 0, 0, 0)
-
-cmb_version = QComboBox()
-cmb_version.setMaximumWidth(200)
-cmb_version.setToolTip("Scan version history")
-cmb_version.hide() # Hidden when only 1 version
-layout.addWidget(cmb_version)
-layout.addWidget(table)
-
-self._tabs.addTab(container, label)
-```
-
-Store the combo and table as properties on the container widget for later access.
-
-**Step 2: Populate versions in load_for_file**
-
-After creating each model tab, query `get_scan_versions()`. If > 1 version, show the combo with entries like `"2026-04-19 14:30 (12 regions, best: 0.95)"`. Connect `currentIndexChanged` to reload that version's results.
-
-**Step 3: Version switching slot**
-
-When user selects a different version from the combo:
-1. Call `db.get_scan_results(filename, profile, scan_timestamp=selected_ts)`
-2. Repopulate the table with that version's rows
-3. Update timeline regions
-
-**Step 4: Test manually, commit**
-
-```bash
-git add main.py
-git commit -m "feat: scan version selector in results panel"
-```
+- `main.py` — `_add_tab()`: wraps table in container `QWidget` with version `QComboBox` (hidden when ≤ 1 version)
+- `main.py` — `_current_table()` / `_tab_table(idx)`: unwrap container to get `QTableWidget`
+- `main.py` — `_populate_version_combos()`: queries `get_scan_versions()`, formats labels with `datetime.strptime` + try/except fallback
+- `main.py` — `_on_version_changed()`: reloads table from specific version, clears undo stack, emits `regions_edited`
+- `main.py` — `current_model_name()`: extracts model name from tab text
---
-### Task 4: Hard negatives — schema and training toggle
+### Task 4: Hard negatives — schema and training toggle -- DONE
-**Files:**
-- Modify: `core/db.py:118-130` (hard_negatives schema — add source_model column)
-- Modify: `core/db.py:548-560` (add_hard_negatives — accept source_model)
-- Modify: `core/db.py:365-374` (get_training_data — use_hard_negatives parameter)
-- Modify: `main.py` (TrainDialog — add "Use hard negatives" checkbox)
-- Modify: `main.py` (_open_train_dialog — pass use_hard_negatives to get_training_data)
-- Test: `tests/test_db.py`
+**Commit:** `edc5784 feat: hard negative source_model tracking, training toggle`
-**Step 1: Write failing test**
-
-```python
-def test_hard_negatives_source_model():
- """Hard negatives should store source_model."""
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- path = f.name
- try:
- db = ProcessedDB(path)
- db.add_hard_negatives("a.mp4", "test", [10.0, 20.0],
- source_path="/a.mp4", source_model="HUBERT_XLARGE")
- rows = db.get_hard_negatives("test")
- assert len(rows) == 2
- assert all(r["source_model"] == "HUBERT_XLARGE" for r in rows)
- finally:
- os.unlink(path)
-
-def test_training_data_skips_hard_negatives():
- """get_training_data with use_hard_negatives=False should skip them."""
- with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
- path = f.name
- try:
- db = ProcessedDB(path)
- db.add("a.mp4", 10.0, "/out/folder/g/clip.mp4", profile="test",
- source_path="/videos/a.mp4")
- db.add_hard_negatives("a.mp4", "test", [500.0], source_path="/videos/a.mp4")
- # With hard negatives
- data_with = db.get_training_data("test", "folder", use_hard_negatives=True)
- # Without hard negatives
- data_without = db.get_training_data("test", "folder", use_hard_negatives=False)
- # Both should find the video, but negative counts differ
- assert len(data_with) >= 1
- neg_with = sum(len(vi[3]) for vi in data_with)
- neg_without = sum(len(vi[3]) for vi in data_without)
- assert neg_with > neg_without or neg_with == neg_without # depends on margin
- finally:
- os.unlink(path)
-```
-
-**Step 2: Add source_model column to hard_negatives**
-
-In CREATE TABLE (line 119-125), add:
-
-```sql
- source_model TEXT NOT NULL DEFAULT ''
-```
-
-In migration section, add after the hard_negatives table creation:
-
-```python
-hn_cols = {
- row[1]
- for row in self._con.execute("PRAGMA table_info(hard_negatives)").fetchall()
-}
-if "source_model" not in hn_cols:
- self._con.execute(
- "ALTER TABLE hard_negatives ADD COLUMN source_model TEXT NOT NULL DEFAULT ''"
- )
-```
-
-**Step 3: Update add_hard_negatives to accept source_model**
-
-```python
-def add_hard_negatives(self, filename: str, profile: str,
- times: list[float], source_path: str = "",
- source_model: str = "") -> None:
- if not self._enabled or not times:
- return
- with self._lock:
- for t in times:
- self._con.execute(
- "INSERT INTO hard_negatives"
- " (filename, profile, start_time, source_path, source_model)"
- " VALUES (?, ?, ?, ?, ?)",
- (filename, profile, t, source_path, source_model),
- )
- self._con.commit()
-```
-
-**Step 4: Add get_hard_negatives (full rows for management dialog)**
-
-```python
-def get_hard_negatives(self, profile: str) -> list[dict]:
- """Return all hard negatives for a profile with full details."""
- if not self._enabled:
- return []
- rows = self._con.execute(
- "SELECT id, filename, start_time, source_path, source_model"
- " FROM hard_negatives WHERE profile = ?"
- " ORDER BY filename, start_time",
- (profile,),
- ).fetchall()
- return [{"id": r[0], "filename": r[1], "start_time": r[2],
- "source_path": r[3], "source_model": r[4]} for r in rows]
-```
-
-**Step 5: Add delete_hard_negatives_by_ids**
-
-```python
-def delete_hard_negatives_by_ids(self, ids: list[int]) -> None:
- """Delete hard negatives by row IDs."""
- if not self._enabled or not ids:
- return
- with self._lock:
- self._con.execute(
- f"DELETE FROM hard_negatives WHERE id IN ({','.join('?' * len(ids))})",
- ids,
- )
- self._con.commit()
-```
-
-**Step 6: Add use_hard_negatives parameter to get_training_data**
-
-In `get_training_data()` (line 315), add parameter:
-
-```python
-def get_training_data(self, profile: str, positive_folder: str,
- negative_folder: str = "",
- fallback_video_dir: str = "",
- include_scan_exports: bool = False,
- use_hard_negatives: bool = True,
- ) -> list[tuple[str, list[float], list[float], list[float]]]:
-```
-
-Then wrap the hard negatives query (lines 365-374) in a conditional:
-
-```python
- if use_hard_negatives:
- hard_rows = self._con.execute(
- "SELECT filename, start_time, source_path FROM hard_negatives"
- " WHERE profile = ?",
- (profile,),
- ).fetchall()
- for fn, st, sp in hard_rows:
- neg_by_video.setdefault(fn, set()).add(st)
- if sp:
- source_by_filename.setdefault(fn, sp)
-```
-
-**Step 7: Pass source_model when marking negatives from scan panel**
-
-In `main.py`, `_on_scan_negatives()` needs to pass the current scan model. The scan panel knows which tab is active:
-
-```python
-def _on_scan_negatives(self, times: list) -> None:
- if not self._file_path:
- return
- filename = os.path.basename(self._file_path)
- # Get current model tab name for source_model
- source_model = self._scan_panel.current_model_name()
- self._db.add_hard_negatives(filename, self._profile, times,
- source_path=self._file_path,
- source_model=source_model)
-```
-
-Add `current_model_name()` to ScanResultsPanel:
-
-```python
-def current_model_name(self) -> str:
- """Return the model name of the currently active tab."""
- idx = self._tabs.currentIndex()
- if idx >= 0:
- return self._tabs.tabText(idx).split(" (")[0] # strip count suffix
- return ""
-```
-
-**Step 8: Add training toggle to TrainDialog**
-
-After the existing `_chk_scan_exports` checkbox:
-
-```python
-self._chk_hard_negatives = QCheckBox("Use hard negatives in training")
-self._chk_hard_negatives.setChecked(True)
-self._chk_hard_negatives.setToolTip(
- "When unchecked, manually marked hard negatives are excluded from training.\n"
- "Useful when training a new model type where old negatives may not apply.")
-self._chk_hard_negatives.stateChanged.connect(lambda: self._debounce.start())
-form.addRow("", self._chk_hard_negatives)
-```
-
-Add property:
-
-```python
-@property
-def use_hard_negatives(self) -> bool:
- return self._chk_hard_negatives.isChecked()
-```
-
-**Step 9: Wire toggle through _open_train_dialog**
-
-In `_open_train_dialog()`, pass the flag:
-
-```python
- video_infos = self._db.get_training_data(
- self._profile, pos_folder, negative_folder=neg_folder,
- fallback_video_dir=video_dir,
- include_scan_exports=inc_scan,
- use_hard_negatives=dlg.use_hard_negatives,
- )
-```
-
-Also update `_update_stats()` in TrainDialog to pass it through for accurate counts:
-
-```python
- use_neg = self._chk_hard_negatives.isChecked() if hasattr(self, '_chk_hard_negatives') else True
- video_infos = self._db.get_training_data(
- self._profile, folder, negative_folder=neg_folder,
- fallback_video_dir=self._txt_video_dir.text(),
- include_scan_exports=inc_scan,
- use_hard_negatives=use_neg,
- )
-```
-
-**Step 10: Run tests, commit**
-
-```bash
-pytest tests/ -v
-git add core/db.py main.py tests/test_db.py
-git commit -m "feat: hard negative source_model tracking, training toggle"
-```
+- `core/db.py` — added `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives` with migration
+- `core/db.py` — `add_hard_negatives(source_model="")`: stores originating model
+- `core/db.py` — `get_hard_negatives(profile)`: returns full rows as list of dicts
+- `core/db.py` — `delete_hard_negatives_by_ids(ids)`: bulk delete by row IDs
+- `core/db.py` — `get_training_data(use_hard_negatives=True)`: conditionally skips hard negatives query
+- `main.py` — `TrainDialog`: "Use hard negatives" checkbox + "Manage..." button in HBox layout
+- `main.py` — `_on_scan_negatives()`: passes `source_model=self._scan_panel.current_model_name()`
+- `tests/test_db.py` — `test_hard_negatives_source_model`, `test_training_data_skips_hard_negatives`, `test_delete_hard_negatives_by_ids`
---
-### Task 5: Hard negatives management dialog
+### Task 5: Hard negatives management dialog -- DONE
-**Files:**
-- Modify: `main.py` (add HardNegativesDialog class)
-- Modify: `main.py` (TrainDialog — add "Manage..." button)
+**Commit:** `e6db83f feat: hard negatives management dialog with filter and bulk delete`
-**Step 1: Create HardNegativesDialog**
-
-Place before TrainDialog class:
-
-```python
-class HardNegativesDialog(QDialog):
- """View and manage hard negative training examples."""
-
- def __init__(self, db: ProcessedDB, profile: str, parent=None):
- super().__init__(parent)
- self.setWindowTitle("Hard Negatives")
- self.setMinimumSize(600, 400)
- self._db = db
- self._profile = profile
-
- layout = QVBoxLayout(self)
-
- # Filter row
- filter_row = QHBoxLayout()
- filter_row.addWidget(QLabel("Filter model:"))
- self._cmb_filter = QComboBox()
- self._cmb_filter.addItem("(all)")
- self._cmb_filter.currentIndexChanged.connect(self._apply_filter)
- filter_row.addWidget(self._cmb_filter, 1)
- layout.addLayout(filter_row)
-
- # Summary
- self._lbl_summary = QLabel()
- layout.addWidget(self._lbl_summary)
-
- # Table
- self._table = QTableWidget(0, 4)
- self._table.setHorizontalHeaderLabels(
- ["File", "Time", "Source Model", "ID"])
- self._table.horizontalHeader().setSectionResizeMode(
- 0, QHeaderView.ResizeMode.Stretch)
- self._table.setEditTriggers(QTableWidget.EditTrigger.NoEditTriggers)
- self._table.setSelectionBehavior(QTableWidget.SelectionBehavior.SelectRows)
- self._table.setColumnHidden(3, True) # hide ID column
- layout.addWidget(self._table)
-
- # Buttons
- btn_row = QHBoxLayout()
- btn_delete = QPushButton("Delete Selected")
- btn_delete.clicked.connect(self._delete_selected)
- btn_row.addWidget(btn_delete)
- btn_clear = QPushButton("Clear All")
- btn_clear.clicked.connect(self._clear_all)
- btn_row.addWidget(btn_clear)
- btn_row.addStretch()
- btn_close = QPushButton("Close")
- btn_close.clicked.connect(self.close)
- btn_row.addWidget(btn_close)
- layout.addLayout(btn_row)
-
- self._load()
-
- def _load(self):
- rows = self._db.get_hard_negatives(self._profile)
- models = sorted(set(r["source_model"] for r in rows if r["source_model"]))
- self._cmb_filter.blockSignals(True)
- self._cmb_filter.clear()
- self._cmb_filter.addItem("(all)")
- for m in models:
- self._cmb_filter.addItem(m)
- self._cmb_filter.blockSignals(False)
-
- self._table.setRowCount(len(rows))
- for i, r in enumerate(rows):
- self._table.setItem(i, 0, QTableWidgetItem(r["filename"]))
- self._table.setItem(i, 1, QTableWidgetItem(f'{r["start_time"]:.1f}s'))
- self._table.setItem(i, 2, QTableWidgetItem(r["source_model"]))
- item = QTableWidgetItem(str(r["id"]))
- self._table.setItem(i, 3, item)
- self._lbl_summary.setText(f"{len(rows)} hard negatives")
-
- def _apply_filter(self):
- model = self._cmb_filter.currentText()
- for row in range(self._table.rowCount()):
- if model == "(all)":
- self._table.setRowHidden(row, False)
- else:
- src = self._table.item(row, 2).text()
- self._table.setRowHidden(row, src != model)
-
- def _delete_selected(self):
- ids = []
- for row in sorted(set(i.row() for i in self._table.selectedItems()), reverse=True):
- if not self._table.isRowHidden(row):
- ids.append(int(self._table.item(row, 3).text()))
- if ids:
- self._db.delete_hard_negatives_by_ids(ids)
- self._load()
-
- def _clear_all(self):
- reply = QMessageBox.question(
- self, "Clear All",
- f"Delete all hard negatives for profile '{self._profile}'?",
- QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
- )
- if reply == QMessageBox.StandardButton.Yes:
- all_rows = self._db.get_hard_negatives(self._profile)
- self._db.delete_hard_negatives_by_ids([r["id"] for r in all_rows])
- self._load()
-```
-
-**Step 2: Add "Manage..." button to TrainDialog**
-
-After the hard negatives checkbox, add a button:
-
-```python
-neg_row = QHBoxLayout()
-neg_row.addWidget(self._chk_hard_negatives)
-btn_manage_neg = QPushButton("Manage…")
-btn_manage_neg.setFixedWidth(80)
-btn_manage_neg.clicked.connect(self._manage_negatives)
-neg_row.addWidget(btn_manage_neg)
-form.addRow("", neg_row) # replaces the standalone checkbox addRow
-```
-
-Add handler:
-
-```python
-def _manage_negatives(self):
- dlg = HardNegativesDialog(self._db, self._profile, parent=self)
- dlg.exec()
- self._debounce.start() # refresh stats after potential deletions
-```
-
-**Step 3: Test manually, commit**
-
-```bash
-pytest tests/ -v
-git add main.py
-git commit -m "feat: hard negatives management dialog with filter and bulk delete"
-```
+- `main.py` — `HardNegativesDialog`: table with File/Time/Source Model/hidden ID columns, model filter combo, delete selected, filter-aware clear all, close button
+- Filter-aware "Clear All": respects active model filter, shows appropriate confirmation message
---
-### Task 6: Final integration test and push
+### Task 6: Code review fixes -- DONE
-**Step 1: Manual test checklist**
+**Commit:** `5d45b8d fix: timestamp collision, undo stack invalidation, label parsing, filter-aware clear`
-- [ ] Open Train dialog — verify no ghost folders appear
-- [ ] Train with "Use hard negatives" unchecked — verify training works
-- [ ] Train with "Use hard negatives" checked — verify negatives are used
-- [ ] Open Manage dialog — verify negatives listed with source model
-- [ ] Delete selected negatives — verify they're removed
-- [ ] Scan a video — verify results saved with timestamp
-- [ ] Rescan same video — verify version history appears
-- [ ] Switch version in scan panel — verify correct results display
-- [ ] Mark negative from scan results — verify source_model stored
+Four issues found during code review:
+1. **Timestamp collision** — with second-precision timestamps, two saves within the same second shared a timestamp and were merged into one version. Fixed by adding microseconds (`%f`) to the timestamp format
+2. **Undo stack invalidation** — switching scan versions left stale undo entries. Fixed by clearing undo stack in `_on_version_changed()`
+3. **Fragile timestamp label parsing** — version labels relied on hard-coded string slicing. Fixed with `datetime.strptime` plus a try/except fallback
+4. **Clear All ignoring filter** — deleted all negatives regardless of model filter. Fixed to respect active filter
-**Step 2: Push**
+---
-```bash
-git push
-```
+### Runtime fixes (discovered during manual testing)
+
+| Commit | Fix |
+|--------|-----|
+| `a3c657c` | Install `torchvision` from CUDA wheel index (was pulling CPU build from PyPI) |
+| `3c3b1d7` | Remove "skip if torch exists" guard in Windows setup so re-runs fix broken envs |
+| `fd043f4` | Pin `transformers>=4.30,<5.0` — EAT remote model code incompatible with transformers 5.x |
+| `7d6fee9` | Copy read-only numpy array before `torch.from_numpy()` in EAT preprocessing |
+| `bd345ab` | Connect `tab_changed` to `_on_scan_regions_edited` so timeline refreshes on tab switch |
+| `d8b3972` | Add `--extra-index-url` to `pip install -r requirements.txt` in both setup scripts |
+
+---
+
+### Test results
+
+All 68 tests pass (5 new DB tests + 63 existing).
diff --git a/main.py b/main.py
index 0225744..51819fb 100755
--- a/main.py
+++ b/main.py
@@ -1756,16 +1756,18 @@ class TimelineWidget(QWidget):
def mousePressEvent(self, event):
x = event.position().x()
- # Check for scan region edge drag
- hit = self._hit_scan_edge(x)
- if hit is not None:
- idx, edge = hit
- r = self._scan_regions[idx]
- self._drag_idx = idx
- self._drag_edge = edge
- self._drag_start_val = r[0]
- self._drag_end_val = r[1]
- return
+ # Check for scan region edge drag — require Shift to avoid accidental resizes
+ mods = event.modifiers()
+ if mods & Qt.KeyboardModifier.ShiftModifier:
+ hit = self._hit_scan_edge(x)
+ if hit is not None:
+ idx, edge = hit
+ r = self._scan_regions[idx]
+ self._drag_idx = idx
+ self._drag_edge = edge
+ self._drag_start_val = r[0]
+ self._drag_end_val = r[1]
+ return
self._seek(x)
def mouseDoubleClickEvent(self, event):
@@ -1801,9 +1803,9 @@ class TimelineWidget(QWidget):
self.update()
return
- # Hover cursor: resize arrow near edges, normal otherwise
- hit = self._hit_scan_edge(x)
- if hit is not None:
+ # Hover cursor: resize arrow near edges (only with Shift held)
+ mods = event.modifiers()
+ if (mods & Qt.KeyboardModifier.ShiftModifier) and self._hit_scan_edge(x):
self.setCursor(Qt.CursorShape.SizeHorCursor)
else:
self.unsetCursor()
@@ -3224,6 +3226,67 @@ class MainWindow(QMainWindow):
self._playlist._select(0)
_log(f"Resumed session: {len(valid)} file(s)")
+ self._show_changelog()
+
+ # ── Changelog ────────────────────────────────────────────
+
+ APP_VERSION = "1.0"
+ CHANGELOG: list[tuple[str, list[str]]] = [
+ ("1.0", [
+ "New export layout — clips are now stored in per-video "
+ "vid_NNN/ folders instead of per-clip "
+ "clip_NNN/ group dirs. "
+ "Each source video gets its own folder with flat clip files inside "
+ "(e.g. mp4/vid_001/clip_001_0.mp4). "
+ "Old databases are migrated automatically on startup: "
+ "DB paths are rewritten and files are moved to the new layout.",
+ "Counter is now per-video — clip numbering restarts in each "
+ "vid folder, and the DB is cross-checked to prevent overwrites "
+ "even if the export folder is temporarily empty.",
+ "Audio detection models — three new embedding models for "
+ "audio scanning: AST (Audio Spectrogram Transformer), "
+ "EAT (Efficient Audio Transformer), and multi-layer "
+ "HuBERT/Wav2Vec2 extraction. Classifier probabilities are now "
+ "calibrated with isotonic regression for more meaningful scores.",
+ "Scan result history — scan results are versioned per "
+ "(file, model); switch between past scan versions from a dropdown.",
+ "Hard negatives — management dialog to review, filter, and "
+ "bulk-delete hard negatives; source model is tracked per negative.",
+ "Scan workflow — disable/resize scan regions, undo edits, "
+ "interruptible Scan All with resume, audio prefetch, review mode.",
+ "Dataset statistics — dialog showing per-video clip breakdown "
+ "and class balance.",
+ "Waveform overlay on timeline.",
+ ]),
+ ]
+
+ def _show_changelog(self) -> None:
+ last = self._settings.value("last_seen_version", "")
+ if last == self.APP_VERSION:
+ return
+ # Collect entries newer than last seen
+ lines: list[str] = []
+ for ver, items in self.CHANGELOG:
+ if ver == last:
+ break
+ lines.append(f"
| Double-click marker | Enter overwrite mode (locked: jump to end of clip span) |
| Right-click marker | Delete clip group |
| Click video / crop bar | Reposition portrait crop |
| Drag scan region edge | Resize scan region |
| Shift+drag scan region edge | Resize scan region |