feat: vid folder layout, changelog popup, shift-to-resize, DB migration
- Export layout changed from clip_NNN group dirs to vid_NNN per-video folders
- Automatic DB migration rewrites old paths and moves files on startup
- Per-video counter with DB cross-check to prevent overwrites
- Changelog popup on version bump with "don't show again" checkbox
- Scan region resize now requires Shift+drag to prevent accidental edits
- Recalculate vid folder and counter on file load
- Add EAT_LARGE embedding model variant
- Update tests for new flat export path structure

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+8
-5
@@ -65,6 +65,7 @@ _EMBED_MODELS = {
|
|||||||
"AST": 768,
|
"AST": 768,
|
||||||
"AST_ML": 3072, # 768 * 4
|
"AST_ML": 3072, # 768 * 4
|
||||||
"EAT": 768,
|
"EAT": 768,
|
||||||
|
"EAT_LARGE": 1024,
|
||||||
}
|
}
|
||||||
_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
|
_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
|
||||||
|
|
||||||
@@ -104,11 +105,13 @@ def _get_w2v_model(model_name: str | None = None):
|
|||||||
_ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
|
_ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
|
||||||
"MIT/ast-finetuned-audioset-10-10-0.4593"
|
"MIT/ast-finetuned-audioset-10-10-0.4593"
|
||||||
)
|
)
|
||||||
elif load_name == "EAT":
|
elif load_name in ("EAT", "EAT_LARGE"):
|
||||||
from transformers import AutoModel
|
from transformers import AutoModel
|
||||||
|
eat_repo = ("worstchan/EAT-large_epoch20_finetune_AS2M"
|
||||||
|
if load_name == "EAT_LARGE"
|
||||||
|
else "worstchan/EAT-base_epoch30_finetune_AS2M")
|
||||||
_w2v_model = AutoModel.from_pretrained(
|
_w2v_model = AutoModel.from_pretrained(
|
||||||
"worstchan/EAT-base_epoch30_finetune_AS2M",
|
eat_repo, trust_remote_code=True,
|
||||||
trust_remote_code=True,
|
|
||||||
).to(_w2v_device)
|
).to(_w2v_device)
|
||||||
else:
|
else:
|
||||||
import torchaudio
|
import torchaudio
|
||||||
@@ -254,7 +257,7 @@ def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
|
|||||||
model, device = _get_w2v_model(model_name)
|
model, device = _get_w2v_model(model_name)
|
||||||
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
|
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
|
||||||
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
|
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
|
||||||
is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
|
is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
|
||||||
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
|
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
|
||||||
# Auto-size batches based on available GPU memory
|
# Auto-size batches based on available GPU memory
|
||||||
batch_size = 16
|
batch_size = 16
|
||||||
@@ -383,7 +386,7 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
|
|||||||
|
|
||||||
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
|
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
|
||||||
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
|
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
|
||||||
is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
|
is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
|
||||||
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
|
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
|
||||||
|
|
||||||
for batch_start in range(0, len(valid_times), batch_size):
|
for batch_start in range(0, len(valid_times), batch_size):
|
||||||
|
|||||||
+140
-12
@@ -141,6 +141,92 @@ class ProcessedDB:
|
|||||||
" ON hard_negatives(filename, profile)"
|
" ON hard_negatives(filename, profile)"
|
||||||
)
|
)
|
||||||
self._con.commit()
|
self._con.commit()
|
||||||
|
self._migrate_vid_folders()
|
||||||
|
|
||||||
|
def _migrate_vid_folders(self) -> None:
    """Migrate old clip_NNN group dirs to vid_NNN per-video folders.

    Old layout: export_folder/clip_NNN/clip_NNN_sub.mp4
    New layout: export_folder/vid_NNN/clip_NNN_sub.mp4

    Moves files on disk and rewrites ``output_path`` in the ``processed``
    table. The migration is best-effort per file: a row whose file cannot
    be moved keeps its old path (and is retried on the next start) instead
    of aborting startup.

    A video that already has rows in a ``vid_NNN`` folder keeps that
    folder; newly assigned numbers never reuse a ``vid_NNN`` name that is
    already taken inside the same export folder, regardless of profile.
    """
    import shutil

    # Cheap gate: is there at least one row in the old clip_NNN layout?
    # Both separators are checked so paths stored with backslashes are
    # detected too (SQLite LIKE treats "\" literally without ESCAPE).
    row = self._con.execute(
        "SELECT id FROM processed"
        " WHERE output_path LIKE '%/clip_%/%'"
        " OR output_path LIKE '%\\clip_%\\%' LIMIT 1"
    ).fetchone()
    if not row:
        return

    _log("Migrating old clip group dirs → vid folders …")
    rows = self._con.execute(
        "SELECT id, filename, profile, output_path FROM processed"
        " ORDER BY profile, filename, output_path"
    ).fetchall()

    # Pre-split every usable path once: (rid, key, op, parent, parent_name,
    # export_folder). Rows with a NULL/empty path or without any directory
    # component cannot belong to the two-level layout and are left alone.
    entries: list[tuple] = []
    for rid, filename, profile, op in rows:
        if not op:
            continue
        parent = os.path.dirname(op)
        if not parent:
            continue
        export_folder = os.path.dirname(parent)
        key = (profile, export_folder, filename)
        entries.append(
            (rid, key, op, parent, os.path.basename(parent), export_folder)
        )

    # Pass 1: honour vid_NNN folders that already exist in the DB so a
    # partially migrated database keeps each video in a single folder.
    vid_map: dict[tuple, str] = {}
    taken: dict[str, set[str]] = {}  # export_folder -> vid names in use
    for _rid, key, _op, _parent, parent_name, export_folder in entries:
        if parent_name.startswith("vid_"):
            vid_map.setdefault(key, parent_name)
            taken.setdefault(export_folder, set()).add(parent_name)

    # Pass 2: give every remaining video the next free number. Names are
    # unique per export folder (the real on-disk namespace), so two
    # profiles exporting into the same folder cannot collide.
    next_n: dict[str, int] = {}
    for _rid, key, _op, _parent, _parent_name, export_folder in entries:
        if key in vid_map:
            continue
        in_use = taken.setdefault(export_folder, set())
        n = next_n.get(export_folder, 1)
        while f"vid_{n:03d}" in in_use:
            n += 1
        vid_map[key] = f"vid_{n:03d}"
        in_use.add(vid_map[key])
        next_n[export_folder] = n + 1

    # Pass 3: move files and collect the DB updates.
    updates: list[tuple[str, int]] = []
    old_dirs: set[str] = set()
    moved = 0
    for rid, key, op, parent, parent_name, export_folder in entries:
        # Skip rows already using vid_NNN layout
        if parent_name.startswith("vid_"):
            continue
        vid_dir = os.path.join(export_folder, vid_map[key])
        new_path = os.path.join(vid_dir, os.path.basename(op))
        if os.path.exists(op) and not os.path.exists(new_path):
            try:
                os.makedirs(vid_dir, exist_ok=True)
                shutil.move(op, new_path)
            except OSError as exc:
                # Keep the old path in the DB so the row still points at
                # the real file; it will be retried on the next startup.
                _log(f"Could not move {op} → {new_path}: {exc}")
                continue
            moved += 1
        updates.append((new_path, rid))
        old_dirs.add(parent)

    if not updates:
        return

    # Update DB
    self._con.executemany(
        "UPDATE processed SET output_path = ? WHERE id = ?", updates
    )
    self._con.commit()

    # Remove empty old group directories (best-effort cleanup only).
    for d in sorted(old_dirs, reverse=True):
        try:
            if os.path.isdir(d) and not os.listdir(d):
                os.rmdir(d)
        except OSError:
            pass

    _log(f"Migrated {len(updates)} rows, moved {moved} files to vid folders")
|
||||||
|
|
||||||
def add(self, filename: str, start_time: float, output_path: str,
|
def add(self, filename: str, start_time: float, output_path: str,
|
||||||
label: str = "", category: str = "",
|
label: str = "", category: str = "",
|
||||||
@@ -306,8 +392,8 @@ class ProcessedDB:
|
|||||||
def get_max_counter(self, folder: str, name: str) -> int:
|
def get_max_counter(self, folder: str, name: str) -> int:
|
||||||
"""Return the highest counter N found in output_paths matching folder/name_NNN*.
|
"""Return the highest counter N found in output_paths matching folder/name_NNN*.
|
||||||
|
|
||||||
Parses the group directory component (e.g. 'clip_035') from stored
|
Parses the counter from filenames (e.g. 'clip_035_0.mp4' → 35).
|
||||||
output_path values. Returns 0 if no matches exist.
|
*folder* is typically the vid folder. Returns 0 if no matches exist.
|
||||||
"""
|
"""
|
||||||
if not self._enabled:
|
if not self._enabled:
|
||||||
return 0
|
return 0
|
||||||
@@ -318,24 +404,66 @@ class ProcessedDB:
|
|||||||
(prefix + "%",),
|
(prefix + "%",),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
max_n = 0
|
max_n = 0
|
||||||
|
name_prefix = name + "_"
|
||||||
for (op,) in rows:
|
for (op,) in rows:
|
||||||
# output_path: .../folder/name_NNN/name_NNN_sub.ext
|
stem = os.path.splitext(os.path.basename(op))[0]
|
||||||
parent = os.path.basename(os.path.dirname(op))
|
# stem: "clip_035_0" or "clip_036_a1_0"
|
||||||
# parent should be "name_NNN"
|
if not stem.startswith(name_prefix):
|
||||||
parts = parent.rsplit("_", 1)
|
continue
|
||||||
if len(parts) == 2:
|
rest = stem[len(name_prefix):] # "035_0" or "036_a1_0"
|
||||||
try:
|
counter_str = rest.split("_")[0]
|
||||||
max_n = max(max_n, int(parts[1]))
|
try:
|
||||||
except ValueError:
|
max_n = max(max_n, int(counter_str))
|
||||||
pass
|
except ValueError:
|
||||||
|
pass
|
||||||
return max_n
|
return max_n
|
||||||
|
|
||||||
|
def get_vid_folder(self, filename: str, profile: str,
                   export_folder: str) -> str:
    """Return the vid_NNN folder name for a source video.

    All existing DB rows for ``(filename, profile)`` are inspected in
    ``id`` order; the first one whose parent directory is a ``vid_NNN``
    folder decides the result. Looking at every row (rather than one
    arbitrary row) keeps a video in its existing folder even when some of
    its rows still use another layout or have no path.

    Otherwise the lowest ``vid_NNN`` number is assigned that is used
    neither by any row of *profile* nor by a ``vid_*`` directory on disk
    inside *export_folder* (orphan folders).

    Returns ``"vid_001"`` when the database is disabled.
    """
    if not self._enabled:
        return "vid_001"

    def _parent_name(output_path) -> str:
        # Tolerate NULL paths: they simply have no parent folder name.
        return os.path.basename(os.path.dirname(output_path or ""))

    own_rows = self._con.execute(
        "SELECT output_path FROM processed"
        " WHERE filename = ? AND profile = ? ORDER BY id",
        (filename, profile),
    ).fetchall()
    for (op,) in own_rows:
        parent = _parent_name(op)
        if parent.startswith("vid_"):
            return parent

    # Collect all existing vid_NNN names from DB + disk
    existing: set[str] = set()
    rows = self._con.execute(
        "SELECT DISTINCT output_path FROM processed WHERE profile = ?",
        (profile,),
    ).fetchall()
    for (op,) in rows:
        p = _parent_name(op)
        if p.startswith("vid_"):
            existing.add(p)
    if os.path.isdir(export_folder):
        for d in os.listdir(export_folder):
            if d.startswith("vid_") and os.path.isdir(
                os.path.join(export_folder, d)
            ):
                existing.add(d)

    n = 1
    while f"vid_{n:03d}" in existing:
        n += 1
    return f"vid_{n:03d}"
|
||||||
|
|
||||||
def get_export_folders(self, profile: str = "default",
|
def get_export_folders(self, profile: str = "default",
|
||||||
include_scan_exports: bool = False) -> list[str]:
|
include_scan_exports: bool = False) -> list[str]:
|
||||||
"""Return distinct export folder names found in output_paths for a profile.
|
"""Return distinct export folder names found in output_paths for a profile.
|
||||||
|
|
||||||
Export paths follow the structure:
|
Export paths follow the structure:
|
||||||
.../export_folder/group_dir/clip.mp4
|
.../export_folder/vid_NNN/clip.mp4
|
||||||
The export folder is 2 levels up from the clip file.
|
The export folder is 2 levels up from the clip file.
|
||||||
Returns folder names sorted alphabetically (e.g. ["mp4_Intense", "mp4_Soft"]).
|
Returns folder names sorted alphabetically (e.g. ["mp4_Intense", "mp4_Soft"]).
|
||||||
"""
|
"""
|
||||||
|
|||||||
+10
-6
@@ -25,15 +25,19 @@ def _log(*args) -> None:
|
|||||||
|
|
||||||
|
|
||||||
def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
|
def build_export_path(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
|
||||||
group = f"{basename}_{counter:03d}"
|
"""Build clip output path. *folder* should be the vid folder (e.g. .../mp4/vid_001)."""
|
||||||
name = f"{group}_{sub}" if sub is not None else group
|
name = f"{basename}_{counter:03d}"
|
||||||
return os.path.join(folder, group, name + ".mp4")
|
if sub is not None:
|
||||||
|
name = f"{name}_{sub}"
|
||||||
|
return os.path.join(folder, name + ".mp4")
|
||||||
|
|
||||||
|
|
||||||
def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
|
def build_sequence_dir(folder: str, basename: str, counter: int, sub: int | None = None) -> str:
|
||||||
group = f"{basename}_{counter:03d}"
|
"""Build WebP sequence output dir. *folder* should be the vid folder."""
|
||||||
name = f"{group}_{sub}" if sub is not None else group
|
name = f"{basename}_{counter:03d}"
|
||||||
return os.path.join(folder, group, name)
|
if sub is not None:
|
||||||
|
name = f"{name}_{sub}"
|
||||||
|
return os.path.join(folder, name)
|
||||||
|
|
||||||
|
|
||||||
def format_time(seconds: float) -> str:
|
def format_time(seconds: float) -> str:
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# Scan History & Hard Negative Management Design
|
# Scan History & Hard Negative Management — Final Design
|
||||||
|
|
||||||
Date: 2026-04-19
|
Date: 2026-04-19 (implemented on `feat/training-ui`)
|
||||||
|
|
||||||
## Goal
|
## Goal
|
||||||
|
|
||||||
@@ -8,83 +8,198 @@ Date: 2026-04-19
|
|||||||
2. Make hard negatives manageable — viewable, removable, and optionally disabled per training run
|
2. Make hard negatives manageable — viewable, removable, and optionally disabled per training run
|
||||||
3. Fix latent bug: `get_export_folders()` doesn't filter by `scan_export`
|
3. Fix latent bug: `get_export_folders()` doesn't filter by `scan_export`
|
||||||
|
|
||||||
## 1. Scan Result History
|
---
|
||||||
|
|
||||||
### Current behavior
|
## 1. Ghost Folder Fix
|
||||||
|
|
||||||
`save_scan_results()` **replaces** all results for `(filename, profile, model)` on every scan. No history is preserved.
|
|
||||||
|
|
||||||
### Change
|
|
||||||
|
|
||||||
Keep the last N scan results per `(filename, profile, model)` with timestamps. The most recent is the "active" result displayed in the panel; older versions are accessible for comparison.
|
|
||||||
|
|
||||||
### Schema change
|
|
||||||
|
|
||||||
Add column to `scan_results`:
|
|
||||||
|
|
||||||
```sql
|
|
||||||
ALTER TABLE scan_results ADD COLUMN scan_timestamp TEXT NOT NULL DEFAULT '';
|
|
||||||
```
|
|
||||||
|
|
||||||
All rows from the same scan share the same timestamp string (e.g. `"20260419_143022"`).
|
|
||||||
|
|
||||||
### save_scan_results changes
|
|
||||||
|
|
||||||
Instead of `DELETE ... WHERE filename=? AND profile=? AND model=?`, the new flow:
|
|
||||||
|
|
||||||
1. Insert new rows with current timestamp
|
|
||||||
2. Count distinct timestamps for this `(filename, profile, model)`
|
|
||||||
3. If count > N (default 5), delete rows belonging to the oldest timestamps
|
|
||||||
|
|
||||||
### UI changes
|
|
||||||
|
|
||||||
Add a small version dropdown/selector in `ScanResultsPanel` per model tab — shows timestamps of available scan versions. Selecting a version loads that version's results into the tab. The most recent is selected by default.
|
|
||||||
|
|
||||||
The tab label shows the active version's region count, e.g. `HUBERT_XLARGE (12) [v3]`.
|
|
||||||
|
|
||||||
### Cache interaction
|
|
||||||
|
|
||||||
Embedding cache is per `(file, model)` and doesn't change across scans. Only the classifier output changes. History stores the classified regions (start, end, score), not embeddings.
|
|
||||||
|
|
||||||
## 2. Hard Negative Management
|
|
||||||
|
|
||||||
### Current behavior
|
|
||||||
|
|
||||||
- Hard negatives stored in `hard_negatives` table: `(filename, profile, start_time, source_path)`
|
|
||||||
- No model column — applied globally within a profile
|
|
||||||
- Removable one-by-one via N toggle in scan panel, but no bulk management
|
|
||||||
- Always used in training — no way to disable
|
|
||||||
|
|
||||||
### Changes
|
|
||||||
|
|
||||||
#### Schema
|
|
||||||
|
|
||||||
Add `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives`. Populated when marking negatives from scan results (we know which model tab is active).
|
|
||||||
|
|
||||||
#### Training toggle
|
|
||||||
|
|
||||||
New checkbox in `TrainDialog`: **"Use hard negatives"** (default checked). When unchecked, `get_training_data()` skips the `hard_negatives` query entirely. Non-destructive — negatives remain in DB.
|
|
||||||
|
|
||||||
#### Management dialog
|
|
||||||
|
|
||||||
New `HardNegativesDialog` accessible from Train dialog via "Manage..." button next to the checkbox. Shows:
|
|
||||||
|
|
||||||
- Table: filename, start time, source model, date added (if we add created_at)
|
|
||||||
- Filter by source model (dropdown)
|
|
||||||
- Multi-select + Delete button
|
|
||||||
- "Clear All" button with confirmation
|
|
||||||
- Count summary at top
|
|
||||||
|
|
||||||
### Training integration
|
|
||||||
|
|
||||||
`get_training_data()` gets a new `use_hard_negatives: bool = True` parameter. When False, the hard negatives query (lines 365-374 of db.py) is skipped entirely.
|
|
||||||
|
|
||||||
## 3. Ghost Folder Fix
|
|
||||||
|
|
||||||
### Bug
|
### Bug
|
||||||
|
|
||||||
`get_export_folders()` queries all `output_path` rows without filtering `scan_export`. Folders that only contain scan-exported clips appear in training dropdowns with 0 clips.
|
`get_export_folders()` queried all `output_path` rows without filtering `scan_export`. Folders that only contained scan-exported clips appeared in training dropdowns with 0 clips.
|
||||||
|
|
||||||
### Fix
|
### Implementation (`core/db.py`)
|
||||||
|
|
||||||
Add `include_scan_exports` parameter to `get_export_folders()`. When False (default), only query rows with `scan_export = 0`. Also filter out folders with 0 clips from `get_training_stats()` result dict.
|
**`get_export_folders(profile, include_scan_exports=False)`** — new parameter. When `False` (default), the SQL query adds `AND scan_export = 0` to exclude scan-only folders. The `get_training_stats()` method passes this through and also filters its return dict to remove folders with 0 clips:
|
||||||
|
|
||||||
|
```python
|
||||||
|
return {k: v for k, v in stats.items() if v["clips"] > 0}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Test
|
||||||
|
|
||||||
|
`tests/test_db.py::test_export_folders_excludes_scan_exports` — verifies scan-only folders are excluded by default and included when `include_scan_exports=True`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Scan Result History
|
||||||
|
|
||||||
|
### Schema
|
||||||
|
|
||||||
|
Added column to `scan_results`:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
scan_timestamp TEXT NOT NULL DEFAULT ''
|
||||||
|
```
|
||||||
|
|
||||||
|
All rows from the same scan share one timestamp string with **microsecond precision** (`%Y%m%d_%H%M%S_%f`, e.g. `"20260419_143022_123456"`). Microsecond precision prevents version collisions on fast successive scans.
|
||||||
|
|
||||||
|
Migration adds the column via `ALTER TABLE` for existing databases. Legacy rows keep `scan_timestamp = ''`.
|
||||||
|
|
||||||
|
### DB methods (`core/db.py`)
|
||||||
|
|
||||||
|
**`save_scan_results(filename, profile, model, regions, max_versions=5)`**
|
||||||
|
1. Inserts new rows with current microsecond-precision timestamp
|
||||||
|
2. Counts distinct timestamps for this `(filename, profile, model)`
|
||||||
|
3. Prunes oldest timestamps beyond `max_versions`
|
||||||
|
|
||||||
|
No more DELETE-then-INSERT — all versions coexist in the table.
|
||||||
|
|
||||||
|
**`get_scan_versions(filename, profile, model)`**
|
||||||
|
Returns `[{timestamp, count, max_score}, ...]` ordered newest first. Filters `scan_timestamp != ''` so legacy rows don't appear as named versions.
|
||||||
|
|
||||||
|
**`get_scan_results(filename, profile, scan_timestamp=None)`**
|
||||||
|
- With `scan_timestamp`: returns rows matching that exact version
|
||||||
|
- Without (default): uses `INNER JOIN` subquery with `MAX(scan_timestamp)` per model to return only the latest version. Legacy rows (empty timestamp) sort before any real timestamp, so they're returned when no versioned scans exist.
|
||||||
|
|
||||||
|
### UI (`main.py` — `ScanResultsPanel`)
|
||||||
|
|
||||||
|
Each model tab wraps its `QTableWidget` in a container `QWidget` with a `QComboBox` for version selection:
|
||||||
|
|
||||||
|
```
|
||||||
|
container (QWidget)
|
||||||
|
├── cmb_version (QComboBox) — hidden when ≤ 1 version
|
||||||
|
└── table (QTableWidget)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Helper methods** unwrap this container:
|
||||||
|
- `_current_table()` — returns `QTableWidget` from active tab (handles both raw table and container)
|
||||||
|
- `_tab_table(index)` — same by tab index
|
||||||
|
|
||||||
|
**Version combo** is populated by `_populate_version_combos()` after every `load_for_file()` and `add_scan_results()` call. Labels use `datetime.strptime` parsing with try/except fallback for robustness:
|
||||||
|
|
||||||
|
```
|
||||||
|
2026-04-19 14:30 (12 regions, best: 0.95)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Version switching** via `_on_version_changed(model, idx)`:
|
||||||
|
1. Reads `scan_timestamp` from combo's `userData`
|
||||||
|
2. Calls `get_scan_results(filename, profile, scan_timestamp=ts)`
|
||||||
|
3. Repopulates the table in-place
|
||||||
|
4. **Clears the undo stack** — stale undo entries from a different version would corrupt data
|
||||||
|
5. Emits `regions_edited` to refresh the timeline
|
||||||
|
|
||||||
|
**Tab switch** connects `tab_changed` signal to `_on_scan_regions_edited` (not just `_update_scan_export_count`), so the timeline updates scan regions when switching model tabs.
|
||||||
|
|
||||||
|
### Cache interaction
|
||||||
|
|
||||||
|
Embedding cache is per `(file, model)` and doesn't change across scans. History stores classified regions (start, end, score), not embeddings.
|
||||||
|
|
||||||
|
### Test
|
||||||
|
|
||||||
|
`tests/test_db.py::test_scan_result_history` — saves 3 versions, verifies counts, ordering, and latest-by-default behavior.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Hard Negative Management
|
||||||
|
|
||||||
|
### Schema
|
||||||
|
|
||||||
|
Added column to `hard_negatives`:
|
||||||
|
|
||||||
|
```sql
|
||||||
|
source_model TEXT NOT NULL DEFAULT ''
|
||||||
|
```
|
||||||
|
|
||||||
|
Migration adds the column via `ALTER TABLE` for existing databases.
|
||||||
|
|
||||||
|
### DB methods (`core/db.py`)
|
||||||
|
|
||||||
|
**`add_hard_negatives(filename, profile, times, source_path="", source_model="")`** — now stores which embedding model produced the scan that led to the negative marking.
|
||||||
|
|
||||||
|
**`get_hard_negatives(profile)`** — returns all rows as `[{id, filename, start_time, source_path, source_model}, ...]` for the management dialog.
|
||||||
|
|
||||||
|
**`delete_hard_negatives_by_ids(ids)`** — bulk delete by row IDs.
|
||||||
|
|
||||||
|
**`get_training_data(..., use_hard_negatives=True)`** — new parameter. When `False`, the hard negatives query is skipped entirely. Non-destructive — negatives remain in DB.
|
||||||
|
|
||||||
|
### Source model tracking (`main.py`)
|
||||||
|
|
||||||
|
`_on_scan_negatives()` now passes `source_model=self._scan_panel.current_model_name()` when marking negatives from scan results. `current_model_name()` extracts the model name from the active tab text (stripping the count suffix).
|
||||||
|
|
||||||
|
### Training toggle (`main.py` — `TrainDialog`)
|
||||||
|
|
||||||
|
Checkbox **"Use hard negatives in training"** (default checked) with "Manage..." button in an HBox layout. The toggle:
|
||||||
|
- Updates live training stats preview via debounced `_update_stats()`
|
||||||
|
- Passes `use_hard_negatives` through `_open_train_dialog()` to `get_training_data()`
|
||||||
|
|
||||||
|
### Management dialog (`main.py` — `HardNegativesDialog`)
|
||||||
|
|
||||||
|
Accessible from TrainDialog's "Manage..." button. Features:
|
||||||
|
|
||||||
|
| Component | Details |
|
||||||
|
|-----------|---------|
|
||||||
|
| **Filter combo** | `(all)` + each distinct `source_model` found in data |
|
||||||
|
| **Summary label** | `<b>N</b> hard negatives` |
|
||||||
|
| **Table** | File, Time (`{:.1f}s`), Source Model, hidden ID column |
|
||||||
|
| **Delete Selected** | Multi-select aware, skips hidden (filtered) rows |
|
||||||
|
| **Clear All** | **Filter-aware**: if a model filter is active, only deletes negatives for that model with an appropriate confirmation message. If `(all)`, deletes everything. |
|
||||||
|
| **Close** | Closes dialog, triggers stats refresh in parent TrainDialog |
|
||||||
|
|
||||||
|
`blockSignals(True)` guards prevent spurious filter callbacks during `_load()` repopulation.
|
||||||
|
|
||||||
|
### Tests
|
||||||
|
|
||||||
|
- `test_hard_negatives_source_model` — verifies source_model stored and retrieved
|
||||||
|
- `test_training_data_skips_hard_negatives` — verifies `use_hard_negatives=False` excludes them
|
||||||
|
- `test_delete_hard_negatives_by_ids` — verifies bulk deletion by ID
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Runtime Fixes (discovered during testing)
|
||||||
|
|
||||||
|
### EAT/torchvision ABI mismatch
|
||||||
|
|
||||||
|
**Problem:** `torchvision` installed from PyPI (CPU build) was incompatible with `torch` from CUDA wheel index, causing `operator torchvision::nms does not exist`.
|
||||||
|
|
||||||
|
**Fix:** Added `torchvision` to the explicit torch install line in both setup scripts:
|
||||||
|
```bash
|
||||||
|
pip install torch torchaudio torchvision --index-url "$TORCH_INDEX"
|
||||||
|
```
|
||||||
|
|
||||||
|
Also added `--extra-index-url "$TORCH_INDEX"` to the `pip install -r requirements.txt` line to prevent transitive dependencies (timm, ultralytics) from pulling CPU-only torch packages.
|
||||||
|
|
||||||
|
Applied to: `setup_env.sh` (both conda and venv paths), `setup-windows.ps1`.
|
||||||
|
|
||||||
|
### EAT / transformers 5.x incompatibility
|
||||||
|
|
||||||
|
**Problem:** transformers 5.x broke EAT's remote model code (`'EATModel' object has no attribute 'all_tied_weights_keys'`).
|
||||||
|
|
||||||
|
**Fix:** Pinned `transformers>=4.30,<5.0` in `requirements.txt`.
|
||||||
|
|
||||||
|
### NumPy non-writable array warning
|
||||||
|
|
||||||
|
**Problem:** Cached HuBERT/EAT embeddings loaded from disk are read-only numpy arrays. Calling `torch.from_numpy()` on a non-writable array triggers a `UserWarning`, because PyTorch does not support non-writable tensors.
|
||||||
|
|
||||||
|
**Fix:** In `core/audio_scan.py`, changed EAT preprocessing to copy the array:
|
||||||
|
```python
|
||||||
|
wav = torch.from_numpy(np.array(chunk)).unsqueeze(0).float()
|
||||||
|
```
|
||||||
|
|
||||||
|
### Timeline not updating on tab switch
|
||||||
|
|
||||||
|
**Problem:** Switching model tabs in the scan results panel didn't refresh the timeline's highlighted regions because `tab_changed` was only connected to `_update_scan_export_count`.
|
||||||
|
|
||||||
|
**Fix:** Connected `tab_changed` to `_on_scan_regions_edited` instead, which handles both timeline refresh and export count update.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## File Summary
|
||||||
|
|
||||||
|
| File | Changes |
|
||||||
|
|------|---------|
|
||||||
|
| `core/db.py` | Schema migrations, `get_export_folders` filter, versioned `save_scan_results`, `get_scan_versions`, version-aware `get_scan_results`, `add_hard_negatives` with `source_model`, `get_hard_negatives`, `delete_hard_negatives_by_ids`, `get_training_data` with `use_hard_negatives` |
|
||||||
|
| `main.py` | `HardNegativesDialog` class, `TrainDialog` hard neg toggle + manage button, `ScanResultsPanel` container/combo architecture, version combo population and switching, `current_model_name()`, tab-switch timeline fix |
|
||||||
|
| `core/audio_scan.py` | `np.array(chunk)` copy for read-only numpy arrays in EAT preprocessing |
|
||||||
|
| `requirements.txt` | `transformers>=4.30,<5.0` pin |
|
||||||
|
| `setup_env.sh` | `torchvision` in torch install, `--extra-index-url` on requirements install |
|
||||||
|
| `setup-windows.ps1` | `torchvision` in torch install, `--extra-index-url` on requirements install, removed skip-if-exists guard |
|
||||||
|
| `tests/test_db.py` | 5 tests covering all DB-layer changes |
|
||||||
|
|||||||
@@ -1,714 +1,94 @@
|
|||||||
# Scan History & Hard Negative Management Implementation Plan
|
# Scan History & Hard Negative Management — Implementation Log
|
||||||
|
|
||||||
> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
|
> All tasks complete. See the design doc for the final specification.
|
||||||
|
|
||||||
**Goal:** Add scan result versioning, hard negative management dialog with training toggle, and fix ghost folder bug.
|
**Branch:** `feat/training-ui`
|
||||||
|
|
||||||
**Architecture:** DB schema changes in `core/db.py` (new columns, new queries). UI changes in `main.py` (version selector in ScanResultsPanel, management dialog, training toggle). No changes to `core/audio_scan.py`.
|
|
||||||
|
|
||||||
**Tech Stack:** SQLite (existing), PyQt6 (existing)
|
|
||||||
|
|
||||||
**Key design notes:**
|
|
||||||
- Scan history stores N versions per `(filename, profile, model)` using a `scan_timestamp` column. All rows from one scan share the same timestamp.
|
|
||||||
- Hard negatives gain a `source_model` column (informational) and training gains a `use_hard_negatives` toggle.
|
|
||||||
- `get_export_folders()` must respect `scan_export` filter to prevent ghost folders.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Task 1: Fix ghost folder bug in get_export_folders
|
### Task 1: Fix ghost folder bug in get_export_folders -- DONE
|
||||||
|
|
||||||
**Files:**
|
**Commit:** `2614a76 fix: get_export_folders respects scan_export filter`
|
||||||
- Modify: `core/db.py:294-313` (get_export_folders)
|
|
||||||
- Modify: `core/db.py:410-443` (get_training_stats — filter out 0-clip folders)
|
|
||||||
- Test: `tests/test_db.py`
|
|
||||||
|
|
||||||
**Step 1: Write failing test**
|
- `core/db.py` — `get_export_folders(profile, include_scan_exports=False)`: filters `scan_export = 0` by default
|
||||||
|
- `core/db.py` — `get_training_stats()`: passes `include_scan_exports` through, filters out 0-clip folders
|
||||||
```python
|
- `tests/test_db.py` — `test_export_folders_excludes_scan_exports`
|
||||||
def test_export_folders_excludes_scan_exports():
|
|
||||||
"""Scan-export-only folders should not appear when include_scan_exports=False."""
|
|
||||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
|
|
||||||
path = f.name
|
|
||||||
try:
|
|
||||||
db = ProcessedDB(path)
|
|
||||||
# Manual export
|
|
||||||
db.add("a.mp4", 10.0, "/out/mp4_Intense/g1/clip.mp4", profile="test")
|
|
||||||
# Scan export to different folder
|
|
||||||
db.add("a.mp4", 20.0, "/out/mp4_ScanOnly/g1/clip.mp4", profile="test",
|
|
||||||
scan_export=True)
|
|
||||||
folders = db.get_export_folders("test")
|
|
||||||
assert "mp4_Intense" in folders
|
|
||||||
assert "mp4_ScanOnly" not in folders, "scan-only folder should be excluded"
|
|
||||||
# With include_scan_exports=True, both should appear
|
|
||||||
folders_all = db.get_export_folders("test", include_scan_exports=True)
|
|
||||||
assert "mp4_ScanOnly" in folders_all
|
|
||||||
finally:
|
|
||||||
os.unlink(path)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2: Fix get_export_folders**
|
|
||||||
|
|
||||||
Add `include_scan_exports` parameter:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get_export_folders(self, profile: str = "default",
|
|
||||||
include_scan_exports: bool = False) -> list[str]:
|
|
||||||
if not self._enabled:
|
|
||||||
return []
|
|
||||||
if include_scan_exports:
|
|
||||||
rows = self._con.execute(
|
|
||||||
"SELECT DISTINCT output_path FROM processed WHERE profile = ?",
|
|
||||||
(profile,),
|
|
||||||
).fetchall()
|
|
||||||
else:
|
|
||||||
rows = self._con.execute(
|
|
||||||
"SELECT DISTINCT output_path FROM processed"
|
|
||||||
" WHERE profile = ? AND scan_export = 0",
|
|
||||||
(profile,),
|
|
||||||
).fetchall()
|
|
||||||
folder_names: set[str] = set()
|
|
||||||
for (op,) in rows:
|
|
||||||
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
|
|
||||||
if grandparent:
|
|
||||||
folder_names.add(grandparent)
|
|
||||||
return sorted(folder_names)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3: Update get_training_stats to pass through**
|
|
||||||
|
|
||||||
```python
|
|
||||||
folders = self.get_export_folders(profile, include_scan_exports=include_scan_exports)
|
|
||||||
```
|
|
||||||
|
|
||||||
And filter out empty folders at the end:
|
|
||||||
|
|
||||||
```python
|
|
||||||
return {k: v for k, v in stats.items() if v["clips"] > 0}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 4: Run tests, commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pytest tests/ -v
|
|
||||||
git add core/db.py tests/test_db.py
|
|
||||||
git commit -m "fix: get_export_folders respects scan_export filter"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Task 2: Scan result history — schema and DB methods
|
### Task 2: Scan result history — schema and DB methods -- DONE
|
||||||
|
|
||||||
**Files:**
|
**Commit:** `4fb2ae1 feat: scan result history — keep N versions per (file, model)`
|
||||||
- Modify: `core/db.py:86-98` (scan_results schema — add scan_timestamp column)
|
|
||||||
- Modify: `core/db.py:100-113` (migration — add scan_timestamp to existing tables)
|
|
||||||
- Modify: `core/db.py:447-468` (save_scan_results — version management)
|
|
||||||
- Add: `core/db.py` (get_scan_versions, load_scan_version, delete_scan_version)
|
|
||||||
- Test: `tests/test_db.py`
|
|
||||||
|
|
||||||
**Step 1: Write failing test**
|
- `core/db.py` — added `scan_timestamp TEXT NOT NULL DEFAULT ''` column with migration
|
||||||
|
- `core/db.py` — `save_scan_results()`: versioned insert with microsecond-precision timestamp (`%Y%m%d_%H%M%S_%f`), auto-prunes beyond `max_versions=5`
|
||||||
```python
|
- `core/db.py` — `get_scan_versions()`: returns `[{timestamp, count, max_score}, ...]` newest first
|
||||||
def test_scan_result_history():
|
- `core/db.py` — `get_scan_results(scan_timestamp=None)`: `INNER JOIN` subquery with `MAX(scan_timestamp)` for latest-by-default
|
||||||
"""save_scan_results should keep multiple versions."""
|
- `tests/test_db.py` — `test_scan_result_history`
|
||||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
|
|
||||||
path = f.name
|
|
||||||
try:
|
|
||||||
db = ProcessedDB(path)
|
|
||||||
# Save three versions
|
|
||||||
db.save_scan_results("v.mp4", "test", "MODEL_A",
|
|
||||||
[(0, 8, 0.9)])
|
|
||||||
db.save_scan_results("v.mp4", "test", "MODEL_A",
|
|
||||||
[(0, 8, 0.8), (10, 18, 0.7)])
|
|
||||||
db.save_scan_results("v.mp4", "test", "MODEL_A",
|
|
||||||
[(5, 13, 0.95)])
|
|
||||||
versions = db.get_scan_versions("v.mp4", "test", "MODEL_A")
|
|
||||||
assert len(versions) == 3
|
|
||||||
# Most recent first
|
|
||||||
assert versions[0]["count"] == 1 # latest: 1 region
|
|
||||||
assert versions[1]["count"] == 2 # middle: 2 regions
|
|
||||||
assert versions[2]["count"] == 1 # oldest: 1 region
|
|
||||||
# get_scan_results returns latest version by default
|
|
||||||
results = db.get_scan_results("v.mp4", "test")
|
|
||||||
assert len(results.get("MODEL_A", [])) == 1
|
|
||||||
finally:
|
|
||||||
os.unlink(path)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2: Add scan_timestamp column**
|
|
||||||
|
|
||||||
In the CREATE TABLE statement (lines 87-98), add:
|
|
||||||
|
|
||||||
```sql
|
|
||||||
scan_timestamp TEXT NOT NULL DEFAULT ''
|
|
||||||
```
|
|
||||||
|
|
||||||
In the migration block (lines 100-113), add:
|
|
||||||
|
|
||||||
```python
|
|
||||||
("scan_timestamp", "TEXT NOT NULL DEFAULT ''"),
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3: Modify save_scan_results**
|
|
||||||
|
|
||||||
Replace the current DELETE+INSERT with versioned insert + cleanup:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def save_scan_results(self, filename: str, profile: str, model: str,
|
|
||||||
regions: list[tuple[float, float, float]],
|
|
||||||
max_versions: int = 5) -> None:
|
|
||||||
if not self._enabled:
|
|
||||||
return
|
|
||||||
from datetime import datetime
|
|
||||||
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
||||||
with self._lock:
|
|
||||||
self._con.executemany(
|
|
||||||
"INSERT INTO scan_results"
|
|
||||||
" (filename, profile, model, start_time, end_time, score,"
|
|
||||||
" orig_start_time, orig_end_time, scan_timestamp)"
|
|
||||||
" VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
|
||||||
[(filename, profile, model, s, e, sc, s, e, ts)
|
|
||||||
for s, e, sc in regions],
|
|
||||||
)
|
|
||||||
# Prune old versions beyond max_versions
|
|
||||||
versions = self._con.execute(
|
|
||||||
"SELECT DISTINCT scan_timestamp FROM scan_results"
|
|
||||||
" WHERE filename = ? AND profile = ? AND model = ?"
|
|
||||||
" ORDER BY scan_timestamp DESC",
|
|
||||||
(filename, profile, model),
|
|
||||||
).fetchall()
|
|
||||||
if len(versions) > max_versions:
|
|
||||||
old_ts = [v[0] for v in versions[max_versions:]]
|
|
||||||
self._con.execute(
|
|
||||||
"DELETE FROM scan_results"
|
|
||||||
" WHERE filename = ? AND profile = ? AND model = ?"
|
|
||||||
f" AND scan_timestamp IN ({','.join('?' * len(old_ts))})",
|
|
||||||
(filename, profile, model, *old_ts),
|
|
||||||
)
|
|
||||||
self._con.commit()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 4: Add get_scan_versions**
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get_scan_versions(self, filename: str, profile: str, model: str
|
|
||||||
) -> list[dict]:
|
|
||||||
"""Return list of scan versions for (filename, profile, model).
|
|
||||||
|
|
||||||
Returns [{timestamp, count, max_score}, ...] ordered newest first.
|
|
||||||
"""
|
|
||||||
if not self._enabled:
|
|
||||||
return []
|
|
||||||
rows = self._con.execute(
|
|
||||||
"SELECT scan_timestamp, COUNT(*), MAX(score)"
|
|
||||||
" FROM scan_results"
|
|
||||||
" WHERE filename = ? AND profile = ? AND model = ?"
|
|
||||||
" AND scan_timestamp != ''"
|
|
||||||
" GROUP BY scan_timestamp"
|
|
||||||
" ORDER BY scan_timestamp DESC",
|
|
||||||
(filename, profile, model),
|
|
||||||
).fetchall()
|
|
||||||
return [{"timestamp": ts, "count": cnt, "max_score": sc}
|
|
||||||
for ts, cnt, sc in rows]
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 5: Modify get_scan_results to support version selection**
|
|
||||||
|
|
||||||
Add an optional `scan_timestamp` parameter. When it is `None` (the default), the latest version for each model is returned:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get_scan_results(self, filename: str, profile: str,
|
|
||||||
scan_timestamp: str | None = None
|
|
||||||
) -> dict[str, list[tuple]]:
|
|
||||||
if not self._enabled:
|
|
||||||
return {}
|
|
||||||
if scan_timestamp:
|
|
||||||
rows = self._con.execute(
|
|
||||||
"SELECT id, model, start_time, end_time, score, disabled,"
|
|
||||||
" orig_start_time, orig_end_time"
|
|
||||||
" FROM scan_results"
|
|
||||||
" WHERE filename = ? AND profile = ? AND scan_timestamp = ?"
|
|
||||||
" ORDER BY model, start_time",
|
|
||||||
(filename, profile, scan_timestamp),
|
|
||||||
).fetchall()
|
|
||||||
else:
|
|
||||||
# For each model, get rows from the latest timestamp only
|
|
||||||
rows = self._con.execute(
|
|
||||||
"SELECT r.id, r.model, r.start_time, r.end_time, r.score,"
|
|
||||||
" r.disabled, r.orig_start_time, r.orig_end_time"
|
|
||||||
" FROM scan_results r"
|
|
||||||
" INNER JOIN ("
|
|
||||||
" SELECT model, MAX(scan_timestamp) AS latest"
|
|
||||||
" FROM scan_results"
|
|
||||||
" WHERE filename = ? AND profile = ?"
|
|
||||||
" GROUP BY model"
|
|
||||||
" ) m ON r.model = m.model AND r.scan_timestamp = m.latest"
|
|
||||||
" WHERE r.filename = ? AND r.profile = ?"
|
|
||||||
" ORDER BY r.model, r.start_time",
|
|
||||||
(filename, profile, filename, profile),
|
|
||||||
).fetchall()
|
|
||||||
result: dict[str, list] = {}
|
|
||||||
for row_id, model, s, e, sc, dis, os_, oe in rows:
|
|
||||||
result.setdefault(model, []).append(
|
|
||||||
(row_id, s, e, sc, bool(dis),
|
|
||||||
os_ if os_ is not None else s,
|
|
||||||
oe if oe is not None else e))
|
|
||||||
return result
|
|
||||||
```
|
|
||||||
|
|
||||||
**Important:** Legacy rows (written before this change) have `scan_timestamp = ''`. The `MAX(scan_timestamp)` query handles this correctly: the empty string sorts before any real timestamp, so legacy rows are returned only when no timestamped version exists for that model. The `get_scan_versions` query filters on `scan_timestamp != ''`, so legacy rows never appear as named versions.
|
|
||||||
|
|
||||||
**Step 6: Run tests, commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pytest tests/ -v
|
|
||||||
git add core/db.py tests/test_db.py
|
|
||||||
git commit -m "feat: scan result history — keep N versions per (file, model)"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Task 3: Scan history UI — version selector in ScanResultsPanel
|
### Task 3: Scan history UI — version selector in ScanResultsPanel -- DONE
|
||||||
|
|
||||||
**Files:**
|
**Commit:** `8ed9fbf feat: scan version selector in results panel`
|
||||||
- Modify: `main.py` (ScanResultsPanel — add version combo per tab)
|
|
||||||
- Modify: `main.py` (ScanResultsPanel.load_for_file — populate versions)
|
|
||||||
|
|
||||||
**Step 1: Add version combo to tab UI**
|
- `main.py` — `_add_tab()`: wraps table in container `QWidget` with version `QComboBox` (hidden when ≤ 1 version)
|
||||||
|
- `main.py` — `_current_table()` / `_tab_table(idx)`: unwrap container to get `QTableWidget`
|
||||||
In `ScanResultsPanel._add_tab()`, add a small QComboBox above the table. When no history exists, hide it. When versions exist, populate with timestamps and connect to a slot that reloads the tab with that version.
|
- `main.py` — `_populate_version_combos()`: queries `get_scan_versions()`, formats labels with `datetime.strptime` + try/except fallback
|
||||||
|
- `main.py` — `_on_version_changed()`: reloads table from specific version, clears undo stack, emits `regions_edited`
|
||||||
```python
|
- `main.py` — `current_model_name()`: extracts model name from tab text
|
||||||
# In _add_tab, create a container widget with version combo + table
|
|
||||||
container = QWidget()
|
|
||||||
layout = QVBoxLayout(container)
|
|
||||||
layout.setContentsMargins(0, 0, 0, 0)
|
|
||||||
|
|
||||||
cmb_version = QComboBox()
|
|
||||||
cmb_version.setMaximumWidth(200)
|
|
||||||
cmb_version.setToolTip("Scan version history")
|
|
||||||
cmb_version.hide() # Hidden when only 1 version
|
|
||||||
layout.addWidget(cmb_version)
|
|
||||||
layout.addWidget(table)
|
|
||||||
|
|
||||||
self._tabs.addTab(container, label)
|
|
||||||
```
|
|
||||||
|
|
||||||
Store the combo and table as properties on the container widget for later access.
|
|
||||||
|
|
||||||
**Step 2: Populate versions in load_for_file**
|
|
||||||
|
|
||||||
After creating each model tab, query `get_scan_versions()`. If more than one version exists, show the combo with entries like `"2026-04-19 14:30 (12 regions, best: 0.95)"`. Connect `currentIndexChanged` to a slot that reloads that version's results.
|
|
||||||
|
|
||||||
**Step 3: Version switching slot**
|
|
||||||
|
|
||||||
When user selects a different version from the combo:
|
|
||||||
1. Call `db.get_scan_results(filename, profile, scan_timestamp=selected_ts)`
|
|
||||||
2. Repopulate the table with that version's rows
|
|
||||||
3. Update timeline regions
|
|
||||||
|
|
||||||
**Step 4: Test manually, commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git add main.py
|
|
||||||
git commit -m "feat: scan version selector in results panel"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Task 4: Hard negatives — schema and training toggle
|
### Task 4: Hard negatives — schema and training toggle -- DONE
|
||||||
|
|
||||||
**Files:**
|
**Commit:** `edc5784 feat: hard negative source_model tracking, training toggle`
|
||||||
- Modify: `core/db.py:118-130` (hard_negatives schema — add source_model column)
|
|
||||||
- Modify: `core/db.py:548-560` (add_hard_negatives — accept source_model)
|
|
||||||
- Modify: `core/db.py:365-374` (get_training_data — use_hard_negatives parameter)
|
|
||||||
- Modify: `main.py` (TrainDialog — add "Use hard negatives" checkbox)
|
|
||||||
- Modify: `main.py` (_open_train_dialog — pass use_hard_negatives to get_training_data)
|
|
||||||
- Test: `tests/test_db.py`
|
|
||||||
|
|
||||||
**Step 1: Write failing test**
|
- `core/db.py` — added `source_model TEXT NOT NULL DEFAULT ''` column to `hard_negatives` with migration
|
||||||
|
- `core/db.py` — `add_hard_negatives(source_model="")`: stores originating model
|
||||||
```python
|
- `core/db.py` — `get_hard_negatives(profile)`: returns full rows as list of dicts
|
||||||
def test_hard_negatives_source_model():
|
- `core/db.py` — `delete_hard_negatives_by_ids(ids)`: bulk delete by row IDs
|
||||||
"""Hard negatives should store source_model."""
|
- `core/db.py` — `get_training_data(use_hard_negatives=True)`: conditionally skips hard negatives query
|
||||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
|
- `main.py` — `TrainDialog`: "Use hard negatives" checkbox + "Manage..." button in HBox layout
|
||||||
path = f.name
|
- `main.py` — `_on_scan_negatives()`: passes `source_model=self._scan_panel.current_model_name()`
|
||||||
try:
|
- `tests/test_db.py` — `test_hard_negatives_source_model`, `test_training_data_skips_hard_negatives`, `test_delete_hard_negatives_by_ids`
|
||||||
db = ProcessedDB(path)
|
|
||||||
db.add_hard_negatives("a.mp4", "test", [10.0, 20.0],
|
|
||||||
source_path="/a.mp4", source_model="HUBERT_XLARGE")
|
|
||||||
rows = db.get_hard_negatives("test")
|
|
||||||
assert len(rows) == 2
|
|
||||||
assert all(r["source_model"] == "HUBERT_XLARGE" for r in rows)
|
|
||||||
finally:
|
|
||||||
os.unlink(path)
|
|
||||||
|
|
||||||
def test_training_data_skips_hard_negatives():
|
|
||||||
"""get_training_data with use_hard_negatives=False should skip them."""
|
|
||||||
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
|
|
||||||
path = f.name
|
|
||||||
try:
|
|
||||||
db = ProcessedDB(path)
|
|
||||||
db.add("a.mp4", 10.0, "/out/folder/g/clip.mp4", profile="test",
|
|
||||||
source_path="/videos/a.mp4")
|
|
||||||
db.add_hard_negatives("a.mp4", "test", [500.0], source_path="/videos/a.mp4")
|
|
||||||
# With hard negatives
|
|
||||||
data_with = db.get_training_data("test", "folder", use_hard_negatives=True)
|
|
||||||
# Without hard negatives
|
|
||||||
data_without = db.get_training_data("test", "folder", use_hard_negatives=False)
|
|
||||||
# Both should find the video, but negative counts differ
|
|
||||||
assert len(data_with) >= 1
|
|
||||||
neg_with = sum(len(vi[3]) for vi in data_with)
|
|
||||||
neg_without = sum(len(vi[3]) for vi in data_without)
|
|
||||||
assert neg_with > neg_without or neg_with == neg_without # depends on margin
|
|
||||||
finally:
|
|
||||||
os.unlink(path)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2: Add source_model column to hard_negatives**
|
|
||||||
|
|
||||||
In the CREATE TABLE statement (lines 119-125), add:
|
|
||||||
|
|
||||||
```sql
|
|
||||||
source_model TEXT NOT NULL DEFAULT ''
|
|
||||||
```
|
|
||||||
|
|
||||||
In migration section, add after the hard_negatives table creation:
|
|
||||||
|
|
||||||
```python
|
|
||||||
hn_cols = {
|
|
||||||
row[1]
|
|
||||||
for row in self._con.execute("PRAGMA table_info(hard_negatives)").fetchall()
|
|
||||||
}
|
|
||||||
if "source_model" not in hn_cols:
|
|
||||||
self._con.execute(
|
|
||||||
"ALTER TABLE hard_negatives ADD COLUMN source_model TEXT NOT NULL DEFAULT ''"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3: Update add_hard_negatives to accept source_model**
|
|
||||||
|
|
||||||
```python
|
|
||||||
def add_hard_negatives(self, filename: str, profile: str,
|
|
||||||
times: list[float], source_path: str = "",
|
|
||||||
source_model: str = "") -> None:
|
|
||||||
if not self._enabled or not times:
|
|
||||||
return
|
|
||||||
with self._lock:
|
|
||||||
for t in times:
|
|
||||||
self._con.execute(
|
|
||||||
"INSERT INTO hard_negatives"
|
|
||||||
" (filename, profile, start_time, source_path, source_model)"
|
|
||||||
" VALUES (?, ?, ?, ?, ?)",
|
|
||||||
(filename, profile, t, source_path, source_model),
|
|
||||||
)
|
|
||||||
self._con.commit()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 4: Add get_hard_negatives (full rows for management dialog)**
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get_hard_negatives(self, profile: str) -> list[dict]:
|
|
||||||
"""Return all hard negatives for a profile with full details."""
|
|
||||||
if not self._enabled:
|
|
||||||
return []
|
|
||||||
rows = self._con.execute(
|
|
||||||
"SELECT id, filename, start_time, source_path, source_model"
|
|
||||||
" FROM hard_negatives WHERE profile = ?"
|
|
||||||
" ORDER BY filename, start_time",
|
|
||||||
(profile,),
|
|
||||||
).fetchall()
|
|
||||||
return [{"id": r[0], "filename": r[1], "start_time": r[2],
|
|
||||||
"source_path": r[3], "source_model": r[4]} for r in rows]
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 5: Add delete_hard_negatives_by_ids**
|
|
||||||
|
|
||||||
```python
|
|
||||||
def delete_hard_negatives_by_ids(self, ids: list[int]) -> None:
|
|
||||||
"""Delete hard negatives by row IDs."""
|
|
||||||
if not self._enabled or not ids:
|
|
||||||
return
|
|
||||||
with self._lock:
|
|
||||||
self._con.execute(
|
|
||||||
f"DELETE FROM hard_negatives WHERE id IN ({','.join('?' * len(ids))})",
|
|
||||||
ids,
|
|
||||||
)
|
|
||||||
self._con.commit()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 6: Add use_hard_negatives parameter to get_training_data**
|
|
||||||
|
|
||||||
In `get_training_data()` (line 315), add parameter:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def get_training_data(self, profile: str, positive_folder: str,
|
|
||||||
negative_folder: str = "",
|
|
||||||
fallback_video_dir: str = "",
|
|
||||||
include_scan_exports: bool = False,
|
|
||||||
use_hard_negatives: bool = True,
|
|
||||||
) -> list[tuple[str, list[float], list[float], list[float]]]:
|
|
||||||
```
|
|
||||||
|
|
||||||
Then wrap the hard negatives query (lines 365-374) in a conditional:
|
|
||||||
|
|
||||||
```python
|
|
||||||
if use_hard_negatives:
|
|
||||||
hard_rows = self._con.execute(
|
|
||||||
"SELECT filename, start_time, source_path FROM hard_negatives"
|
|
||||||
" WHERE profile = ?",
|
|
||||||
(profile,),
|
|
||||||
).fetchall()
|
|
||||||
for fn, st, sp in hard_rows:
|
|
||||||
neg_by_video.setdefault(fn, set()).add(st)
|
|
||||||
if sp:
|
|
||||||
source_by_filename.setdefault(fn, sp)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 7: Pass source_model when marking negatives from scan panel**
|
|
||||||
|
|
||||||
In `main.py`, `_on_scan_negatives()` needs to pass the current scan model. The scan panel knows which tab is active:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _on_scan_negatives(self, times: list) -> None:
|
|
||||||
if not self._file_path:
|
|
||||||
return
|
|
||||||
filename = os.path.basename(self._file_path)
|
|
||||||
# Get current model tab name for source_model
|
|
||||||
source_model = self._scan_panel.current_model_name()
|
|
||||||
self._db.add_hard_negatives(filename, self._profile, times,
|
|
||||||
source_path=self._file_path,
|
|
||||||
source_model=source_model)
|
|
||||||
```
|
|
||||||
|
|
||||||
Add `current_model_name()` to ScanResultsPanel:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def current_model_name(self) -> str:
|
|
||||||
"""Return the model name of the currently active tab."""
|
|
||||||
idx = self._tabs.currentIndex()
|
|
||||||
if idx >= 0:
|
|
||||||
return self._tabs.tabText(idx).split(" (")[0] # strip count suffix
|
|
||||||
return ""
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 8: Add training toggle to TrainDialog**
|
|
||||||
|
|
||||||
After the existing `_chk_scan_exports` checkbox:
|
|
||||||
|
|
||||||
```python
|
|
||||||
self._chk_hard_negatives = QCheckBox("Use hard negatives in training")
|
|
||||||
self._chk_hard_negatives.setChecked(True)
|
|
||||||
self._chk_hard_negatives.setToolTip(
|
|
||||||
"When unchecked, manually marked hard negatives are excluded from training.\n"
|
|
||||||
"Useful when training a new model type where old negatives may not apply.")
|
|
||||||
self._chk_hard_negatives.stateChanged.connect(lambda: self._debounce.start())
|
|
||||||
form.addRow("", self._chk_hard_negatives)
|
|
||||||
```
|
|
||||||
|
|
||||||
Add property:
|
|
||||||
|
|
||||||
```python
|
|
||||||
@property
|
|
||||||
def use_hard_negatives(self) -> bool:
|
|
||||||
return self._chk_hard_negatives.isChecked()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 9: Wire toggle through _open_train_dialog**
|
|
||||||
|
|
||||||
In `_open_train_dialog()`, pass the flag:
|
|
||||||
|
|
||||||
```python
|
|
||||||
video_infos = self._db.get_training_data(
|
|
||||||
self._profile, pos_folder, negative_folder=neg_folder,
|
|
||||||
fallback_video_dir=video_dir,
|
|
||||||
include_scan_exports=inc_scan,
|
|
||||||
use_hard_negatives=dlg.use_hard_negatives,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Also update `_update_stats()` in TrainDialog to pass it through for accurate counts:
|
|
||||||
|
|
||||||
```python
|
|
||||||
use_neg = self._chk_hard_negatives.isChecked() if hasattr(self, '_chk_hard_negatives') else True
|
|
||||||
video_infos = self._db.get_training_data(
|
|
||||||
self._profile, folder, negative_folder=neg_folder,
|
|
||||||
fallback_video_dir=self._txt_video_dir.text(),
|
|
||||||
include_scan_exports=inc_scan,
|
|
||||||
use_hard_negatives=use_neg,
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 10: Run tests, commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pytest tests/ -v
|
|
||||||
git add core/db.py main.py tests/test_db.py
|
|
||||||
git commit -m "feat: hard negative source_model tracking, training toggle"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Task 5: Hard negatives management dialog
|
### Task 5: Hard negatives management dialog -- DONE
|
||||||
|
|
||||||
**Files:**
|
**Commit:** `e6db83f feat: hard negatives management dialog with filter and bulk delete`
|
||||||
- Modify: `main.py` (add HardNegativesDialog class)
|
|
||||||
- Modify: `main.py` (TrainDialog — add "Manage..." button)
|
|
||||||
|
|
||||||
**Step 1: Create HardNegativesDialog**
|
- `main.py` — `HardNegativesDialog`: table with File/Time/Source Model/hidden ID columns, model filter combo, delete selected, filter-aware clear all, close button
|
||||||
|
- Filter-aware "Clear All": respects active model filter, shows appropriate confirmation message
|
||||||
Place before TrainDialog class:
|
|
||||||
|
|
||||||
```python
|
|
||||||
class HardNegativesDialog(QDialog):
|
|
||||||
"""View and manage hard negative training examples."""
|
|
||||||
|
|
||||||
def __init__(self, db: ProcessedDB, profile: str, parent=None):
|
|
||||||
super().__init__(parent)
|
|
||||||
self.setWindowTitle("Hard Negatives")
|
|
||||||
self.setMinimumSize(600, 400)
|
|
||||||
self._db = db
|
|
||||||
self._profile = profile
|
|
||||||
|
|
||||||
layout = QVBoxLayout(self)
|
|
||||||
|
|
||||||
# Filter row
|
|
||||||
filter_row = QHBoxLayout()
|
|
||||||
filter_row.addWidget(QLabel("Filter model:"))
|
|
||||||
self._cmb_filter = QComboBox()
|
|
||||||
self._cmb_filter.addItem("(all)")
|
|
||||||
self._cmb_filter.currentIndexChanged.connect(self._apply_filter)
|
|
||||||
filter_row.addWidget(self._cmb_filter, 1)
|
|
||||||
layout.addLayout(filter_row)
|
|
||||||
|
|
||||||
# Summary
|
|
||||||
self._lbl_summary = QLabel()
|
|
||||||
layout.addWidget(self._lbl_summary)
|
|
||||||
|
|
||||||
# Table
|
|
||||||
self._table = QTableWidget(0, 4)
|
|
||||||
self._table.setHorizontalHeaderLabels(
|
|
||||||
["File", "Time", "Source Model", "ID"])
|
|
||||||
self._table.horizontalHeader().setSectionResizeMode(
|
|
||||||
0, QHeaderView.ResizeMode.Stretch)
|
|
||||||
self._table.setEditTriggers(QTableWidget.EditTrigger.NoEditTriggers)
|
|
||||||
self._table.setSelectionBehavior(QTableWidget.SelectionBehavior.SelectRows)
|
|
||||||
self._table.setColumnHidden(3, True) # hide ID column
|
|
||||||
layout.addWidget(self._table)
|
|
||||||
|
|
||||||
# Buttons
|
|
||||||
btn_row = QHBoxLayout()
|
|
||||||
btn_delete = QPushButton("Delete Selected")
|
|
||||||
btn_delete.clicked.connect(self._delete_selected)
|
|
||||||
btn_row.addWidget(btn_delete)
|
|
||||||
btn_clear = QPushButton("Clear All")
|
|
||||||
btn_clear.clicked.connect(self._clear_all)
|
|
||||||
btn_row.addWidget(btn_clear)
|
|
||||||
btn_row.addStretch()
|
|
||||||
btn_close = QPushButton("Close")
|
|
||||||
btn_close.clicked.connect(self.close)
|
|
||||||
btn_row.addWidget(btn_close)
|
|
||||||
layout.addLayout(btn_row)
|
|
||||||
|
|
||||||
self._load()
|
|
||||||
|
|
||||||
def _load(self):
|
|
||||||
rows = self._db.get_hard_negatives(self._profile)
|
|
||||||
models = sorted(set(r["source_model"] for r in rows if r["source_model"]))
|
|
||||||
self._cmb_filter.blockSignals(True)
|
|
||||||
self._cmb_filter.clear()
|
|
||||||
self._cmb_filter.addItem("(all)")
|
|
||||||
for m in models:
|
|
||||||
self._cmb_filter.addItem(m)
|
|
||||||
self._cmb_filter.blockSignals(False)
|
|
||||||
|
|
||||||
self._table.setRowCount(len(rows))
|
|
||||||
for i, r in enumerate(rows):
|
|
||||||
self._table.setItem(i, 0, QTableWidgetItem(r["filename"]))
|
|
||||||
self._table.setItem(i, 1, QTableWidgetItem(f'{r["start_time"]:.1f}s'))
|
|
||||||
self._table.setItem(i, 2, QTableWidgetItem(r["source_model"]))
|
|
||||||
item = QTableWidgetItem(str(r["id"]))
|
|
||||||
self._table.setItem(i, 3, item)
|
|
||||||
self._lbl_summary.setText(f"<b>{len(rows)}</b> hard negatives")
|
|
||||||
|
|
||||||
def _apply_filter(self):
|
|
||||||
model = self._cmb_filter.currentText()
|
|
||||||
for row in range(self._table.rowCount()):
|
|
||||||
if model == "(all)":
|
|
||||||
self._table.setRowHidden(row, False)
|
|
||||||
else:
|
|
||||||
src = self._table.item(row, 2).text()
|
|
||||||
self._table.setRowHidden(row, src != model)
|
|
||||||
|
|
||||||
def _delete_selected(self):
|
|
||||||
ids = []
|
|
||||||
for row in sorted(set(i.row() for i in self._table.selectedItems()), reverse=True):
|
|
||||||
if not self._table.isRowHidden(row):
|
|
||||||
ids.append(int(self._table.item(row, 3).text()))
|
|
||||||
if ids:
|
|
||||||
self._db.delete_hard_negatives_by_ids(ids)
|
|
||||||
self._load()
|
|
||||||
|
|
||||||
def _clear_all(self):
|
|
||||||
reply = QMessageBox.question(
|
|
||||||
self, "Clear All",
|
|
||||||
f"Delete all hard negatives for profile '{self._profile}'?",
|
|
||||||
QMessageBox.StandardButton.Yes | QMessageBox.StandardButton.No,
|
|
||||||
)
|
|
||||||
if reply == QMessageBox.StandardButton.Yes:
|
|
||||||
all_rows = self._db.get_hard_negatives(self._profile)
|
|
||||||
self._db.delete_hard_negatives_by_ids([r["id"] for r in all_rows])
|
|
||||||
self._load()
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2: Add "Manage..." button to TrainDialog**
|
|
||||||
|
|
||||||
After the hard negatives checkbox, add a button:
|
|
||||||
|
|
||||||
```python
|
|
||||||
neg_row = QHBoxLayout()
|
|
||||||
neg_row.addWidget(self._chk_hard_negatives)
|
|
||||||
btn_manage_neg = QPushButton("Manage…")
|
|
||||||
btn_manage_neg.setFixedWidth(80)
|
|
||||||
btn_manage_neg.clicked.connect(self._manage_negatives)
|
|
||||||
neg_row.addWidget(btn_manage_neg)
|
|
||||||
form.addRow("", neg_row) # replaces the standalone checkbox addRow
|
|
||||||
```
|
|
||||||
|
|
||||||
Add handler:
|
|
||||||
|
|
||||||
```python
|
|
||||||
def _manage_negatives(self):
|
|
||||||
dlg = HardNegativesDialog(self._db, self._profile, parent=self)
|
|
||||||
dlg.exec()
|
|
||||||
self._debounce.start() # refresh stats after potential deletions
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 3: Test manually, commit**
|
|
||||||
|
|
||||||
```bash
|
|
||||||
pytest tests/ -v
|
|
||||||
git add main.py
|
|
||||||
git commit -m "feat: hard negatives management dialog with filter and bulk delete"
|
|
||||||
```
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
### Task 6: Final integration test and push
|
### Task 6: Code review fixes -- DONE
|
||||||
|
|
||||||
**Step 1: Manual test checklist**
|
**Commit:** `5d45b8d fix: timestamp collision, undo stack invalidation, label parsing, filter-aware clear`
|
||||||
|
|
||||||
- [ ] Open Train dialog — verify no ghost folders appear
|
Four issues found during code review:
|
||||||
- [ ] Train with "Use hard negatives" unchecked — verify training works
|
1. **Timestamp collision** — with second-precision timestamps, two scans saved within the same second were merged into a single version. Fixed by adding microseconds (`%f`) to the timestamp format
|
||||||
- [ ] Train with "Use hard negatives" checked — verify negatives are used
|
2. **Undo stack invalidation** — switching scan versions left stale undo entries. Fixed by clearing undo stack in `_on_version_changed()`
|
||||||
- [ ] Open Manage dialog — verify negatives listed with source model
|
3. **Fragile timestamp label parsing** — version labels were built with hard-coded string slicing. Fixed with `datetime.strptime` + try/except fallback
|
||||||
- [ ] Delete selected negatives — verify they're removed
|
4. **Clear All ignoring filter** — deleted all negatives regardless of model filter. Fixed to respect active filter
|
||||||
- [ ] Scan a video — verify results saved with timestamp
|
|
||||||
- [ ] Rescan same video — verify version history appears
|
|
||||||
- [ ] Switch version in scan panel — verify correct results display
|
|
||||||
- [ ] Mark negative from scan results — verify source_model stored
|
|
||||||
|
|
||||||
**Step 2: Push**
|
---
|
||||||
|
|
||||||
```bash
|
### Runtime fixes (discovered during manual testing)
|
||||||
git push
|
|
||||||
```
|
| Commit | Fix |
|
||||||
|
|--------|-----|
|
||||||
|
| `a3c657c` | Install `torchvision` from CUDA wheel index (was pulling CPU build from PyPI) |
|
||||||
|
| `3c3b1d7` | Remove "skip if torch exists" guard in Windows setup so re-runs fix broken envs |
|
||||||
|
| `fd043f4` | Pin `transformers>=4.30,<5.0` — EAT remote model code incompatible with transformers 5.x |
|
||||||
|
| `7d6fee9` | Copy read-only numpy array before `torch.from_numpy()` in EAT preprocessing |
|
||||||
|
| `bd345ab` | Connect `tab_changed` to `_on_scan_regions_edited` so timeline refreshes on tab switch |
|
||||||
|
| `d8b3972` | Add `--extra-index-url` to `pip install -r requirements.txt` in both setup scripts |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Test results
|
||||||
|
|
||||||
|
All 68 tests pass (5 new DB tests + 63 existing).
|
||||||
|
|||||||
@@ -1756,16 +1756,18 @@ class TimelineWidget(QWidget):
|
|||||||
|
|
||||||
def mousePressEvent(self, event):
|
def mousePressEvent(self, event):
|
||||||
x = event.position().x()
|
x = event.position().x()
|
||||||
# Check for scan region edge drag
|
# Check for scan region edge drag — require Shift to avoid accidental resizes
|
||||||
hit = self._hit_scan_edge(x)
|
mods = event.modifiers()
|
||||||
if hit is not None:
|
if mods & Qt.KeyboardModifier.ShiftModifier:
|
||||||
idx, edge = hit
|
hit = self._hit_scan_edge(x)
|
||||||
r = self._scan_regions[idx]
|
if hit is not None:
|
||||||
self._drag_idx = idx
|
idx, edge = hit
|
||||||
self._drag_edge = edge
|
r = self._scan_regions[idx]
|
||||||
self._drag_start_val = r[0]
|
self._drag_idx = idx
|
||||||
self._drag_end_val = r[1]
|
self._drag_edge = edge
|
||||||
return
|
self._drag_start_val = r[0]
|
||||||
|
self._drag_end_val = r[1]
|
||||||
|
return
|
||||||
self._seek(x)
|
self._seek(x)
|
||||||
|
|
||||||
def mouseDoubleClickEvent(self, event):
|
def mouseDoubleClickEvent(self, event):
|
||||||
@@ -1801,9 +1803,9 @@ class TimelineWidget(QWidget):
|
|||||||
self.update()
|
self.update()
|
||||||
return
|
return
|
||||||
|
|
||||||
# Hover cursor: resize arrow near edges, normal otherwise
|
# Hover cursor: resize arrow near edges (only with Shift held)
|
||||||
hit = self._hit_scan_edge(x)
|
mods = event.modifiers()
|
||||||
if hit is not None:
|
if (mods & Qt.KeyboardModifier.ShiftModifier) and self._hit_scan_edge(x):
|
||||||
self.setCursor(Qt.CursorShape.SizeHorCursor)
|
self.setCursor(Qt.CursorShape.SizeHorCursor)
|
||||||
else:
|
else:
|
||||||
self.unsetCursor()
|
self.unsetCursor()
|
||||||
@@ -3224,6 +3226,67 @@ class MainWindow(QMainWindow):
|
|||||||
self._playlist._select(0)
|
self._playlist._select(0)
|
||||||
_log(f"Resumed session: {len(valid)} file(s)")
|
_log(f"Resumed session: {len(valid)} file(s)")
|
||||||
|
|
||||||
|
self._show_changelog()
|
||||||
|
|
||||||
|
# ── Changelog ────────────────────────────────────────────
|
||||||
|
|
||||||
|
APP_VERSION = "1.0"
|
||||||
|
CHANGELOG: list[tuple[str, list[str]]] = [
|
||||||
|
("1.0", [
|
||||||
|
"<b>New export layout</b> — clips are now stored in per-video "
|
||||||
|
"<code>vid_NNN/</code> folders instead of per-clip "
|
||||||
|
"<code>clip_NNN/</code> group dirs. "
|
||||||
|
"Each source video gets its own folder with flat clip files inside "
|
||||||
|
"(e.g. <code>mp4/vid_001/clip_001_0.mp4</code>). "
|
||||||
|
"Old databases are migrated automatically on startup: "
|
||||||
|
"DB paths are rewritten and files are moved to the new layout.",
|
||||||
|
"<b>Counter is now per-video</b> — clip numbering restarts in each "
|
||||||
|
"vid folder, and the DB is cross-checked to prevent overwrites "
|
||||||
|
"even if the export folder is temporarily empty.",
|
||||||
|
"<b>Audio detection models</b> — three new embedding models for "
|
||||||
|
"audio scanning: <b>AST</b> (Audio Spectrogram Transformer), "
|
||||||
|
"<b>EAT</b> (Efficient Audio Transformer), and <b>multi-layer "
|
||||||
|
"HuBERT/Wav2Vec2</b> extraction. Classifier probabilities are now "
|
||||||
|
"calibrated with isotonic regression for more meaningful scores.",
|
||||||
|
"<b>Scan result history</b> — scan results are versioned per "
|
||||||
|
"(file, model); switch between past scan versions from a dropdown.",
|
||||||
|
"<b>Hard negatives</b> — management dialog to review, filter, and "
|
||||||
|
"bulk-delete hard negatives; source model is tracked per negative.",
|
||||||
|
"<b>Scan workflow</b> — disable/resize scan regions, undo edits, "
|
||||||
|
"interruptible Scan All with resume, audio prefetch, review mode.",
|
||||||
|
"<b>Dataset statistics</b> — dialog showing per-video clip breakdown "
|
||||||
|
"and class balance.",
|
||||||
|
"<b>Waveform overlay</b> on timeline.",
|
||||||
|
]),
|
||||||
|
]
|
||||||
|
|
||||||
|
def _show_changelog(self) -> None:
|
||||||
|
last = self._settings.value("last_seen_version", "")
|
||||||
|
if last == self.APP_VERSION:
|
||||||
|
return
|
||||||
|
# Collect entries newer than last seen
|
||||||
|
lines: list[str] = []
|
||||||
|
for ver, items in self.CHANGELOG:
|
||||||
|
if ver == last:
|
||||||
|
break
|
||||||
|
lines.append(f"<h3>v{ver}</h3><ul>")
|
||||||
|
for item in items:
|
||||||
|
lines.append(f"<li>{item}</li>")
|
||||||
|
lines.append("</ul>")
|
||||||
|
if not lines:
|
||||||
|
self._settings.setValue("last_seen_version", self.APP_VERSION)
|
||||||
|
return
|
||||||
|
msg = QMessageBox(self)
|
||||||
|
msg.setWindowTitle("What's new")
|
||||||
|
msg.setIcon(QMessageBox.Icon.Information)
|
||||||
|
msg.setTextFormat(Qt.TextFormat.RichText)
|
||||||
|
msg.setText("".join(lines))
|
||||||
|
cb = QCheckBox("Don't show again for this version")
|
||||||
|
msg.setCheckBox(cb)
|
||||||
|
msg.exec()
|
||||||
|
if cb.isChecked():
|
||||||
|
self._settings.setValue("last_seen_version", self.APP_VERSION)
|
||||||
|
|
||||||
def _show_shortcuts(self) -> None:
|
def _show_shortcuts(self) -> None:
|
||||||
text = (
|
text = (
|
||||||
"<table cellpadding='4' style='font-size:13px'>"
|
"<table cellpadding='4' style='font-size:13px'>"
|
||||||
@@ -3248,7 +3311,7 @@ class MainWindow(QMainWindow):
|
|||||||
"<tr><td><b>Double-click marker</b></td><td>Enter overwrite mode (locked: jump to end of clip span)</td></tr>"
|
"<tr><td><b>Double-click marker</b></td><td>Enter overwrite mode (locked: jump to end of clip span)</td></tr>"
|
||||||
"<tr><td><b>Right-click marker</b></td><td>Delete clip group</td></tr>"
|
"<tr><td><b>Right-click marker</b></td><td>Delete clip group</td></tr>"
|
||||||
"<tr><td><b>Click video / crop bar</b></td><td>Reposition portrait crop</td></tr>"
|
"<tr><td><b>Click video / crop bar</b></td><td>Reposition portrait crop</td></tr>"
|
||||||
"<tr><td><b>Drag scan region edge</b></td><td>Resize scan region</td></tr>"
|
"<tr><td><b>Shift+drag scan region edge</b></td><td>Resize scan region</td></tr>"
|
||||||
"</table>"
|
"</table>"
|
||||||
)
|
)
|
||||||
QMessageBox.information(self, "Keyboard shortcuts", text)
|
QMessageBox.information(self, "Keyboard shortcuts", text)
|
||||||
|
|||||||
+23
-23
@@ -5,21 +5,21 @@ from main import ProcessedDB
|
|||||||
|
|
||||||
|
|
||||||
def test_build_export_path_first():
|
def test_build_export_path_first():
|
||||||
assert build_export_path("/out", "clip", 1) == "/out/clip_001/clip_001.mp4"
|
assert build_export_path("/out", "clip", 1) == "/out/clip_001.mp4"
|
||||||
|
|
||||||
def test_build_export_path_counter():
|
def test_build_export_path_counter():
|
||||||
assert build_export_path("/out", "clip", 42) == "/out/clip_042/clip_042.mp4"
|
assert build_export_path("/out", "clip", 42) == "/out/clip_042.mp4"
|
||||||
|
|
||||||
def test_build_export_path_deep_counter():
|
def test_build_export_path_deep_counter():
|
||||||
assert build_export_path("/out", "shot", 999) == "/out/shot_999/shot_999.mp4"
|
assert build_export_path("/out", "shot", 999) == "/out/shot_999.mp4"
|
||||||
|
|
||||||
def test_build_export_path_sub():
|
def test_build_export_path_sub():
|
||||||
assert build_export_path("/out", "clip", 1, sub=0) == "/out/clip_001/clip_001_0.mp4"
|
assert build_export_path("/out", "clip", 1, sub=0) == "/out/clip_001_0.mp4"
|
||||||
assert build_export_path("/out", "clip", 1, sub=2) == "/out/clip_001/clip_001_2.mp4"
|
assert build_export_path("/out", "clip", 1, sub=2) == "/out/clip_001_2.mp4"
|
||||||
|
|
||||||
def test_build_sequence_dir_sub():
|
def test_build_sequence_dir_sub():
|
||||||
assert build_sequence_dir("/out", "clip", 1, sub=0) == "/out/clip_001/clip_001_0"
|
assert build_sequence_dir("/out", "clip", 1, sub=0) == "/out/clip_001_0"
|
||||||
assert build_sequence_dir("/out", "clip", 1, sub=1) == "/out/clip_001/clip_001_1"
|
assert build_sequence_dir("/out", "clip", 1, sub=1) == "/out/clip_001_1"
|
||||||
|
|
||||||
def test_format_time_seconds():
|
def test_format_time_seconds():
|
||||||
assert format_time(0.0) == "0:00.0"
|
assert format_time(0.0) == "0:00.0"
|
||||||
@@ -178,10 +178,10 @@ def test_audio_extract_timing():
|
|||||||
|
|
||||||
|
|
||||||
def test_build_sequence_dir_basic():
|
def test_build_sequence_dir_basic():
|
||||||
assert build_sequence_dir("/out", "clip", 1) == "/out/clip_001/clip_001"
|
assert build_sequence_dir("/out", "clip", 1) == "/out/clip_001"
|
||||||
|
|
||||||
def test_build_sequence_dir_counter():
|
def test_build_sequence_dir_counter():
|
||||||
assert build_sequence_dir("/out", "clip", 42) == "/out/clip_042/clip_042"
|
assert build_sequence_dir("/out", "clip", 42) == "/out/clip_042"
|
||||||
|
|
||||||
def test_ffmpeg_command_image_sequence():
|
def test_ffmpeg_command_image_sequence():
|
||||||
cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/seq_001", image_sequence=True)
|
cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/seq_001", image_sequence=True)
|
||||||
@@ -265,13 +265,13 @@ def test_db_get_group_returns_all_sub_clips():
|
|||||||
path = f.name
|
path = f.name
|
||||||
try:
|
try:
|
||||||
db = ProcessedDB(path)
|
db = ProcessedDB(path)
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_2.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_2.mp4")
|
||||||
group = db.get_group("/out/clip_001/clip_001_0.mp4")
|
group = db.get_group("/out/vid_001/clip_001_0.mp4")
|
||||||
assert len(group) == 3
|
assert len(group) == 3
|
||||||
assert "/out/clip_001/clip_001_0.mp4" in group
|
assert "/out/vid_001/clip_001_0.mp4" in group
|
||||||
assert "/out/clip_001/clip_001_2.mp4" in group
|
assert "/out/vid_001/clip_001_2.mp4" in group
|
||||||
finally:
|
finally:
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
|
|
||||||
@@ -281,10 +281,10 @@ def test_db_get_group_isolates_by_start_time():
|
|||||||
path = f.name
|
path = f.name
|
||||||
try:
|
try:
|
||||||
db = ProcessedDB(path)
|
db = ProcessedDB(path)
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
|
||||||
db.add("video.mp4", 30.0, "/out/clip_002/clip_002_0.mp4")
|
db.add("video.mp4", 30.0, "/out/vid_001/clip_002_0.mp4")
|
||||||
group = db.get_group("/out/clip_001/clip_001_0.mp4")
|
group = db.get_group("/out/vid_001/clip_001_0.mp4")
|
||||||
assert len(group) == 2
|
assert len(group) == 2
|
||||||
finally:
|
finally:
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
@@ -295,10 +295,10 @@ def test_db_delete_group_removes_all():
|
|||||||
path = f.name
|
path = f.name
|
||||||
try:
|
try:
|
||||||
db = ProcessedDB(path)
|
db = ProcessedDB(path)
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_0.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_0.mp4")
|
||||||
db.add("video.mp4", 10.0, "/out/clip_001/clip_001_1.mp4")
|
db.add("video.mp4", 10.0, "/out/vid_001/clip_001_1.mp4")
|
||||||
db.add("video.mp4", 30.0, "/out/clip_002/clip_002_0.mp4")
|
db.add("video.mp4", 30.0, "/out/vid_001/clip_002_0.mp4")
|
||||||
deleted = db.delete_group("/out/clip_001/clip_001_0.mp4")
|
deleted = db.delete_group("/out/vid_001/clip_001_0.mp4")
|
||||||
assert len(deleted) == 2
|
assert len(deleted) == 2
|
||||||
# clip_002 should still exist
|
# clip_002 should still exist
|
||||||
markers = db.get_markers("video.mp4")
|
markers = db.get_markers("video.mp4")
|
||||||
|
|||||||
Reference in New Issue
Block a user