feat: disable/resize scan regions, undo, training fixes, cross-platform cleanup

- Scan regions can be disabled (Del/Backspace) instead of deleted, shown greyed out
- Resize scan regions by dragging timeline edges or editing table cells
- Grey ghost overlay shows trimmed portions of resized regions
- Ctrl+Z undo for disable, resize, drag, and negative toggle actions
- Fix training stats counting scan-exported clips even when the checkbox is unchecked
- Switch classifier to HistGradientBoostingClassifier (multi-threaded)
- Timestamped model saves with latest copy at base path
- Fix next-folder counter not detecting scan export folders
- Each scan area exports to its own numbered clip folder
- Platform-aware HW encoder detection (Linux/Windows/macOS)
- Auto-detect VAAPI render device instead of hardcoding
- Use shutil.move for cross-drive safety on Windows
- Comprehensive README rewrite with scan workflow documentation

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 20:34:56 +02:00
parent b161412d94
commit 6ddfcde8ee
5 changed files with 826 additions and 139 deletions
+16 -15
View File
@@ -322,7 +322,7 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
dict with 'classifier', 'embed_model', and metadata, or None on failure.
"""
from concurrent.futures import ThreadPoolExecutor, as_completed
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
def _progress(msg: str) -> None:
_log(msg)
@@ -411,8 +411,8 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
rng.shuffle(train_idx)
_progress(f"Fitting classifier on {len(train_idx)} samples...")
clf = GradientBoostingClassifier(
n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42,
clf = HistGradientBoostingClassifier(
max_iter=200, max_depth=5, learning_rate=0.1, random_state=42,
)
clf.fit(X[train_idx], y_arr[train_idx])
_log("audio_scan: classifier trained")
@@ -422,19 +422,20 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
if model_path:
import joblib
from datetime import datetime
parent = os.path.dirname(model_path)
if parent:
os.makedirs(parent, exist_ok=True)
# Version backup: keep previous model before overwriting
if os.path.exists(model_path):
from datetime import datetime
stem, ext = os.path.splitext(model_path)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
backup = f"{stem}_{ts}{ext}"
os.rename(model_path, backup)
_log(f"audio_scan: previous model backed up to {os.path.basename(backup)}")
joblib.dump(model, model_path)
_log(f"audio_scan: model saved to {model_path}")
# Save with timestamp in name; keep a copy at the base path as the "latest"
stem, ext = os.path.splitext(model_path)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
versioned = f"{stem}_{ts}{ext}"
joblib.dump(model, versioned)
_log(f"audio_scan: model saved to {versioned}")
# Update the base path to point to latest version (copy)
import shutil
shutil.copy2(versioned, model_path)
_log(f"audio_scan: latest model updated: {model_path}")
return model
@@ -488,6 +489,7 @@ def list_model_versions(profile_name: str = "default",
def restore_model_version(version_path: str, profile_name: str = "default",
embed_model: str | None = None) -> None:
"""Restore a backup version as the active model."""
import shutil
from datetime import datetime
current = default_model_path(profile_name, embed_model)
if version_path == current:
@@ -496,8 +498,7 @@ def restore_model_version(version_path: str, profile_name: str = "default",
if os.path.exists(current):
stem, ext = os.path.splitext(current)
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
os.rename(current, f"{stem}_{ts}{ext}")
import shutil
shutil.move(current, f"{stem}_{ts}{ext}")
shutil.copy2(version_path, current)
_log(f"audio_scan: restored {os.path.basename(version_path)} as active model")
+88 -23
View File
@@ -84,15 +84,32 @@ class ProcessedDB:
)
self._con.execute(
"CREATE TABLE IF NOT EXISTS scan_results ("
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
" filename TEXT NOT NULL,"
" profile TEXT NOT NULL DEFAULT 'default',"
" model TEXT NOT NULL,"
" start_time REAL NOT NULL,"
" end_time REAL NOT NULL,"
" score REAL NOT NULL"
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
" filename TEXT NOT NULL,"
" profile TEXT NOT NULL DEFAULT 'default',"
" model TEXT NOT NULL,"
" start_time REAL NOT NULL,"
" end_time REAL NOT NULL,"
" score REAL NOT NULL,"
" disabled INTEGER NOT NULL DEFAULT 0,"
" orig_start_time REAL,"
" orig_end_time REAL"
")"
)
# Migrate: add new columns to existing scan_results tables
sr_cols = {
row[1]
for row in self._con.execute("PRAGMA table_info(scan_results)").fetchall()
}
for col, typedef in [
("disabled", "INTEGER NOT NULL DEFAULT 0"),
("orig_start_time", "REAL"),
("orig_end_time", "REAL"),
]:
if col not in sr_cols:
self._con.execute(
f"ALTER TABLE scan_results ADD COLUMN {col} {typedef}"
)
self._con.execute(
"CREATE INDEX IF NOT EXISTS idx_scan_file_profile_model"
" ON scan_results(filename, profile, model)"
@@ -238,11 +255,22 @@ class ProcessedDB:
def get_markers(self, filename: str, profile: str = "default") -> list[tuple[float, int, str]]:
"""Return [(start_time, marker_number, output_path), ...] for exact
filename match, sorted by start_time. Empty list if no match."""
filename match, sorted by start_time. Empty list if no match.
Excludes scan exports (shown via scan panel instead)."""
if not self._enabled:
return []
return self._get_markers_for(filename, profile)
def get_clip_count(self, filename: str, profile: str = "default") -> int:
    """Count the rows in ``processed`` for *filename* under *profile*.

    The query applies no scan-export filter, so scan exports are part of
    the total. Returns 0 when the database is disabled or when the query
    yields no row.
    """
    if self._enabled:
        cursor = self._con.execute(
            "SELECT COUNT(*) FROM processed WHERE filename = ? AND profile = ?",
            (filename, profile),
        )
        hit = cursor.fetchone()
        if hit:
            return hit[0]
    return 0
def get_profiles(self) -> list[str]:
"""Return distinct profile names, ordered alphabetically."""
if not self._enabled:
@@ -378,7 +406,8 @@ class ProcessedDB:
result.append((sp, gt_pos, gt_soft, gt_neg))
return result
def get_training_stats(self, profile: str) -> dict[str, dict]:
def get_training_stats(self, profile: str,
include_scan_exports: bool = False) -> dict[str, dict]:
"""Return per-subprofile stats for training readiness display.
Returns dict mapping subprofile_name → {
@@ -388,10 +417,17 @@ class ProcessedDB:
"""
if not self._enabled:
return {}
rows = self._con.execute(
"SELECT filename, output_path FROM processed WHERE profile = ?",
(profile,),
).fetchall()
if include_scan_exports:
rows = self._con.execute(
"SELECT filename, output_path FROM processed WHERE profile = ?",
(profile,),
).fetchall()
else:
rows = self._con.execute(
"SELECT filename, output_path FROM processed"
" WHERE profile = ? AND scan_export = 0",
(profile,),
).fetchall()
folders = self.get_export_folders(profile)
stats: dict[str, dict] = {}
for folder_name in folders:
@@ -423,30 +459,36 @@ class ProcessedDB:
)
self._con.executemany(
"INSERT INTO scan_results"
" (filename, profile, model, start_time, end_time, score)"
" VALUES (?, ?, ?, ?, ?, ?)",
[(filename, profile, model, s, e, sc) for s, e, sc in regions],
" (filename, profile, model, start_time, end_time, score,"
" orig_start_time, orig_end_time)"
" VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
[(filename, profile, model, s, e, sc, s, e) for s, e, sc in regions],
)
self._con.commit()
def get_scan_results(self, filename: str, profile: str
) -> dict[str, list[tuple[int, float, float, float]]]:
) -> dict[str, list[tuple[int, float, float, float, bool, float, float]]]:
"""Return scan results grouped by model.
Returns {model: [(row_id, start_time, end_time, score), ...]} sorted by
start_time.
Returns {model: [(row_id, start, end, score, disabled, orig_start, orig_end), ...]}
sorted by start_time.
"""
if not self._enabled:
return {}
rows = self._con.execute(
"SELECT id, model, start_time, end_time, score FROM scan_results"
"SELECT id, model, start_time, end_time, score, disabled,"
" orig_start_time, orig_end_time"
" FROM scan_results"
" WHERE filename = ? AND profile = ?"
" ORDER BY model, start_time",
(filename, profile),
).fetchall()
result: dict[str, list[tuple[int, float, float, float]]] = {}
for row_id, model, s, e, sc in rows:
result.setdefault(model, []).append((row_id, s, e, sc))
result: dict[str, list[tuple[int, float, float, float, bool, float, float]]] = {}
for row_id, model, s, e, sc, dis, os_, oe in rows:
# Fall back to current bounds for legacy rows without orig
result.setdefault(model, []).append(
(row_id, s, e, sc, bool(dis), os_ if os_ is not None else s,
oe if oe is not None else e))
return result
def delete_scan_result(self, row_id: int) -> None:
@@ -457,6 +499,29 @@ class ProcessedDB:
self._con.execute("DELETE FROM scan_results WHERE id = ?", (row_id,))
self._con.commit()
def toggle_scan_result_disabled(self, row_id: int, disabled: bool) -> None:
    """Store the disabled flag for a single ``scan_results`` row.

    The flag is set to the value of *disabled* (written as 1 or 0); it is
    not flipped relative to the stored value. Does nothing when the
    database is disabled.
    """
    if not self._enabled:
        return
    flag = int(bool(disabled))
    # The update and its commit happen together while holding the lock.
    with self._lock:
        self._con.execute(
            "UPDATE scan_results SET disabled = ? WHERE id = ?",
            (flag, row_id),
        )
        self._con.commit()
def update_scan_result_times(self, row_id: int,
                             start: float, end: float) -> None:
    """Write new start/end times for one ``scan_results`` row (resize).

    Only ``start_time`` and ``end_time`` are assigned by the statement.
    Does nothing when the database is disabled.
    """
    if self._enabled:
        params = (start, end, row_id)
        # The update and its commit happen together while holding the lock.
        with self._lock:
            self._con.execute(
                "UPDATE scan_results SET start_time = ?, end_time = ? WHERE id = ?",
                params,
            )
            self._con.commit()
def get_scan_models(self, filename: str, profile: str) -> list[str]:
"""Return model names that have scan results for this file."""
if not self._enabled:
+27 -9
View File
@@ -1,6 +1,7 @@
import os
import re
import subprocess
import sys
from .paths import _bin, _log
@@ -63,6 +64,13 @@ def apply_keyframes_to_jobs(
return result
def _find_vaapi_device() -> str:
"""Return the first available VAAPI render device path (Linux)."""
import glob
devices = sorted(glob.glob("/dev/dri/renderD*"))
return devices[0] if devices else "/dev/dri/renderD128"
def build_ffmpeg_command(
input_path: str, start: float, output_path: str,
short_side: int | None = None,
@@ -74,13 +82,15 @@ def build_ffmpeg_command(
# -ss before -i: fast input-seeking. Safe here because we always re-encode,
# so there is no keyframe-alignment issue from pre-input seek.
# Image sequences always use libwebp, so skip HW encoder setup.
use_hw_vaapi = encoder == "h264_vaapi" and not image_sequence
use_hw_vaapi = (encoder == "h264_vaapi" and not image_sequence
and sys.platform == "linux")
cmd = [_bin("ffmpeg"), "-y"]
# VAAPI needs a device for hardware context.
# VAAPI needs a render device for hardware context (Linux only).
if use_hw_vaapi:
vaapi_dev = _find_vaapi_device()
cmd += ["-hwaccel", "vaapi", "-hwaccel_output_format", "vaapi",
"-vaapi_device", "/dev/dri/renderD128"]
"-vaapi_device", vaapi_dev]
cmd += [
"-threads", "0",
@@ -137,8 +147,19 @@ def build_audio_extract_command(input_path: str, start: float, sequence_dir: str
def detect_hw_encoders() -> list[str]:
"""Probe ffmpeg for available H.264 hardware encoders."""
_HW_ENCODERS = ["h264_nvenc", "h264_vaapi", "h264_qsv", "h264_amf", "h264_videotoolbox"]
"""Probe ffmpeg for available H.264 hardware encoders.
Returns only encoders relevant to the current platform:
- Windows: h264_nvenc, h264_qsv, h264_amf
- Linux: h264_nvenc, h264_vaapi, h264_qsv
- macOS: h264_videotoolbox
"""
if sys.platform == "win32":
candidates = ["h264_nvenc", "h264_qsv", "h264_amf"]
elif sys.platform == "darwin":
candidates = ["h264_videotoolbox"]
else:
candidates = ["h264_nvenc", "h264_vaapi", "h264_qsv"]
try:
result = subprocess.run(
[_bin("ffmpeg"), "-hide_banner", "-encoders"],
@@ -149,10 +170,7 @@ def detect_hw_encoders() -> list[str]:
output = result.stdout
except Exception:
return []
available = []
for enc in _HW_ENCODERS:
if re.search(rf'\b{enc}\b', output):
available.append(enc)
available = [enc for enc in candidates if re.search(rf'\b{enc}\b', output)]
if available:
_log(f"HW encoders detected: {', '.join(available)}")
else: