feat: scan results panel, model switching, batch scan, and training improvements
- Replace librosa with direct ffmpeg subprocess for 10x faster audio loading - Add ScanResultsPanel with per-model tabs, seek-on-click, delete, and export - Persist scan results in DB per (filename, profile, model) - Add model selector dropdown to switch between trained embedding models - Add "Scan All" button for batch scanning playlist videos - Support manual negative examples via negative class folder - Configurable auto-negative margin (default 30s, 0 = disabled) - Deduplicate nearby training markers (8s min gap) - Parallel audio loading with ThreadPoolExecutor during training - Progress callbacks from training for UI status updates - Cache bypass in scan_video (skip audio loading when embeddings cached) - Move all caches (models, embeddings, downloads) into project directory - Add 8cut.sh launcher script with auto venv/conda detection - Fix 11 bugs across thread safety, signal handling, and state management Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+168
-30
@@ -2,15 +2,39 @@
|
||||
|
||||
import hashlib
|
||||
import os
|
||||
import subprocess
|
||||
import warnings
|
||||
import numpy as np
|
||||
import librosa
|
||||
|
||||
from .paths import _log
|
||||
from .paths import _bin, _log
|
||||
|
||||
_SR = 16000 # lower sr = faster
|
||||
|
||||
|
||||
def _load_audio_ffmpeg(path: str, sr: int = _SR) -> np.ndarray:
    """Decode the audio of *path* to a mono float32 array by piping through ffmpeg.

    Args:
        path: media file to decode; passed to ffmpeg as the ``-i`` input.
        sr: target sample rate in Hz; ffmpeg resamples to it (``-ar``).

    Returns:
        A 1-D, writable, native-endian ``float32`` array of samples.

    Raises:
        RuntimeError: ffmpeg exited with a non-zero status; the message
            carries ffmpeg's stderr output.
        subprocess.TimeoutExpired: ffmpeg ran for more than 300 seconds.
    """
    cmd = [
        _bin("ffmpeg"),      # NOTE(review): presumably resolves the ffmpeg executable — confirm in .paths
        "-nostdin",          # never let ffmpeg read (or wait on) the caller's stdin
        "-i", path,
        "-vn",               # skip video
        "-ac", "1",          # mono
        "-ar", str(sr),      # resample
        "-f", "f32le",       # raw 32-bit float little-endian
        "-loglevel", "error",
        "pipe:1",
    ]
    proc = subprocess.run(
        cmd,
        stdin=subprocess.DEVNULL,  # belt and braces alongside -nostdin
        capture_output=True,
        timeout=300,
    )
    if proc.returncode != 0:
        # ffmpeg diagnostics may echo file names in arbitrary encodings; never
        # let a decode error mask the real failure.
        detail = proc.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(f"ffmpeg failed: {detail}")

    raw = proc.stdout
    # Ignore a trailing partial sample instead of letting frombuffer raise.
    n_samples = len(raw) // 4
    samples = np.frombuffer(raw, dtype="<f4", count=n_samples)
    # frombuffer yields a read-only view over the bytes object; hand callers
    # a writable native-endian copy, as an ordinary freshly-allocated array
    # would be.
    return samples.astype(np.float32)
|
||||
_WINDOW = 8.0 # seconds
|
||||
_MODEL_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "models")
|
||||
_W2V_CACHE_DIR = os.path.join(os.path.expanduser("~"), ".8cut_cache", "w2v")
|
||||
_PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
_MODEL_DIR = os.path.join(_PROJECT_DIR, "models")
|
||||
_W2V_CACHE_DIR = os.path.join(_PROJECT_DIR, "cache", "w2v")
|
||||
_DL_CACHE_DIR = os.path.join(_PROJECT_DIR, "cache", "downloads")
|
||||
|
||||
# Redirect torch hub and huggingface downloads into the project
|
||||
os.environ.setdefault("TORCH_HOME", _DL_CACHE_DIR)
|
||||
os.environ.setdefault("HF_HOME", os.path.join(_DL_CACHE_DIR, "huggingface"))
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Embedding extraction (lazy-loaded)
|
||||
@@ -33,7 +57,7 @@ _EMBED_MODELS = {
|
||||
_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
|
||||
|
||||
_BEATS_CHECKPOINT = os.path.join(
|
||||
os.path.expanduser("~"), ".cache", "huggingface", "hub",
|
||||
_DL_CACHE_DIR, "huggingface", "hub",
|
||||
"models--lpepino--beats_ckpts", "snapshots",
|
||||
"5b53b0404df452a3a607d7e67687227730e5bad1", "BEATs_iter3_plus_AS2M.pt",
|
||||
)
|
||||
@@ -86,6 +110,30 @@ def _w2v_cache_path(video_path: str, hop: float, window: float,
|
||||
return os.path.join(_W2V_CACHE_DIR, f"{h}.npz")
|
||||
|
||||
|
||||
def _w2v_cache_exists(video_path: str, hop: float, window: float,
                      model_name: str | None = None) -> bool:
    """Tell whether an embedding cache file is on disk for this video/config.

    The cache location comes from ``_w2v_cache_path``.  Any exception raised
    while computing the path or probing the filesystem is swallowed and
    reported as "no cache" (``False``).
    """
    try:
        cache_file = _w2v_cache_path(video_path, hop, window, model_name)
        found = os.path.exists(cache_file)
    except Exception:
        found = False
    return found
|
||||
|
||||
|
||||
def _w2v_cache_load(video_path: str, hop: float, window: float,
                    model_name: str | None = None) -> tuple[np.ndarray, np.ndarray] | None:
    """Load cached embeddings for a video, if a cache file exists.

    Args:
        video_path, hop, window, model_name: forwarded unchanged to
            ``_w2v_cache_path`` to locate the ``.npz`` cache file.

    Returns:
        ``(timestamps, embeddings)`` read from the cache file, or ``None``
        when the file is missing or cannot be read.  Read failures are
        logged, never raised: the cache is best-effort.
    """
    try:
        path = _w2v_cache_path(video_path, hop, window, model_name)
        if not os.path.exists(path):
            return None
        # np.load on an .npz returns a lazy NpzFile that keeps the archive
        # open; materialise both arrays inside the context manager so the
        # file handle is released deterministically.
        with np.load(path) as data:
            timestamps = data["timestamps"]
            embeddings = data["embeddings"]
        # Only report a hit once the arrays were actually read.
        _log(f"audio_scan: cache hit ({path})")
        return timestamps, embeddings
    except Exception as e:
        # Deliberately broad: a corrupt or partial cache must fall back to
        # recomputation rather than abort the scan.
        _log(f"audio_scan: cache read failed: {e}")
    return None
|
||||
|
||||
|
||||
def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
|
||||
hop: float = 1.0, window: float = _WINDOW,
|
||||
video_path: str | None = None,
|
||||
@@ -162,6 +210,7 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
|
||||
gt_soft: list[float], tolerance: float = 12.0,
|
||||
neg_margin: float = 120.0,
|
||||
model_name: str | None = None,
|
||||
gt_negative: list[float] | None = None,
|
||||
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
|
||||
"""Extract embeddings only near positives and distant negatives.
|
||||
|
||||
@@ -180,13 +229,24 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
|
||||
if 0 <= t <= duration - _WINDOW:
|
||||
pos_times.add(int(t))
|
||||
|
||||
# Negative windows: every 4s, far from any marker
|
||||
# Manual negative windows: near explicit negative markers
|
||||
manual_neg_times = set()
|
||||
if gt_negative:
|
||||
for gt in gt_negative:
|
||||
for offset in range(-int(tolerance), int(tolerance) + 1):
|
||||
t = gt + offset
|
||||
if 0 <= t <= duration - _WINDOW:
|
||||
manual_neg_times.add(int(t))
|
||||
# Don't let manual negatives overlap with positives
|
||||
manual_neg_times -= pos_times
|
||||
|
||||
# Auto negative windows: every 4s, far from any marker (skip if margin <= 0)
|
||||
neg_times = set()
|
||||
for t in range(0, int(duration - _WINDOW), 4):
|
||||
if min((abs(t - g) for g in all_gt), default=9999) > neg_margin:
|
||||
if neg_margin > 0 and min((abs(t - g) for g in all_gt), default=9999) > neg_margin:
|
||||
neg_times.add(t)
|
||||
|
||||
all_times = sorted(pos_times | neg_times)
|
||||
all_times = sorted(pos_times | neg_times | manual_neg_times)
|
||||
# Filter out windows that go past the end
|
||||
valid_times = [t for t in all_times if int(t * sr) + win_samples <= len(y)]
|
||||
|
||||
@@ -225,9 +285,10 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
|
||||
for i, t in enumerate(timestamps):
|
||||
di = min((abs(t - g) for g in gt_intense), default=9999)
|
||||
da = min((abs(t - g) for g in all_gt), default=9999)
|
||||
dm = min((abs(t - g) for g in (gt_negative or [])), default=9999)
|
||||
if di < tolerance:
|
||||
labels[i] = 1
|
||||
elif da > neg_margin:
|
||||
elif dm < tolerance or (neg_margin > 0 and da > neg_margin):
|
||||
labels[i] = -1
|
||||
return timestamps, embeddings, labels
|
||||
|
||||
@@ -241,7 +302,9 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
||||
tolerance: float = 12.0,
|
||||
neg_margin: float = 120.0,
|
||||
embed_model: str | None = None,
|
||||
cancel_flag: object = None) -> dict:
|
||||
cancel_flag: object = None,
|
||||
n_workers: int = 4,
|
||||
progress_cb: object = None) -> dict:
|
||||
"""Train a classifier from labeled videos.
|
||||
|
||||
Args:
|
||||
@@ -250,24 +313,62 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
||||
tolerance/neg_margin: labeling parameters
|
||||
embed_model: embedding model name (e.g. "HUBERT_BASE", "BEATS"), defaults to WAV2VEC2_BASE
|
||||
cancel_flag: object with _cancel attribute; if set, training aborts early
|
||||
n_workers: number of threads for parallel audio loading
|
||||
|
||||
Returns:
|
||||
dict with 'classifier', 'embed_model', and metadata, or None on failure.
|
||||
"""
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from sklearn.ensemble import GradientBoostingClassifier
|
||||
|
||||
all_X, all_y = [], []
|
||||
def _progress(msg: str) -> None:
|
||||
_log(msg)
|
||||
if progress_cb:
|
||||
progress_cb(msg)
|
||||
|
||||
for vi, (vpath, gt_intense, gt_soft) in enumerate(video_infos):
|
||||
def _load_audio(path: str) -> np.ndarray:
|
||||
return _load_audio_ffmpeg(path, sr=_SR)
|
||||
|
||||
# Phase 1: load all audio in parallel (cap workers — disk I/O bound)
|
||||
n = len(video_infos)
|
||||
load_workers = min(n_workers, 4)
|
||||
_progress(f"Loading audio: 0/{n} videos ({load_workers} workers)...")
|
||||
audio_data: dict[int, np.ndarray] = {}
|
||||
with ThreadPoolExecutor(max_workers=load_workers) as pool:
|
||||
future_to_idx = {
|
||||
pool.submit(_load_audio, vi[0]): i
|
||||
for i, vi in enumerate(video_infos)
|
||||
}
|
||||
failed = set()
|
||||
for future in as_completed(future_to_idx):
|
||||
if cancel_flag and getattr(cancel_flag, '_cancel', False):
|
||||
_log("audio_scan: training cancelled")
|
||||
return None
|
||||
idx = future_to_idx[future]
|
||||
try:
|
||||
audio_data[idx] = future.result()
|
||||
except Exception as e:
|
||||
_log(f"audio_scan: failed to load {os.path.basename(video_infos[idx][0])}: {e}")
|
||||
failed.add(idx)
|
||||
_progress(f"Loading audio: {len(audio_data) + len(failed)}/{n}")
|
||||
|
||||
# Phase 2: extract embeddings sequentially on GPU
|
||||
_progress(f"Extracting embeddings: 0/{n}")
|
||||
all_X, all_y = [], []
|
||||
for vi, vinfo in enumerate(video_infos):
|
||||
if vi in failed:
|
||||
continue
|
||||
vpath, gt_intense, gt_soft = vinfo[0], vinfo[1], vinfo[2]
|
||||
gt_negative = vinfo[3] if len(vinfo) > 3 else []
|
||||
if cancel_flag and getattr(cancel_flag, '_cancel', False):
|
||||
_log("audio_scan: training cancelled")
|
||||
return None
|
||||
_log(f"audio_scan: training [{vi+1}/{len(video_infos)}] {os.path.basename(vpath)}")
|
||||
y, _ = librosa.load(vpath, sr=_SR, mono=True)
|
||||
_progress(f"Extracting embeddings: {vi+1}/{n}")
|
||||
y = audio_data.pop(vi)
|
||||
|
||||
timestamps, embeddings, labels = _extract_w2v_targeted(
|
||||
y, _SR, gt_intense, gt_soft, tolerance, neg_margin,
|
||||
model_name=embed_model,
|
||||
model_name=embed_model, gt_negative=gt_negative,
|
||||
)
|
||||
if len(timestamps) == 0:
|
||||
continue
|
||||
@@ -306,6 +407,7 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
||||
train_idx = np.concatenate([pos_idx, neg_sample])
|
||||
rng.shuffle(train_idx)
|
||||
|
||||
_progress(f"Fitting classifier on {len(train_idx)} samples...")
|
||||
clf = GradientBoostingClassifier(
|
||||
n_estimators=200, max_depth=5, learning_rate=0.1, random_state=42,
|
||||
)
|
||||
@@ -334,11 +436,41 @@ def load_classifier(model_path: str) -> dict | None:
|
||||
return joblib.load(model_path)
|
||||
|
||||
|
||||
def default_model_path(profile_name: str = "default",
                       embed_model: str | None = None) -> str:
    """Build the on-disk location of a profile's classifier file.

    With a truthy *embed_model* the file name is ``{profile}_{model}.joblib``;
    without one it is ``{profile}.joblib`` (legacy single-model layout).
    The file lives directly inside ``_MODEL_DIR``.
    """
    stem = f"{profile_name}_{embed_model}" if embed_model else profile_name
    return os.path.join(_MODEL_DIR, f"{stem}.joblib")
|
||||
|
||||
|
||||
def list_trained_models(profile_name: str = "default") -> list[str]:
    """List embedding models that have a trained classifier for *profile_name*.

    Scans ``_MODEL_DIR`` for ``{profile}_{MODEL}.joblib`` files and keeps the
    ``MODEL`` parts that are keys of ``_EMBED_MODELS``.  If none are found but
    a legacy ``{profile}.joblib`` exists, the result is ``[""]`` (embedding
    model unknown).  Returns a sorted list; empty when the models directory
    does not exist.
    """
    if not os.path.isdir(_MODEL_DIR):
        return []

    head = f"{profile_name}_"
    tail = ".joblib"
    # Strip the profile prefix and the extension from every matching file name.
    candidates = [
        entry[len(head):-len(tail)]
        for entry in os.listdir(_MODEL_DIR)
        if entry.startswith(head) and entry.endswith(tail)
    ]
    found = [name for name in candidates if name in _EMBED_MODELS]

    # Legacy single-model file: only reported when nothing newer exists.
    legacy_file = os.path.join(_MODEL_DIR, f"{profile_name}.joblib")
    if os.path.exists(legacy_file) and not found:
        found.append("")

    found.sort()
    return found
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Scanning
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -359,22 +491,28 @@ def scan_video(
|
||||
_log("audio_scan: no model provided")
|
||||
return []
|
||||
|
||||
_log(f"audio_scan: loading {video_path}")
|
||||
y, sr = librosa.load(video_path, sr=_SR, mono=True)
|
||||
duration = len(y) / sr
|
||||
_log(f"audio_scan: {duration:.1f}s loaded, extracting features...")
|
||||
|
||||
if cancel_flag and getattr(cancel_flag, '_cancel', False):
|
||||
return []
|
||||
|
||||
clf = model["classifier"]
|
||||
embed_model = model.get("embed_model")
|
||||
|
||||
_log(f"audio_scan: extracting embeddings ({embed_model or 'default'})...")
|
||||
timestamps, window_vectors = _extract_w2v_windows(
|
||||
y, sr, hop=hop, window=window, video_path=video_path,
|
||||
cancel_flag=cancel_flag, model_name=embed_model,
|
||||
)
|
||||
# Try cache first — skip expensive audio loading if embeddings exist
|
||||
cached = _w2v_cache_load(video_path, hop, window, embed_model)
|
||||
if cached is not None:
|
||||
timestamps, window_vectors = cached
|
||||
else:
|
||||
_log(f"audio_scan: loading {video_path}")
|
||||
y = _load_audio_ffmpeg(video_path, sr=_SR)
|
||||
sr = _SR
|
||||
_log(f"audio_scan: {len(y)/sr:.1f}s loaded")
|
||||
|
||||
if cancel_flag and getattr(cancel_flag, '_cancel', False):
|
||||
return []
|
||||
|
||||
_log(f"audio_scan: extracting embeddings ({embed_model or 'default'})...")
|
||||
timestamps, window_vectors = _extract_w2v_windows(
|
||||
y, sr, hop=hop, window=window, video_path=video_path,
|
||||
cancel_flag=cancel_flag, model_name=embed_model,
|
||||
)
|
||||
|
||||
if len(timestamps) == 0:
|
||||
_log("audio_scan: video shorter than window")
|
||||
return []
|
||||
|
||||
+131
-10
@@ -81,6 +81,21 @@ class ProcessedDB:
|
||||
" PRIMARY KEY (filename, profile)"
|
||||
")"
|
||||
)
|
||||
self._con.execute(
|
||||
"CREATE TABLE IF NOT EXISTS scan_results ("
|
||||
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
|
||||
" filename TEXT NOT NULL,"
|
||||
" profile TEXT NOT NULL DEFAULT 'default',"
|
||||
" model TEXT NOT NULL,"
|
||||
" start_time REAL NOT NULL,"
|
||||
" end_time REAL NOT NULL,"
|
||||
" score REAL NOT NULL"
|
||||
")"
|
||||
)
|
||||
self._con.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_scan_file_profile_model"
|
||||
" ON scan_results(filename, profile, model)"
|
||||
)
|
||||
self._con.commit()
|
||||
|
||||
def add(self, filename: str, start_time: float, output_path: str,
|
||||
@@ -248,18 +263,20 @@ class ProcessedDB:
|
||||
return sorted(folder_names)
|
||||
|
||||
def get_training_data(self, profile: str, positive_folder: str,
|
||||
negative_folder: str = "",
|
||||
fallback_video_dir: str = "",
|
||||
) -> list[tuple[str, list[float], list[float]]]:
|
||||
) -> list[tuple[str, list[float], list[float], list[float]]]:
|
||||
"""Build training video_infos from DB data.
|
||||
|
||||
Args:
|
||||
profile: profile name
|
||||
positive_folder: export folder name for positive class (e.g. "mp4_Intense")
|
||||
negative_folder: export folder name for explicit negatives (optional)
|
||||
fallback_video_dir: if source_path is empty, try filename in this dir
|
||||
|
||||
Returns:
|
||||
list of (source_video_path, positive_times, soft_times) per video.
|
||||
Soft times = clips from any other export folder.
|
||||
list of (source_video_path, positive_times, soft_times, negative_times)
|
||||
per video. Soft times = clips from any other non-negative folder.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return []
|
||||
@@ -269,8 +286,9 @@ class ProcessedDB:
|
||||
(profile,),
|
||||
).fetchall()
|
||||
|
||||
# Collect times by video, split by positive vs other folders
|
||||
# Collect times by video, split by folder role
|
||||
pos_by_video: dict[str, set[float]] = {}
|
||||
neg_by_video: dict[str, set[float]] = {}
|
||||
soft_by_video: dict[str, set[float]] = {}
|
||||
source_by_filename: dict[str, str] = {}
|
||||
|
||||
@@ -280,26 +298,43 @@ class ProcessedDB:
|
||||
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
|
||||
if grandparent == positive_folder:
|
||||
pos_by_video.setdefault(fn, set()).add(st)
|
||||
elif negative_folder and grandparent == negative_folder:
|
||||
neg_by_video.setdefault(fn, set()).add(st)
|
||||
else:
|
||||
soft_by_video.setdefault(fn, set()).add(st)
|
||||
|
||||
# Remove positive times from soft to avoid conflicting labels
|
||||
# Remove positive times from soft/neg to avoid conflicting labels
|
||||
for fn in pos_by_video:
|
||||
if fn in soft_by_video:
|
||||
soft_by_video[fn] -= pos_by_video[fn]
|
||||
if fn in neg_by_video:
|
||||
neg_by_video[fn] -= pos_by_video[fn]
|
||||
|
||||
# Deduplicate nearby markers (spread clips from same position)
|
||||
def _dedup_times(times: set[float], min_gap: float = 8.0) -> list[float]:
|
||||
if not times:
|
||||
return []
|
||||
ordered = sorted(times)
|
||||
result = [ordered[0]]
|
||||
for t in ordered[1:]:
|
||||
if t - result[-1] >= min_gap:
|
||||
result.append(t)
|
||||
return result
|
||||
|
||||
# Include videos that have positives OR explicit negatives
|
||||
all_videos = set(pos_by_video) | set(neg_by_video)
|
||||
result = []
|
||||
for fn in pos_by_video:
|
||||
for fn in all_videos:
|
||||
sp = source_by_filename.get(fn, "")
|
||||
if not sp or not os.path.exists(sp):
|
||||
# Fallback: try video_dir / filename
|
||||
if fallback_video_dir:
|
||||
sp = os.path.join(fallback_video_dir, fn)
|
||||
if not sp or not os.path.exists(sp):
|
||||
continue
|
||||
gt_pos = sorted(pos_by_video[fn])
|
||||
gt_soft = sorted(soft_by_video.get(fn, set()))
|
||||
result.append((sp, gt_pos, gt_soft))
|
||||
gt_pos = _dedup_times(pos_by_video.get(fn, set()))
|
||||
gt_soft = _dedup_times(soft_by_video.get(fn, set()))
|
||||
gt_neg = _dedup_times(neg_by_video.get(fn, set()))
|
||||
result.append((sp, gt_pos, gt_soft, gt_neg))
|
||||
return result
|
||||
|
||||
def get_training_stats(self, profile: str) -> dict[str, dict]:
|
||||
@@ -329,6 +364,92 @@ class ProcessedDB:
|
||||
stats[folder_name] = {"videos": len(videos), "clips": clips}
|
||||
return stats
|
||||
|
||||
# ── Scan results ─────────────────────────────────────────────
|
||||
|
||||
def save_scan_results(self, filename: str, profile: str, model: str,
                      regions: list[tuple[float, float, float]]) -> None:
    """Atomically replace the scan results stored for (filename, profile, model).

    Args:
        filename: video file name the results belong to.
        profile: profile name.
        model: model name the results were produced with.
        regions: list of ``(start_time, end_time, score)`` tuples; an empty
            list simply clears the stored results.

    Raises:
        ValueError / TypeError: an entry of *regions* is not a 3-item
            sequence; raised before the database is touched.
        Exception: any database error is re-raised after the pending
            transaction has been rolled back.

    Does nothing when the database is disabled.
    """
    if not self._enabled:
        return
    # Build the rows before issuing any SQL so a malformed region cannot
    # leave a half-done replacement behind.
    rows = [(filename, profile, model, s, e, sc) for s, e, sc in regions]
    with self._lock:
        try:
            self._con.execute(
                "DELETE FROM scan_results"
                " WHERE filename = ? AND profile = ? AND model = ?",
                (filename, profile, model),
            )
            self._con.executemany(
                "INSERT INTO scan_results"
                " (filename, profile, model, start_time, end_time, score)"
                " VALUES (?, ?, ?, ?, ?, ?)",
                rows,
            )
            self._con.commit()
        except Exception:
            # Without this the DELETE would stay pending and be committed by
            # the next unrelated commit(), silently dropping the old results.
            self._con.rollback()
            raise
|
||||
|
||||
def get_scan_results(self, filename: str, profile: str
                     ) -> dict[str, list[tuple[int, float, float, float]]]:
    """Fetch the stored scan results of one file/profile, keyed by model.

    Returns ``{model: [(row_id, start_time, end_time, score), ...]}`` where
    each list is ordered by ``start_time`` (the query sorts by model, then
    start time).  Returns an empty dict when the database is disabled or no
    rows match.
    """
    grouped: dict[str, list[tuple[int, float, float, float]]] = {}
    if not self._enabled:
        return grouped

    cursor = self._con.execute(
        "SELECT id, model, start_time, end_time, score FROM scan_results"
        " WHERE filename = ? AND profile = ?"
        " ORDER BY model, start_time",
        (filename, profile),
    )
    for row_id, model, start, end, score in cursor.fetchall():
        if model not in grouped:
            grouped[model] = []
        grouped[model].append((row_id, start, end, score))
    return grouped
|
||||
|
||||
def delete_scan_result(self, row_id: int) -> None:
    """Remove the scan-result row whose ``id`` equals *row_id* and commit.

    A no-op when the database is disabled; deleting a non-existent id
    changes nothing.
    """
    if self._enabled:
        # Serialise the write + commit with the instance lock.
        with self._lock:
            self._con.execute("DELETE FROM scan_results WHERE id = ?", (row_id,))
            self._con.commit()
|
||||
|
||||
def get_scan_models(self, filename: str, profile: str) -> list[str]:
    """List the distinct model names with stored scan results for this file.

    Names come back in the order produced by ``ORDER BY model``.  Returns an
    empty list when the database is disabled or nothing matches.
    """
    if not self._enabled:
        return []
    query = (
        "SELECT DISTINCT model FROM scan_results"
        " WHERE filename = ? AND profile = ? ORDER BY model"
    )
    found = self._con.execute(query, (filename, profile)).fetchall()
    names = []
    for record in found:
        names.append(record[0])
    return names
|
||||
|
||||
def get_scanned_filenames(self, profile: str, model: str) -> set[str]:
    """Collect the file names that have at least one scan result for *model*.

    Only rows of the given *profile* are considered.  Returns an empty set
    when the database is disabled or nothing matches.
    """
    names: set[str] = set()
    if not self._enabled:
        return names
    cursor = self._con.execute(
        "SELECT DISTINCT filename FROM scan_results"
        " WHERE profile = ? AND model = ?",
        (profile, model),
    )
    for record in cursor.fetchall():
        names.add(record[0])
    return names
|
||||
|
||||
def get_training_filenames(self, profile: str) -> set[str]:
    """Collect the distinct file names recorded in ``processed`` for *profile*.

    Returns an empty set when the database is disabled or the profile has
    no rows.
    """
    if not self._enabled:
        return set()
    cursor = self._con.execute(
        "SELECT DISTINCT filename FROM processed WHERE profile = ?",
        (profile,),
    )
    names = set()
    for record in cursor.fetchall():
        names.add(record[0])
    return names
|
||||
|
||||
# ── Hidden files ───────────────────────────────────────────
|
||||
|
||||
def hide_file(self, filename: str, profile: str = "default") -> None:
|
||||
if not self._enabled:
|
||||
return
|
||||
|
||||
Reference in New Issue
Block a user