Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1d49ce7cee | |||
| 109bc658c3 | |||
| ec7138f51b | |||
| 68c633ab46 | |||
| d0a94e7b68 | |||
| 632c2dc076 | |||
| 0f335c5e66 | |||
| f1f8fd5244 | |||
| 299779cf29 | |||
| 56218c18f4 | |||
| 2c45aff668 | |||
| 07e2f733b9 | |||
| 8c5a4c4524 | |||
| 4e5b631efb | |||
| ec77b8224f | |||
| 9becd5a06d | |||
| fae5560e2d | |||
| 07e3a1223c | |||
| 3af6e05fb7 | |||
| d787871735 | |||
| 85c08d7c48 | |||
| f6966a092a | |||
| 7cee3ab768 | |||
| 47f910644d | |||
| e972c7a2ae | |||
| cb805c5bda | |||
| bf14247b00 | |||
| 73396659dc | |||
| c8bc629419 | |||
| de8840e1eb | |||
| def966a913 | |||
| bc4ae21153 | |||
| a731fbfc32 |
@@ -3,6 +3,7 @@
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
ENV_NAME="8cut"
|
||||
CONDA_PREFIX_BASE="/media/p5/miniforge3"
|
||||
export LD_PRELOAD=/usr/lib/libstdc++.so.6
|
||||
|
||||
# 1. Try .venv in project dir
|
||||
if [ -f "$SCRIPT_DIR/.venv/bin/activate" ]; then
|
||||
|
||||
+13
-7
@@ -67,7 +67,7 @@ _EMBED_MODELS = {
|
||||
"EAT": 768,
|
||||
"EAT_LARGE": 1024,
|
||||
}
|
||||
_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
|
||||
_DEFAULT_EMBED_MODEL = "EAT_LARGE"
|
||||
|
||||
_BEATS_CHECKPOINT = os.path.join(
|
||||
_DL_CACHE_DIR, "huggingface", "hub",
|
||||
@@ -674,9 +674,11 @@ def restore_model_version(version_path: str, profile_name: str = "default",
|
||||
|
||||
|
||||
def list_trained_models(profile_name: str = "default") -> list[str]:
|
||||
"""Return embedding model names that have a trained .joblib for *profile_name*.
|
||||
"""Return embedding model keys that have a trained .joblib for *profile_name*.
|
||||
|
||||
Looks for files matching ``{profile}_{MODEL}.joblib`` in the models dir.
|
||||
Looks for files matching ``{profile}_{KEY}.joblib`` in the models dir.
|
||||
KEY is either a bare embed model name (e.g. ``EAT_LARGE``) or
|
||||
``{MODEL}_{name}`` for user-named variants.
|
||||
"""
|
||||
prefix = f"{profile_name}_"
|
||||
suffix = ".joblib"
|
||||
@@ -685,13 +687,17 @@ def list_trained_models(profile_name: str = "default") -> list[str]:
|
||||
return result
|
||||
for fname in os.listdir(_MODEL_DIR):
|
||||
if fname.startswith(prefix) and fname.endswith(suffix):
|
||||
model_name = fname[len(prefix):-len(suffix)]
|
||||
if model_name in _EMBED_MODELS:
|
||||
result.append(model_name)
|
||||
key = fname[len(prefix):-len(suffix)]
|
||||
if key in _EMBED_MODELS:
|
||||
result.append(key)
|
||||
else:
|
||||
for m in _EMBED_MODELS:
|
||||
if key.startswith(m + "_"):
|
||||
result.append(key)
|
||||
break
|
||||
# Also check legacy {profile}.joblib
|
||||
legacy = os.path.join(_MODEL_DIR, f"{profile_name}.joblib")
|
||||
if os.path.exists(legacy) and not result:
|
||||
# Legacy model — we don't know the embed model, but it's usable
|
||||
result.append("")
|
||||
return sorted(result)
|
||||
|
||||
|
||||
+490
-32
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import re
|
||||
import sqlite3
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
@@ -7,6 +8,12 @@ from pathlib import Path
|
||||
from .paths import _log
|
||||
|
||||
|
||||
def _extract_m_number(output_path: str) -> int | None:
|
||||
"""Extract the manual export number from a path like clip_001_m3_0.mp4."""
|
||||
m = re.search(r'_m(\d+)[_.]', os.path.basename(output_path))
|
||||
return int(m.group(1)) if m else None
|
||||
|
||||
|
||||
class ProcessedDB:
|
||||
_SCHEMA_VERSION = 4 # bump when schema changes
|
||||
|
||||
@@ -46,6 +53,7 @@ class ProcessedDB:
|
||||
" crop_center REAL NOT NULL DEFAULT 0.5,"
|
||||
" format TEXT NOT NULL DEFAULT 'MP4',"
|
||||
" clip_count INTEGER NOT NULL DEFAULT 3,"
|
||||
" clip_duration REAL NOT NULL DEFAULT 8.0,"
|
||||
" spread REAL NOT NULL DEFAULT 3.0,"
|
||||
" profile TEXT NOT NULL DEFAULT 'default',"
|
||||
" source_path TEXT NOT NULL DEFAULT '',"
|
||||
@@ -63,6 +71,7 @@ class ProcessedDB:
|
||||
"crop_center": "REAL NOT NULL DEFAULT 0.5",
|
||||
"format": "TEXT NOT NULL DEFAULT 'MP4'",
|
||||
"clip_count": "INTEGER NOT NULL DEFAULT 3",
|
||||
"clip_duration": "REAL NOT NULL DEFAULT 8.0",
|
||||
"spread": "REAL NOT NULL DEFAULT 3.0",
|
||||
"profile": "TEXT NOT NULL DEFAULT 'default'",
|
||||
"source_path": "TEXT NOT NULL DEFAULT ''",
|
||||
@@ -232,7 +241,8 @@ class ProcessedDB:
|
||||
label: str = "", category: str = "",
|
||||
short_side: int | None = None, portrait_ratio: str = "",
|
||||
crop_center: float = 0.5, fmt: str = "MP4",
|
||||
clip_count: int = 3, spread: float = 3.0,
|
||||
clip_count: int = 3, clip_duration: float = 8.0,
|
||||
spread: float = 3.0,
|
||||
profile: str = "default", source_path: str = "",
|
||||
scan_export: bool = False) -> None:
|
||||
if not self._enabled:
|
||||
@@ -242,16 +252,60 @@ class ProcessedDB:
|
||||
"INSERT INTO processed"
|
||||
" (filename, start_time, output_path, label, category,"
|
||||
" short_side, portrait_ratio, crop_center, format,"
|
||||
" clip_count, spread, profile, source_path, scan_export, processed_at)"
|
||||
" VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
" clip_count, clip_duration, spread, profile, source_path,"
|
||||
" scan_export, processed_at)"
|
||||
" VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
(filename, start_time, output_path, label, category,
|
||||
short_side, portrait_ratio, crop_center, fmt,
|
||||
clip_count, spread, profile, source_path,
|
||||
clip_count, clip_duration, spread, profile, source_path,
|
||||
1 if scan_export else 0,
|
||||
datetime.now(timezone.utc).isoformat()),
|
||||
)
|
||||
self._con.commit()
|
||||
|
||||
def update_source_paths(self, new_dir: str,
|
||||
playlist_paths: list[str] | None = None,
|
||||
profile: str = "") -> int:
|
||||
"""Re-resolve source_path for all rows whose current path is missing.
|
||||
|
||||
Checks *new_dir* and *playlist_paths* by filename match.
|
||||
Returns the number of rows updated.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return 0
|
||||
lookup: dict[str, str] = {}
|
||||
if playlist_paths:
|
||||
for p in playlist_paths:
|
||||
lookup[os.path.basename(p)] = p
|
||||
if new_dir and os.path.isdir(new_dir):
|
||||
for f in os.listdir(new_dir):
|
||||
fp = os.path.join(new_dir, f)
|
||||
if os.path.isfile(fp):
|
||||
lookup[f] = fp
|
||||
if not lookup:
|
||||
return 0
|
||||
query = "SELECT DISTINCT filename, source_path FROM processed"
|
||||
params: tuple = ()
|
||||
if profile:
|
||||
query += " WHERE profile = ?"
|
||||
params = (profile,)
|
||||
rows = self._con.execute(query, params).fetchall()
|
||||
updated = 0
|
||||
with self._lock:
|
||||
for fn, sp in rows:
|
||||
if sp and os.path.exists(sp):
|
||||
continue
|
||||
new_path = lookup.get(fn)
|
||||
if new_path and os.path.isfile(new_path):
|
||||
self._con.execute(
|
||||
"UPDATE processed SET source_path = ? WHERE filename = ?",
|
||||
(new_path, fn),
|
||||
)
|
||||
updated += 1
|
||||
if updated:
|
||||
self._con.commit()
|
||||
return updated
|
||||
|
||||
def get_labels(self) -> list[str]:
|
||||
"""Return distinct non-empty labels ordered by most recently used."""
|
||||
if not self._enabled:
|
||||
@@ -278,19 +332,37 @@ class ProcessedDB:
|
||||
cur.row_factory = sqlite3.Row
|
||||
row = cur.execute(
|
||||
"SELECT label, category, short_side, portrait_ratio, crop_center, format,"
|
||||
" clip_count, spread"
|
||||
" clip_count, clip_duration, spread"
|
||||
" FROM processed WHERE output_path = ?",
|
||||
(output_path,),
|
||||
).fetchone()
|
||||
return dict(row) if row else None
|
||||
|
||||
def delete_by_output_path(self, output_path: str) -> None:
|
||||
def delete_by_output_path(self, output_path: str, profile: str = "") -> None:
|
||||
if not self._enabled:
|
||||
return
|
||||
with self._lock:
|
||||
self._con.execute("DELETE FROM processed WHERE output_path = ?", (output_path,))
|
||||
if profile:
|
||||
self._con.execute(
|
||||
"DELETE FROM processed WHERE output_path = ? AND profile = ?",
|
||||
(output_path, profile),
|
||||
)
|
||||
else:
|
||||
self._con.execute(
|
||||
"DELETE FROM processed WHERE output_path = ?", (output_path,),
|
||||
)
|
||||
self._con.commit()
|
||||
|
||||
def is_path_used_by_other_profiles(self, output_path: str, profile: str) -> bool:
    """Tell whether a profile other than *profile* has a row for *output_path*.

    Always ``False`` when the database is disabled.
    """
    if self._enabled:
        sql = "SELECT 1 FROM processed WHERE output_path = ? AND profile != ? LIMIT 1"
        hit = self._con.execute(sql, (output_path, profile)).fetchone()
        return hit is not None
    return False
|
||||
|
||||
def get_group(self, output_path: str, profile: str = "") -> list[str]:
|
||||
"""Return all output_paths sharing the same (filename, start_time, profile) as *output_path*."""
|
||||
if not self._enabled:
|
||||
@@ -336,29 +408,120 @@ class ProcessedDB:
|
||||
self._con.commit()
|
||||
return paths
|
||||
|
||||
def _get_markers_for(self, match: str, profile: str = "default") -> list[tuple[float, int, str]]:
|
||||
rows = self._con.execute(
|
||||
"SELECT start_time, output_path FROM processed"
|
||||
" WHERE filename = ? AND profile = ? AND scan_export = 0"
|
||||
" ORDER BY start_time",
|
||||
(match, profile),
|
||||
).fetchall()
|
||||
# Deduplicate by start_time — batch exports share the same cursor.
|
||||
seen_times: dict[float, tuple[float, int, str]] = {}
|
||||
n = 0
|
||||
for t, p in rows:
|
||||
def _get_markers_for(self, match: str, profile: str = "default",
|
||||
export_folder: str = "") -> list[tuple[float, int, str, float]]:
|
||||
if export_folder:
|
||||
rows = self._con.execute(
|
||||
"SELECT start_time, output_path, clip_duration, clip_count, spread"
|
||||
" FROM processed"
|
||||
" WHERE filename = ? AND profile = ? AND scan_export = 0"
|
||||
" AND output_path LIKE ?"
|
||||
" ORDER BY start_time",
|
||||
(match, profile, export_folder.rstrip("/") + "/%"),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = self._con.execute(
|
||||
"SELECT start_time, output_path, clip_duration, clip_count, spread"
|
||||
" FROM processed"
|
||||
" WHERE filename = ? AND profile = ? AND scan_export = 0"
|
||||
" ORDER BY start_time",
|
||||
(match, profile),
|
||||
).fetchall()
|
||||
seen_times: dict[float, tuple[float, int, str, float]] = {}
|
||||
seq = 0
|
||||
for t, p, dur, cnt, spr in rows:
|
||||
if t not in seen_times:
|
||||
n += 1
|
||||
seen_times[t] = (t, n, p)
|
||||
seq += 1
|
||||
num = _extract_m_number(p) or seq
|
||||
span = (dur or 8.0) + ((cnt or 1) - 1) * (spr or 3.0)
|
||||
seen_times[t] = (t, num, p, span)
|
||||
return list(seen_times.values())
|
||||
|
||||
def get_markers(self, filename: str, profile: str = "default") -> list[tuple[float, int, str]]:
|
||||
"""Return [(start_time, marker_number, output_path), ...] for exact
|
||||
filename match, sorted by start_time. Empty list if no match.
|
||||
Excludes scan exports (shown via scan panel instead)."""
|
||||
def get_markers(self, filename: str, profile: str = "default",
|
||||
export_folder: str = "") -> list[tuple[float, int, str, float]]:
|
||||
"""Return [(start_time, marker_number, output_path, clip_span), ...]
|
||||
for exact filename match, sorted by start_time. Empty list if no match.
|
||||
Excludes scan exports (shown via scan panel instead).
|
||||
If export_folder is set, only markers in that folder are returned."""
|
||||
if not self._enabled:
|
||||
return []
|
||||
return self._get_markers_for(filename, profile)
|
||||
return self._get_markers_for(filename, profile, export_folder)
|
||||
|
||||
def get_other_folder_markers(self, filename: str, profile: str = "default",
                             export_folder: str = ""
                             ) -> dict[str, list[tuple[float, int, str, float]]]:
    """Return {folder_name: [(start_time, num, path, span), ...]} for
    markers NOT in export_folder, grouped by their base export folder.

    Only non-scan exports of (*filename*, *profile*) are considered. The
    base folder of a clip is everything before the first path component
    starting with ``vid_``; when no such component exists, the clip's
    grandparent directory is used. Folders whose name ends in
    ``_disabled`` are left out. Within a folder, only the first row per
    distinct start_time produces a marker.

    Returns an empty dict when the database is disabled or *export_folder*
    is empty.
    """
    if not self._enabled or not export_folder:
        return {}

    # Folder names routinely contain "_" (and may contain "%"), which are
    # LIKE wildcards; escape them so only the literal folder is excluded.
    literal = export_folder.rstrip("/") + "/"
    pattern = (literal.replace("\\", "\\\\")
                      .replace("%", "\\%")
                      .replace("_", "\\_")) + "%"
    rows = self._con.execute(
        "SELECT start_time, output_path, clip_duration, clip_count, spread"
        " FROM processed"
        " WHERE filename = ? AND profile = ? AND scan_export = 0"
        " AND output_path NOT LIKE ? ESCAPE '\\'"
        " ORDER BY start_time",
        (filename, profile, pattern),
    ).fetchall()

    by_folder: dict[str, list] = {}
    for t, p, dur, cnt, spr in rows:
        parts = p.split("/")
        for i, part in enumerate(parts):
            if part.startswith("vid_"):
                folder = "/".join(parts[:i])
                break
        else:
            folder = os.path.dirname(os.path.dirname(p))
        if os.path.basename(folder).endswith("_disabled"):
            continue  # disabled clips are excluded from the timeline
        by_folder.setdefault(folder, []).append((t, p, dur, cnt, spr))

    result: dict[str, list[tuple[float, int, str, float]]] = {}
    for folder, folder_rows in by_folder.items():
        seen: dict[float, tuple[float, int, str, float]] = {}
        seq = 0
        for t, p, dur, cnt, spr in folder_rows:
            if t in seen:
                continue
            seq += 1
            # Prefer the number embedded in the file name; fall back to the
            # running position within this folder.
            num = _extract_m_number(p) or seq
            # Missing/zero stored values fall back to 8.0 s duration,
            # 1 clip and 3.0 s spread.
            span = (dur or 8.0) + ((cnt or 1) - 1) * (spr or 3.0)
            seen[t] = (t, num, p, span)
        result[os.path.basename(folder)] = list(seen.values())
    return result
|
||||
|
||||
def get_manual_export_groups(self, filename: str, profile: str = "default"
                             ) -> list[dict]:
    """List the manual (non-scan) export groups recorded for *filename*.

    Rows are grouped by ``start_time``. Each group is a dict with the keys
    start_time, paths, clip_count, clip_duration, spread, short_side,
    portrait_ratio, crop_center, format, label and category. The settings
    come from the first row of the group (rows are read ordered by
    start_time, then output_path), and ``paths`` collects every
    output_path of the group in that same order.

    Returns an empty list when the database is disabled.
    """
    if not self._enabled:
        return []
    sql = (
        "SELECT start_time, output_path, clip_count, clip_duration, spread,"
        " short_side, portrait_ratio, crop_center, format, label, category"
        " FROM processed"
        " WHERE filename = ? AND profile = ? AND scan_export = 0"
        " ORDER BY start_time, output_path"
    )
    by_start: dict[float, dict] = {}
    for (start, path, count, duration, spread, short_side,
         ratio, center, fmt, label, category) in self._con.execute(
            sql, (filename, profile)).fetchall():
        group = by_start.get(start)
        if group is None:
            group = {
                "start_time": start,
                "paths": [],
                "clip_count": count,
                "clip_duration": duration,
                "spread": spread,
                "short_side": short_side,
                "portrait_ratio": ratio,
                "crop_center": center,
                "format": fmt,
                "label": label,
                "category": category,
            }
            by_start[start] = group
        group["paths"].append(path)
    return list(by_start.values())
|
||||
|
||||
def get_clip_count(self, filename: str, profile: str = "default") -> int:
|
||||
"""Return total number of exported clips (including scan exports)."""
|
||||
@@ -370,15 +533,235 @@ class ProcessedDB:
|
||||
).fetchone()
|
||||
return row[0] if row else 0
|
||||
|
||||
def get_clip_counts_by_folder(self, filename: str,
                              profile: str = "default") -> dict[str, int]:
    """Count the clips of one video per export folder.

    The folder key is the base name of the grandparent directory of each
    row's output_path (e.g. ``mp4_doggy_clap``). Every row for
    (*filename*, *profile*) is counted. Returns an empty dict when the
    database is disabled.
    """
    if not self._enabled:
        return {}
    cursor = self._con.execute(
        "SELECT output_path FROM processed WHERE filename = ? AND profile = ?",
        (filename, profile),
    )
    tally: dict[str, int] = {}
    for record in cursor.fetchall():
        clip_dir = os.path.dirname(record[0])
        name = os.path.basename(os.path.dirname(clip_dir))
        if name in tally:
            tally[name] += 1
        else:
            tally[name] = 1
    return tally
|
||||
|
||||
def get_all_folder_counts(self, profile: str = "default") -> dict[str, int]:
    """Count clips per export folder over every video in *profile*.

    The folder key is the base name of the grandparent directory of each
    row's output_path. No folder is filtered out, so ``_disabled`` folders
    are part of the result and callers can offer enable/disable. Returns
    an empty dict when the database is disabled.
    """
    if not self._enabled:
        return {}
    cursor = self._con.execute(
        "SELECT output_path FROM processed WHERE profile = ?",
        (profile,),
    )
    tally: dict[str, int] = {}
    for record in cursor.fetchall():
        clip_dir = os.path.dirname(record[0])
        name = os.path.basename(os.path.dirname(clip_dir))
        if name in tally:
            tally[name] += 1
        else:
            tally[name] = 1
    return tally
|
||||
|
||||
def relocate_video_clips(self, filename: "str | None", profile: str,
                         src_folder_name: str,
                         dst_folder_name: str) -> int:
    """Move clips from one export folder to a sibling folder.

    Matches rows whose grandparent dir basename == *src_folder_name*
    (restricted to *filename* when given, else every video in *profile*),
    then moves each clip (and any ``.wav`` sidecar) on disk into a sibling
    folder named *dst_folder_name*, migrates its dataset.json annotation,
    and rewrites output_path in the DB.

    Returns:
        The number of DB rows whose output_path was rewritten. Rows whose
        file is missing on disk are rewritten (and counted) as well.
        Returns 0 when the database is disabled, when nothing matches, or
        when source and destination folder names are identical.
    """
    if not self._enabled:
        return 0
    if src_folder_name == dst_folder_name:
        # Nothing can move. Without this guard the annotation migration
        # below would delete the annotation of every unlabeled clip and
        # the call would report clips as relocated.
        return 0
    import shutil
    from .annotations import remove_clip_annotation, upsert_clip_annotation

    if filename is None:
        rows = self._con.execute(
            "SELECT id, output_path, label FROM processed WHERE profile = ?",
            (profile,),
        ).fetchall()
    else:
        rows = self._con.execute(
            "SELECT id, output_path, label FROM processed"
            " WHERE filename = ? AND profile = ?",
            (filename, profile),
        ).fetchall()

    moves: list[tuple[str, str]] = []      # (old_path, new_path)
    updates: list[tuple[str, int]] = []    # (new_path, id)
    ann: list[tuple[str, str, str, str, str]] = []  # old_fold,new_fold,old,new,label
    new_dirs: set[str] = set()
    old_vid_dirs: set[str] = set()

    for rid, op, label in rows:
        vid_dir = os.path.dirname(op)
        export_folder = os.path.dirname(vid_dir)
        if os.path.basename(export_folder) != src_folder_name:
            continue
        new_export_folder = os.path.join(
            os.path.dirname(export_folder), dst_folder_name)
        new_vid_dir = os.path.join(new_export_folder, os.path.basename(vid_dir))
        new_op = os.path.join(new_vid_dir, os.path.basename(op))
        updates.append((new_op, rid))
        new_dirs.add(new_vid_dir)
        old_vid_dirs.add(vid_dir)
        # Files and annotations are only migrated for clips present on disk.
        if os.path.exists(op):
            moves.append((op, new_op))
            ann.append((export_folder, new_export_folder, op, new_op, label or ""))

    if not updates:
        return 0

    with self._lock:
        for d in sorted(new_dirs):
            os.makedirs(d, exist_ok=True)
        for old, new in moves:
            # Never overwrite a file that already sits at the destination.
            if os.path.exists(old) and not os.path.exists(new):
                shutil.move(old, new)
            wav_old, wav_new = old + ".wav", new + ".wav"
            if os.path.exists(wav_old) and not os.path.exists(wav_new):
                shutil.move(wav_old, wav_new)
        self._con.executemany(
            "UPDATE processed SET output_path = ? WHERE id = ?", updates)
        self._con.commit()

    # Migrate dataset.json entries (best-effort, outside the DB lock).
    for old_fold, new_fold, old_op, new_op, label in ann:
        remove_clip_annotation(old_fold, old_op)
        if label:
            upsert_clip_annotation(new_fold, new_op, label)

    # Remove now-empty old vid dirs and their export folder if empty.
    for d in sorted(old_vid_dirs):
        try:
            if os.path.isdir(d) and not os.listdir(d):
                os.rmdir(d)
            parent = os.path.dirname(d)
            if os.path.isdir(parent) and not os.listdir(parent):
                os.rmdir(parent)
        except OSError:
            # Directory cleanup is cosmetic; a failure must not undo the move.
            pass

    _log(f"Relocated {len(updates)} clip(s) of {filename or 'all videos'}: "
         f"{src_folder_name} -> {dst_folder_name}")
    return len(updates)
|
||||
|
||||
def get_profiles(self) -> list[str]:
|
||||
"""Return distinct profile names, ordered alphabetically."""
|
||||
"""Return distinct profile names across all tables, ordered alphabetically."""
|
||||
if not self._enabled:
|
||||
return []
|
||||
rows = self._con.execute(
|
||||
"SELECT DISTINCT profile FROM processed ORDER BY profile"
|
||||
"SELECT DISTINCT profile FROM processed"
|
||||
" UNION SELECT DISTINCT profile FROM scan_results"
|
||||
" UNION SELECT DISTINCT profile FROM hard_negatives"
|
||||
" ORDER BY profile"
|
||||
).fetchall()
|
||||
return [r[0] for r in rows]
|
||||
|
||||
def duplicate_profile(self, src: str, dst: str) -> int:
    """Copy all profile data from *src* to *dst*.

    Copies processed (exports), scan_results, hard_negatives, and
    hidden_files. The copy is all-or-nothing: if any statement fails, the
    pending inserts are rolled back and the exception is re-raised.

    Returns:
        Total number of source rows read and copied across the four
        tables (hidden_files rows already present for *dst* are ignored by
        the insert but still counted). 0 when the database is disabled or
        *src* equals *dst*.
    """
    if not self._enabled or src == dst:
        return 0
    total = 0
    with self._lock:
        try:
            # processed (exports)
            rows = self._con.execute(
                "SELECT filename, start_time, output_path, label, category,"
                " short_side, portrait_ratio, crop_center, format,"
                " clip_count, clip_duration, spread, source_path, scan_export,"
                " processed_at"
                " FROM processed WHERE profile = ?", (src,),
            ).fetchall()
            self._con.executemany(
                "INSERT INTO processed"
                " (filename, start_time, output_path, label, category,"
                "  short_side, portrait_ratio, crop_center, format,"
                "  clip_count, clip_duration, spread, profile,"
                "  source_path, scan_export, processed_at)"
                " VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
                # profile sits between spread (index 11) and source_path
                [(*r[:12], dst, *r[12:]) for r in rows],
            )
            total += len(rows)

            # scan_results
            rows = self._con.execute(
                "SELECT filename, model, start_time, end_time, score,"
                " disabled, orig_start_time, orig_end_time, scan_timestamp"
                " FROM scan_results WHERE profile = ?", (src,),
            ).fetchall()
            self._con.executemany(
                "INSERT INTO scan_results"
                " (filename, profile, model, start_time, end_time, score,"
                "  disabled, orig_start_time, orig_end_time, scan_timestamp)"
                " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                [(r[0], dst, *r[1:]) for r in rows],
            )
            total += len(rows)

            # hard_negatives
            rows = self._con.execute(
                "SELECT filename, start_time, source_path, source_model"
                " FROM hard_negatives WHERE profile = ?", (src,),
            ).fetchall()
            self._con.executemany(
                "INSERT INTO hard_negatives"
                " (filename, profile, start_time, source_path, source_model)"
                " VALUES (?, ?, ?, ?, ?)",
                [(r[0], dst, *r[1:]) for r in rows],
            )
            total += len(rows)

            # hidden_files
            rows = self._con.execute(
                "SELECT filename FROM hidden_files WHERE profile = ?", (src,),
            ).fetchall()
            self._con.executemany(
                "INSERT OR IGNORE INTO hidden_files (filename, profile)"
                " VALUES (?, ?)",
                [(r[0], dst) for r in rows],
            )
            total += len(rows)

            self._con.commit()
        except Exception:
            # Do not leave a half-copied profile pending in the open
            # transaction, where the next commit() would persist it.
            self._con.rollback()
            raise
    return total
|
||||
|
||||
def count_profile_rows(self, profile: str) -> int:
    """Add up the rows stored for *profile* in every profile-keyed table.

    The tables counted are processed, scan_results, hard_negatives and
    hidden_files. Returns 0 when the database is disabled.
    """
    if not self._enabled:
        return 0

    def rows_in(table: str) -> int:
        # Table names come from the fixed tuple below, never from callers.
        hit = self._con.execute(
            f"SELECT COUNT(*) FROM {table} WHERE profile = ?", (profile,),
        ).fetchone()
        return hit[0] if hit else 0

    tables = ("processed", "scan_results", "hard_negatives", "hidden_files")
    return sum(rows_in(name) for name in tables)
|
||||
|
||||
def delete_profile(self, profile: str) -> None:
    """Remove every row belonging to *profile*.

    Clears processed, scan_results, hard_negatives and hidden_files under
    the instance lock and commits once at the end. Does nothing when the
    database is disabled.
    """
    if self._enabled:
        tables = ("processed", "scan_results", "hard_negatives", "hidden_files")
        with self._lock:
            for table in tables:
                statement = f"DELETE FROM {table} WHERE profile = ?"
                self._con.execute(statement, (profile,))
            self._con.commit()
|
||||
|
||||
def get_all_export_paths(self, profile: str = "default") -> list[str]:
|
||||
"""Return all unique output_path values for a given profile."""
|
||||
if not self._enabled:
|
||||
@@ -418,6 +801,32 @@ class ProcessedDB:
|
||||
pass
|
||||
return max_n
|
||||
|
||||
def get_scan_export_rep_paths_in_range(self, filename: str, profile: str,
                                       start: float, end: float) -> list[str]:
    """Return one representative output_path per distinct scan-export
    start_time inside [start, end] for (filename, profile).

    Both bounds are inclusive. The representative of a start_time is its
    lexicographically smallest output_path, and the result is ordered by
    start_time, so repeated calls return the same list. Returns an empty
    list when the database is disabled.
    """
    if not self._enabled:
        return []
    # MIN() + ORDER BY make the choice and the order deterministic; a bare
    # column next to GROUP BY would let SQLite pick an arbitrary row.
    rows = self._con.execute(
        "SELECT MIN(output_path) FROM processed"
        " WHERE filename = ? AND profile = ? AND scan_export = 1"
        " AND start_time BETWEEN ? AND ?"
        " GROUP BY start_time"
        " ORDER BY start_time",
        (filename, profile, start, end),
    ).fetchall()
    return [path for (path,) in rows]
|
||||
|
||||
def get_scan_export_times(self, filename: str, profile: str) -> list[float]:
    """List the start_time of every scan-export row of (*filename*, *profile*).

    One entry per row with ``scan_export = 1``; duplicates are kept.
    Returns an empty list when the database is disabled.
    """
    if not self._enabled:
        return []
    sql = (
        "SELECT start_time FROM processed"
        " WHERE filename = ? AND profile = ? AND scan_export = 1"
    )
    cursor = self._con.execute(sql, (filename, profile))
    return [start for (start,) in cursor.fetchall()]
|
||||
|
||||
def delete_scan_exports(self, filename: str, profile: str) -> int:
|
||||
"""Delete all scan_export entries for *filename* in *profile*.
|
||||
|
||||
@@ -504,13 +913,15 @@ class ProcessedDB:
|
||||
folder_names: set[str] = set()
|
||||
for (op,) in rows:
|
||||
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
|
||||
if grandparent:
|
||||
if grandparent and not grandparent.endswith("_disabled"):
|
||||
folder_names.add(grandparent)
|
||||
return sorted(folder_names)
|
||||
|
||||
def get_training_data(self, profile: str, positive_folder: str,
|
||||
def get_training_data(self, profile: str,
|
||||
positive_folder: "str | list[str]",
|
||||
negative_folder: str = "",
|
||||
fallback_video_dir: str = "",
|
||||
playlist_paths: list[str] | None = None,
|
||||
include_scan_exports: bool = False,
|
||||
use_hard_negatives: bool = True,
|
||||
) -> list[tuple[str, list[float], list[float], list[float]]]:
|
||||
@@ -518,18 +929,20 @@ class ProcessedDB:
|
||||
|
||||
Args:
|
||||
profile: profile name
|
||||
positive_folder: export folder name for positive class (e.g. "mp4_Intense")
|
||||
positive_folder: export folder name(s) for positive class
|
||||
negative_folder: export folder name for explicit negatives (optional)
|
||||
fallback_video_dir: if source_path is empty, try filename in this dir
|
||||
playlist_paths: loaded playlist paths to resolve filenames
|
||||
include_scan_exports: if True, include auto-exported scan clips
|
||||
use_hard_negatives: if False, skip hard negatives from scan feedback
|
||||
|
||||
Returns:
|
||||
list of (source_video_path, positive_times, soft_times, negative_times)
|
||||
per video. Soft times = clips from any other non-negative folder.
|
||||
per video. Soft times = clips from any other non-positive/non-negative folder.
|
||||
"""
|
||||
if not self._enabled:
|
||||
return []
|
||||
pos_folders = {positive_folder} if isinstance(positive_folder, str) else set(positive_folder)
|
||||
if include_scan_exports:
|
||||
rows = self._con.execute(
|
||||
"SELECT filename, start_time, output_path, source_path"
|
||||
@@ -553,7 +966,9 @@ class ProcessedDB:
|
||||
if sp:
|
||||
source_by_filename[fn] = sp
|
||||
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
|
||||
if grandparent == positive_folder:
|
||||
if grandparent.endswith("_disabled"):
|
||||
continue # disabled clips are excluded from training entirely
|
||||
if grandparent in pos_folders:
|
||||
pos_by_video.setdefault(fn, set()).add(st)
|
||||
elif negative_folder and grandparent == negative_folder:
|
||||
neg_by_video.setdefault(fn, set()).add(st)
|
||||
@@ -590,11 +1005,19 @@ class ProcessedDB:
|
||||
result.append(t)
|
||||
return result
|
||||
|
||||
# Build filename→path lookup from playlist
|
||||
playlist_lookup: dict[str, str] = {}
|
||||
if playlist_paths:
|
||||
for p in playlist_paths:
|
||||
playlist_lookup[os.path.basename(p)] = p
|
||||
|
||||
# Include videos that have positives OR explicit negatives
|
||||
all_videos = set(pos_by_video) | set(neg_by_video)
|
||||
result = []
|
||||
for fn in all_videos:
|
||||
sp = source_by_filename.get(fn, "")
|
||||
if not sp or not os.path.exists(sp):
|
||||
sp = playlist_lookup.get(fn, "")
|
||||
if not sp or not os.path.exists(sp):
|
||||
if fallback_video_dir:
|
||||
sp = os.path.join(fallback_video_dir, fn)
|
||||
@@ -777,6 +1200,41 @@ class ProcessedDB:
|
||||
)
|
||||
self._con.commit()
|
||||
|
||||
def insert_scan_result(self, filename: str, profile: str, model: str,
                       start: float, end: float, score: float,
                       disabled: bool, orig_start: float, orig_end: float,
                       scan_timestamp: str = "") -> int:
    """Store one scan result and report the id of the new row.

    *disabled* is written as 1 or 0. The insert runs under the instance
    lock and is committed immediately. Returns -1 when the database is
    disabled or when the cursor reports no usable row id.
    """
    if not self._enabled:
        return -1
    values = (
        filename, profile, model, start, end, score,
        int(bool(disabled)), orig_start, orig_end, scan_timestamp,
    )
    with self._lock:
        cursor = self._con.execute(
            "INSERT INTO scan_results"
            " (filename, profile, model, start_time, end_time, score,"
            "  disabled, orig_start_time, orig_end_time, scan_timestamp)"
            " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            values,
        )
        self._con.commit()
        new_id = cursor.lastrowid
    if not new_id:
        return -1
    return int(new_id)
|
||||
|
||||
def update_scan_result_full(self, row_id: int, start: float, end: float,
                            score: float, orig_start: float,
                            orig_end: float) -> None:
    """Overwrite bounds, score and the orig_* bounds of one scan result.

    Used after merging rows. The row is selected by ``id``; the update runs
    under the instance lock and is committed immediately. Does nothing
    when the database is disabled.
    """
    if self._enabled:
        statement = (
            "UPDATE scan_results"
            " SET start_time = ?, end_time = ?, score = ?,"
            "     orig_start_time = ?, orig_end_time = ?"
            " WHERE id = ?"
        )
        values = (start, end, score, orig_start, orig_end, row_id)
        with self._lock:
            self._con.execute(statement, values)
            self._con.commit()
|
||||
|
||||
def get_scan_models(self, filename: str, profile: str) -> list[str]:
|
||||
"""Return model names that have scan results for this file."""
|
||||
if not self._enabled:
|
||||
|
||||
+5
-3
@@ -78,6 +78,7 @@ def build_ffmpeg_command(
|
||||
crop_center: float = 0.5,
|
||||
image_sequence: bool = False,
|
||||
encoder: str = "libx264",
|
||||
duration: float = 8.0,
|
||||
) -> list[str]:
|
||||
# -ss before -i: fast input-seeking. Safe here because we always re-encode,
|
||||
# so there is no keyframe-alignment issue from pre-input seek.
|
||||
@@ -96,7 +97,7 @@ def build_ffmpeg_command(
|
||||
"-threads", "0",
|
||||
"-ss", str(start),
|
||||
"-i", input_path,
|
||||
"-t", "8",
|
||||
"-t", str(duration),
|
||||
]
|
||||
|
||||
filters: list[str] = []
|
||||
@@ -141,14 +142,15 @@ def build_ffmpeg_command(
|
||||
return cmd
|
||||
|
||||
|
||||
def build_audio_extract_command(input_path: str, start: float, sequence_dir: str) -> list[str]:
|
||||
def build_audio_extract_command(input_path: str, start: float, sequence_dir: str,
|
||||
duration: float = 8.0) -> list[str]:
|
||||
"""Return an ffmpeg command that extracts audio to <sequence_dir>.wav."""
|
||||
audio_path = sequence_dir + ".wav"
|
||||
return [
|
||||
_bin("ffmpeg"), "-y",
|
||||
"-ss", str(start),
|
||||
"-i", input_path,
|
||||
"-t", "8",
|
||||
"-t", str(duration),
|
||||
"-vn",
|
||||
"-c:a", "pcm_s16le",
|
||||
audio_path,
|
||||
|
||||
Reference in New Issue
Block a user