perf: single-pass get_training_stats (was O(folders × rows))

Group clips by export folder in a single scan instead of re-scanning every row
for each folder; this also drops the extra get_export_folders() query. Speeds up
the train-dialog stats when there are many subcategories.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 19:52:13 +02:00
parent b738a19304
commit 35c67f4bd5
+11 -11
View File
@@ -1087,18 +1087,18 @@ class ProcessedDB:
" WHERE profile = ? AND scan_export = 0", " WHERE profile = ? AND scan_export = 0",
(profile,), (profile,),
).fetchall() ).fetchall()
folders = self.get_export_folders(profile, include_scan_exports=include_scan_exports) # Single pass: group by export folder (grandparent dir), counting
stats: dict[str, dict] = {} # clips and distinct source videos. (Was O(folders × rows).)
for folder_name in folders: videos: dict[str, set[str]] = {}
videos: set[str] = set() clips: dict[str, int] = {}
clips = 0
for fn, op in rows: for fn, op in rows:
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op))) folder_name = os.path.basename(os.path.dirname(os.path.dirname(op)))
if grandparent == folder_name: if not folder_name or folder_name.endswith("_disabled"):
videos.add(fn) continue
clips += 1 videos.setdefault(folder_name, set()).add(fn)
stats[folder_name] = {"videos": len(videos), "clips": clips} clips[folder_name] = clips.get(folder_name, 0) + 1
return {k: v for k, v in stats.items() if v["clips"] > 0} return {f: {"videos": len(videos[f]), "clips": n}
for f, n in clips.items() if n > 0}
# ── Scan results ───────────────────────────────────────────── # ── Scan results ─────────────────────────────────────────────