perf: single-pass get_training_stats (was O(folders × rows))

Group clips by export folder in one scan instead of re-scanning every row for
each folder, and drop the extra get_export_folders() query. This speeds up the
train-dialog stats when there are many subcategories.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-07 19:52:13 +02:00
parent b738a19304
commit 35c67f4bd5
+11 -11
View File
@@ -1087,18 +1087,18 @@ class ProcessedDB:
" WHERE profile = ? AND scan_export = 0",
(profile,),
).fetchall()
folders = self.get_export_folders(profile, include_scan_exports=include_scan_exports)
stats: dict[str, dict] = {}
for folder_name in folders:
videos: set[str] = set()
clips = 0
# Single pass: group by export folder (grandparent dir), counting
# clips and distinct source videos. (Was O(folders × rows).)
videos: dict[str, set[str]] = {}
clips: dict[str, int] = {}
for fn, op in rows:
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
if grandparent == folder_name:
videos.add(fn)
clips += 1
stats[folder_name] = {"videos": len(videos), "clips": clips}
return {k: v for k, v in stats.items() if v["clips"] > 0}
folder_name = os.path.basename(os.path.dirname(os.path.dirname(op)))
if not folder_name or folder_name.endswith("_disabled"):
continue
videos.setdefault(folder_name, set()).add(fn)
clips[folder_name] = clips.get(folder_name, 0) + 1
return {f: {"videos": len(videos[f]), "clips": n}
for f, n in clips.items() if n > 0}
# ── Scan results ─────────────────────────────────────────────