perf: single-pass get_training_stats (was O(folders × rows))
Group clips by export folder in a single scan of the rows instead of re-scanning every row once per folder; also drop the extra get_export_folders() query. This speeds up the train-dialog stats when a profile has many subcategories.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+11
-11
@@ -1087,18 +1087,18 @@ class ProcessedDB:
|
|||||||
" WHERE profile = ? AND scan_export = 0",
|
" WHERE profile = ? AND scan_export = 0",
|
||||||
(profile,),
|
(profile,),
|
||||||
).fetchall()
|
).fetchall()
|
||||||
folders = self.get_export_folders(profile, include_scan_exports=include_scan_exports)
|
# Single pass: group by export folder (grandparent dir), counting
|
||||||
stats: dict[str, dict] = {}
|
# clips and distinct source videos. (Was O(folders × rows).)
|
||||||
for folder_name in folders:
|
videos: dict[str, set[str]] = {}
|
||||||
videos: set[str] = set()
|
clips: dict[str, int] = {}
|
||||||
clips = 0
|
|
||||||
for fn, op in rows:
|
for fn, op in rows:
|
||||||
grandparent = os.path.basename(os.path.dirname(os.path.dirname(op)))
|
folder_name = os.path.basename(os.path.dirname(os.path.dirname(op)))
|
||||||
if grandparent == folder_name:
|
if not folder_name or folder_name.endswith("_disabled"):
|
||||||
videos.add(fn)
|
continue
|
||||||
clips += 1
|
videos.setdefault(folder_name, set()).add(fn)
|
||||||
stats[folder_name] = {"videos": len(videos), "clips": clips}
|
clips[folder_name] = clips.get(folder_name, 0) + 1
|
||||||
return {k: v for k, v in stats.items() if v["clips"] > 0}
|
return {f: {"videos": len(videos[f]), "clips": n}
|
||||||
|
for f, n in clips.items() if n > 0}
|
||||||
|
|
||||||
# ── Scan results ─────────────────────────────────────────────
|
# ── Scan results ─────────────────────────────────────────────
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user