feat: prefetch audio during Scan All, fix file-switch interruption, fix Windows setup

- Prefetch next video's audio while GPU processes current embeddings
- Don't cancel Scan All when switching files in playlist
- Windows setup script now creates venv, installs PyTorch + requirements
- 8cut.bat auto-detects venv

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-18 21:50:33 +02:00
parent 7dffcb08eb
commit cd0552197f
4 changed files with 115 additions and 19 deletions
@@ -1,3 +1,7 @@
@echo off
 cd /d "%~dp0"
-python main.py %*
+rem Prefer the project's virtual environment interpreter when it exists next
+rem to this script; otherwise fall back to whatever "python" is on PATH.
+if exist ".venv\Scripts\python.exe" (
+    .venv\Scripts\python.exe main.py %*
+) else (
+    python main.py %*
+)
@@ -560,6 +560,21 @@ def _fuse_regions(regions: list[tuple[float, float, float]]
    return fused


+def prefetch_audio(video_path: str, embed_model: str | None = None,
+                    hop: float = 1.0, window: float = _WINDOW) -> np.ndarray | None:
+    """Pre-load audio for a video if embeddings aren't cached.
+
+    Returns the raw audio array, or None if cache already exists.
+    Call from a background thread while the GPU is busy with another video.
+    """
+    if _w2v_cache_exists(video_path, hop, window, embed_model):
+        return None
+    _log(f"audio_scan: prefetching {os.path.basename(video_path)}")
+    y = _load_audio_ffmpeg(video_path, sr=_SR)
+    _log(f"audio_scan: prefetched {len(y)/_SR:.1f}s")
+    return y
+
+
 def scan_video(
    video_path: str,
    model: dict = None,
@@ -567,10 +582,12 @@ def scan_video(
    hop: float = 1.0,
    window: float = _WINDOW,
    cancel_flag: object = None,
+    prefetched_audio: np.ndarray | None = None,
 ) -> list[tuple[float, float, float]]:
    """Scan a video for matching audio regions using a trained classifier.

    Returns list of (start_time, end_time, score) above threshold.
+    If prefetched_audio is provided, skips the ffmpeg decode step.
    """
    if model is None:
        _log("audio_scan: no model provided")
@@ -584,8 +601,12 @@ def scan_video(
    if cached is not None:
        timestamps, window_vectors = cached
    else:
-        _log(f"audio_scan: loading {video_path}")
-        y = _load_audio_ffmpeg(video_path, sr=_SR)
+        if prefetched_audio is not None:
+            _log(f"audio_scan: using prefetched audio")
+            y = prefetched_audio
+        else:
+            _log(f"audio_scan: loading {video_path}")
+            y = _load_audio_ffmpeg(video_path, sr=_SR)
        sr = _SR
        _log(f"audio_scan: {len(y)/sr:.1f}s loaded")

@@ -194,11 +194,13 @@ class ScanWorker(QThread):
    progress = pyqtSignal(str)    # status message

    def __init__(self, video_path: str, model: dict,
-                 threshold: float = 0.30):
+                 threshold: float = 0.30,
+                 prefetched_audio=None):
        super().__init__()
        self._video_path = video_path
        self._model = model
        self._threshold = threshold
+        self._prefetched_audio = prefetched_audio
        self._cancel = False

    def cancel(self) -> None:
@@ -211,7 +213,9 @@ class ScanWorker(QThread):
            regions = scan_video(
                self._video_path, model=self._model,
                threshold=self._threshold, cancel_flag=self,
+                prefetched_audio=self._prefetched_audio,
            )
+            self._prefetched_audio = None  # free memory
            if not self._cancel:
                self.scan_done.emit(regions)
        except Exception as e:
@@ -2939,13 +2943,14 @@ class MainWindow(QMainWindow):
        self._crop_keyframes.clear()
        self._timeline.set_crop_keyframes([])
        self._timeline.clear_scan_regions()
-        if self._scan_worker and self._scan_worker.isRunning():
-            self._scan_worker.cancel()
-        self._cleanup_scan_worker()
-        self._scan_all_queue.clear()
-        self._btn_scan.setEnabled(True)
-        self._btn_scan_all.setText("Scan All")
-        self._btn_scan_all.setEnabled(True)
+        # Don't interrupt Scan All when switching files — only cancel solo scans
+        # NOTE(review): an empty queue is used here as the "no Scan All running"
+        # signal. If the queue is already empty while the final Scan All video
+        # is still being scanned, this branch would cancel that scan as well —
+        # confirm against the code that pops the queue, and consider a
+        # dedicated "Scan All in progress" flag.
+        if not self._scan_all_queue and not getattr(self, '_scan_all_stopping', False):
+            if self._scan_worker and self._scan_worker.isRunning():
+                self._scan_worker.cancel()
+            self._cleanup_scan_worker()
+            self._btn_scan.setEnabled(True)
+            self._btn_scan_all.setText("Scan All")
+            self._btn_scan_all.setEnabled(True)
        # Load saved scan results for this file
        if self._file_path:
            filename = os.path.basename(self._file_path)
@@ -3732,6 +3737,7 @@ class MainWindow(QMainWindow):
            else:
                self._show_status(f"Scan All complete: {self._scan_all_total} videos scanned")
            self._scan_all_stopping = False
+            self._scan_all_prefetched = {}
            return

        self._cleanup_scan_worker()
@@ -3742,14 +3748,51 @@ class MainWindow(QMainWindow):
            f"Scan All: {remaining}/{self._scan_all_total} — "
            f"{os.path.basename(path)}")

+        # Use prefetched audio if available
+        prefetched = getattr(self, '_scan_all_prefetched', {}).pop(path, None)
+
        threshold = self._sld_threshold.value()
        self._scan_worker = ScanWorker(
            path, model=self._scan_all_model, threshold=threshold,
+            prefetched_audio=prefetched,
        )
        self._scan_worker.scan_done.connect(self._on_scan_all_done)
        self._scan_worker.error.connect(self._on_scan_all_error)
        self._scan_worker.start()

+        # Prefetch audio for the next video while GPU is busy
+        self._prefetch_next()
+
+    def _prefetch_next(self) -> None:
+        """Prefetch audio for the next queued video in a background thread."""
+        if not self._scan_all_queue:
+            return
+        next_path = self._scan_all_queue[0]
+        if not hasattr(self, '_scan_all_prefetched'):
+            self._scan_all_prefetched = {}
+        if next_path in self._scan_all_prefetched:
+            return
+        embed_model = self._scan_all_model.get("embed_model")
+        from concurrent.futures import ThreadPoolExecutor
+        if not hasattr(self, '_prefetch_pool'):
+            self._prefetch_pool = ThreadPoolExecutor(max_workers=1)
+        def _do_prefetch(p, em):
+            from core.audio_scan import prefetch_audio
+            return p, prefetch_audio(p, embed_model=em)
+        future = self._prefetch_pool.submit(_do_prefetch, next_path, embed_model)
+        future.add_done_callback(self._on_prefetch_done)
+
+    def _on_prefetch_done(self, future) -> None:
+        """Store prefetched audio data (called from thread pool)."""
+        try:
+            path, audio = future.result()
+            if audio is not None:
+                if not hasattr(self, '_scan_all_prefetched'):
+                    self._scan_all_prefetched = {}
+                self._scan_all_prefetched[path] = audio
+        except Exception as e:
+            _log(f"Prefetch error: {e}")
+
    def _on_scan_all_done(self, regions: list) -> None:
        """Save batch scan results and continue to next video."""
        path = getattr(self, '_scan_all_current_path', '')
@@ -3759,6 +3802,9 @@ class MainWindow(QMainWindow):
            profile = getattr(self, '_scan_all_profile', self._profile)
            self._db.save_scan_results(
                filename, profile, model_label, regions)
+            done = self._scan_all_total - len(self._scan_all_queue)
+            _log(f"Scan All: {done}/{self._scan_all_total} done — "
+                 f"{filename}: {len(regions)} regions")
            # If this is the currently loaded file, update the panel
            if self._file_path and os.path.basename(self._file_path) == filename:
                self._scan_panel.load_for_file(filename, profile)
@@ -1,7 +1,7 @@
 # 8-cut Windows setup script
 # Run once: powershell -ExecutionPolicy Bypass -File setup-windows.ps1
 #
-# Prerequisites: Python 3.10+ must be installed and on PATH
+# Prerequisites: Python 3.11+ must be installed and on PATH
 #   https://www.python.org/downloads/

 $ErrorActionPreference = "Stop"
@@ -9,11 +9,32 @@ $root = Split-Path -Parent $MyInvocation.MyCommand.Path

 Write-Host "=== 8-cut Windows Setup ===" -ForegroundColor Cyan

-# ── Python deps ────────────────────────────────────────────
-Write-Host "`nInstalling Python dependencies..."
-pip install PyQt6 python-mpv
+# ── Virtual environment ───────────────────────────────────
+$venvDir = Join-Path $root ".venv"
+if (Test-Path (Join-Path $venvDir "Scripts\python.exe")) {
+    Write-Host "`nVirtual environment already exists, activating..." -ForegroundColor Green
+} else {
+    Write-Host "`nCreating virtual environment..."
+    python -m venv $venvDir
+    Write-Host "Virtual environment created at $venvDir" -ForegroundColor Green
+}
+& "$venvDir\Scripts\Activate.ps1"

-# ── libmpv ─────────────────────────────────────────────────
+# ── PyTorch ───────────────────────────────────────────────
+# Probe for an existing install. The probe redirects stderr, and in Windows
+# PowerShell 5.1 redirected native stderr combined with
+# $ErrorActionPreference = "Stop" raises a terminating error as soon as
+# python prints its ImportError. Relax the preference for the probe only so a
+# missing torch reaches the install branch instead of aborting the script.
+$savedErrorAction = $ErrorActionPreference
+$ErrorActionPreference = "Continue"
+try {
+    $hasTorch = python -c "import torch" 2>&1
+    $torchExit = $LASTEXITCODE
+} finally {
+    $ErrorActionPreference = $savedErrorAction
+}
+if ($torchExit -eq 0) {
+    Write-Host "`nPyTorch already installed, skipping." -ForegroundColor Green
+} else {
+    Write-Host "`nInstalling PyTorch with CUDA 12.8..."
+    Write-Host "(For CPU-only: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu)" -ForegroundColor Yellow
+    pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128
+    # Native commands do not throw on failure; check the exit code explicitly.
+    if ($LASTEXITCODE -ne 0) {
+        throw "PyTorch installation failed (pip exit code $LASTEXITCODE)."
+    }
+}
+
+# ── Python deps ───────────────────────────────────────────
+Write-Host "`nInstalling project dependencies..."
+pip install -r (Join-Path $root "requirements.txt")
+
+# ── libmpv ────────────────────────────────────────────────
 $mpvDll = Join-Path $root "libmpv-2.dll"
 if (Test-Path $mpvDll) {
    Write-Host "`nlibmpv-2.dll already present, skipping." -ForegroundColor Green
@@ -30,12 +51,11 @@ if (Test-Path $mpvDll) {
    Write-Host "libmpv-2.dll downloaded." -ForegroundColor Green
 }

-# ── ffmpeg ─────────────────────────────────────────────────
+# ── ffmpeg ────────────────────────────────────────────────
 $ffmpeg = Join-Path $root "ffmpeg.exe"
 if (Test-Path $ffmpeg) {
    Write-Host "`nffmpeg.exe already present, skipping." -ForegroundColor Green
 } else {
-    # Check if ffmpeg is on PATH
    $onPath = Get-Command ffmpeg -ErrorAction SilentlyContinue
    if ($onPath) {
        Write-Host "`nffmpeg found on PATH: $($onPath.Source)" -ForegroundColor Green
@@ -54,6 +74,11 @@ if (Test-Path $ffmpeg) {
    }
 }

+# ── Verify ────────────────────────────────────────────────
+Write-Host "`n--- Verification ---" -ForegroundColor Cyan
+python -c "import torch; print('PyTorch', torch.__version__, 'CUDA', torch.version.cuda)"
+python -c "import sklearn, librosa, torchaudio; print('All imports OK')"
+
 Write-Host "`n=== Setup complete ===" -ForegroundColor Cyan
-Write-Host "Run 8-cut with: python main.py"
+Write-Host "Run 8-cut with: .venv\Scripts\python.exe main.py"
 Write-Host "Or double-click: 8cut.bat"