diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index d9c5ed7..0000000 --- a/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM nvidia/cuda:12.6.3-runtime-ubuntu24.04 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - python3 python3-pip ffmpeg \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /app -COPY core/ core/ -COPY server/ server/ -RUN pip install --no-cache-dir --break-system-packages fastapi uvicorn[standard] - -EXPOSE 8000 -CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/core/audio_scan.py b/core/audio_scan.py index 96956c0..2c3ff49 100644 --- a/core/audio_scan.py +++ b/core/audio_scan.py @@ -240,7 +240,8 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]], model_path: str | None = None, tolerance: float = 12.0, neg_margin: float = 120.0, - embed_model: str | None = None) -> dict: + embed_model: str | None = None, + cancel_flag: object = None) -> dict: """Train a classifier from labeled videos. Args: @@ -248,6 +249,7 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]], model_path: if given, save model to this path tolerance/neg_margin: labeling parameters embed_model: embedding model name (e.g. "HUBERT_BASE", "BEATS"), defaults to WAV2VEC2_BASE + cancel_flag: object with _cancel attribute; if set, training aborts early Returns: dict with 'classifier', 'embed_model', and metadata, or None on failure. @@ -257,6 +259,9 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]], all_X, all_y = [], [] for vi, (vpath, gt_intense, gt_soft) in enumerate(video_infos): + if cancel_flag and getattr(cancel_flag, '_cancel', False): + _log("audio_scan: training cancelled") + return None _log(f"audio_scan: training [{vi+1}/{len(video_infos)}] {os.path.basename(vpath)}") y, _ = librosa.load(vpath, sr=_SR, mono=True) diff --git a/core/db.py b/core/db.py index 3a970d0..68eb25f 100644 --- a/core/db.py +++ b/core/db.py @@ -283,6 +283,11 @@ class ProcessedDB: else: soft_by_video.setdefault(fn, set()).add(st) + # Remove positive times from soft to avoid conflicting labels + for fn in pos_by_video: + if fn in soft_by_video: + soft_by_video[fn] -= pos_by_video[fn] + result = [] for fn in pos_by_video: sp = source_by_filename.get(fn, "") diff --git a/core/export.py b/core/export.py deleted file mode 100644 index 9c3eb89..0000000 --- a/core/export.py +++ /dev/null @@ -1,127 +0,0 @@ -import os -import subprocess -import threading -from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import Callable - -from .ffmpeg import build_ffmpeg_command, build_audio_extract_command -from .paths import _log - - -class ExportRunner: - """Run ffmpeg export jobs in a background thread pool. - - Callbacks: - on_clip_done(path: str) - on_all_done() - on_error(msg: str) - """ - - def __init__( - self, - input_path: str, - jobs: list[tuple[float, str, str | None, float]], - short_side: int | None = None, - image_sequence: bool = False, - max_workers: int | None = None, - encoder: str = "libx264", - on_clip_done: Callable[[str], None] | None = None, - on_all_done: Callable[[], None] | None = None, - on_error: Callable[[str], None] | None = None, - ): - self._input = input_path - self._jobs = jobs - self._short_side = short_side - self._image_sequence = image_sequence - self._max_workers = max_workers - self._encoder = encoder - self._on_clip_done = on_clip_done - self._on_all_done = on_all_done - self._on_error = on_error - self._cancel = False - self._procs: list[subprocess.Popen] = [] - self._procs_lock = threading.Lock() - self._thread: threading.Thread | None = None - - def start(self): - self._thread = threading.Thread(target=self._run, daemon=True) - self._thread.start() - - def cancel(self): - self._cancel = True - with self._procs_lock: - for proc in self._procs: - try: - proc.kill() - except OSError: - pass - - def is_running(self) -> bool: - return self._thread is not None and self._thread.is_alive() - - def _run_one(self, start: float, output: str, - portrait_ratio: str | None, crop_center: float) -> str: - if self._cancel: - raise RuntimeError("cancelled") - if self._image_sequence: - os.makedirs(output, exist_ok=True) - cmd = build_ffmpeg_command( - self._input, start, output, - short_side=self._short_side, - portrait_ratio=portrait_ratio, - crop_center=crop_center, - image_sequence=self._image_sequence, - encoder=self._encoder, - ) - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - with self._procs_lock: - self._procs.append(proc) - try: - _, stderr = proc.communicate(timeout=120) - except subprocess.TimeoutExpired: - proc.kill() - raise RuntimeError("ffmpeg timed out") - finally: - with self._procs_lock: - self._procs.remove(proc) - if self._cancel: - raise RuntimeError("cancelled") - if proc.returncode != 0: - msg = stderr.decode(errors='replace')[-500:] if stderr else "ffmpeg failed" - raise RuntimeError(msg) - if self._image_sequence: - audio_cmd = build_audio_extract_command(self._input, start, output) - audio_result = subprocess.run(audio_cmd, capture_output=True, text=True, timeout=60) - if audio_result.returncode != 0: - msg = (audio_result.stderr or "audio extraction failed")[-500:] - raise RuntimeError(msg) - return output - - def _run(self): - cap = self._max_workers or (os.cpu_count() or 2) - workers = min(len(self._jobs), cap) - try: - with ThreadPoolExecutor(max_workers=workers) as pool: - futures = { - pool.submit(self._run_one, s, o, pr, cc): o - for s, o, pr, cc in self._jobs - } - for fut in as_completed(futures): - if self._cancel: - break - try: - path = fut.result() - if self._on_clip_done: - self._on_clip_done(path) - except Exception as e: - if "cancelled" not in str(e) and self._on_error: - self._on_error(str(e)) - return - except Exception as e: - if self._on_error: - self._on_error(str(e)) - return - if self._cancel: - return - if self._on_all_done: - self._on_all_done() diff --git a/docker-compose.yml b/docker-compose.yml deleted file mode 100644 index af816de..0000000 --- a/docker-compose.yml +++ /dev/null @@ -1,24 +0,0 @@ -services: - 8cut: - build: . - ports: - - "8000:8000" - volumes: - - /path/to/videos:/videos:ro - - /path/to/exports:/exports - - 8cut-data:/data - environment: - MEDIA_DIRS: /videos - EXPORT_DIR: /exports - DB_PATH: /data/8cut.db - CACHE_DIR: /data/cache - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [gpu] - -volumes: - 8cut-data: diff --git a/main.py b/main.py index c4e79e4..40ba5fd 100755 --- a/main.py +++ b/main.py @@ -350,6 +350,7 @@ class TrainWorker(QThread): self._video_infos, model_path=self._model_path, embed_model=self._embed_model, + cancel_flag=self, ) if self._cancel: return diff --git a/requirements.txt b/requirements.txt index 8de194c..4a0d9f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,8 +9,9 @@ scikit-learn>=1.3 joblib>=1.3 soundfile>=0.12 -# Deep learning (torch installed separately for CUDA support) -# torch and torchaudio are installed via --index-url in setup_env.sh +# Deep learning — install via setup_env.sh for correct CUDA version, +# or manually: pip install torch torchaudio --index-url https://download.pytorch.org/whl/cu128 +torch>=2.0 torchaudio>=2.0 # Object detection diff --git a/tests/test_utils.py b/tests/test_utils.py index 0d9d9c7..c34297f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,6 @@ import tempfile, os, json -from main import build_export_path, format_time, build_ffmpeg_command, build_sequence_dir, build_audio_extract_command, build_annotation_json_path, upsert_clip_annotation, resolve_keyframe, apply_keyframes_to_jobs +from main import build_export_path, format_time, build_ffmpeg_command, build_sequence_dir, build_audio_extract_command, resolve_keyframe, apply_keyframes_to_jobs +from core.annotations import build_annotation_json_path, upsert_clip_annotation from main import ProcessedDB