feat: integrate training UI, BEATs model, and clean up legacy code

- Remove legacy distance-mode scanning (build_profile, _similarity, etc.)
  and hand-crafted intensity features — pipeline is now embedding-only
- Integrate Microsoft BEATs as embedding option alongside wav2vec2/HuBERT
- Add TrainDialog with positive class selector, model picker, video dir
  fallback, and live training stats
- Add TrainWorker QThread with cancel support and proper lifecycle cleanup
- Add source_path column to DB for robust source video tracking
- Add get_export_folders/get_training_data/get_training_stats to DB
- Wire source_path in all export DB writes (_on_clip_done, _on_auto_clip_done)
- Cancel scan/train workers in closeEvent to prevent use-after-free crashes
- Add setup_env.sh supporting both conda and python venv (CUDA 12.8)
- Update requirements.txt with all actual dependencies
- Update 8cut_train.py with --positive flag for new DB-driven training

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 11:52:27 +02:00
parent f2c38aee79
commit 12ed183f1b
11 changed files with 2608 additions and 338 deletions
Executable
+108
View File
@@ -0,0 +1,108 @@
#!/usr/bin/env bash
set -euo pipefail
# ──────────────────────────────────────────────────────────────────────
# 8-cut environment setup — supports conda (miniforge) or python venv
#
# Usage:
# ./setup_env.sh # auto-detect (prefers conda if available)
# ./setup_env.sh --conda # force conda
# ./setup_env.sh --venv # force python venv
# ────────────────────────────────────────────────────────────────────
# Name of the conda environment created/updated in conda mode.
ENV_NAME="8cut"
# Python version requested when a new conda environment is created.
PYTHON_VERSION="3.12"
# Absolute directory containing this script.
# CDPATH is cleared for the `cd` because a CDPATH hit makes `cd` print the
# resolved directory on stdout, which would end up inside SCRIPT_DIR.
# `--` keeps a directory name starting with '-' from being read as an option.
SCRIPT_DIR="$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd)"
# Location of the virtual environment used in venv mode.
VENV_DIR="$SCRIPT_DIR/.venv"
# PyTorch wheel index URL; the "cu128" suffix selects the CUDA 12.8 builds.
TORCH_INDEX="https://download.pytorch.org/whl/cu128"
# ── Parse args ────────────────────────────────────────────────────────
# Selected setup mode: "conda" or "venv". Empty until parse_mode has run.
MODE=""

#######################################
# Pick the setup mode from the command-line flags.
# When several mode flags are given the last one wins. When none is given,
# conda is chosen if a `conda` command is available, otherwise venv.
# Globals:   MODE (written)
# Arguments: the script's command-line arguments (--conda | --venv)
# Outputs:   the auto-detected mode on stdout; diagnostics on stderr
# Returns:   exits 1 on an unrecognised argument
#######################################
parse_mode() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --conda) MODE="conda" ;;
      --venv) MODE="venv" ;;
      *)
        # Diagnostics belong on stderr so they are not mistaken for output.
        echo "Unknown arg: $arg" >&2
        exit 1
        ;;
    esac
  done

  if [[ -z "$MODE" ]]; then
    if command -v conda &>/dev/null; then
      MODE="conda"
    else
      MODE="venv"
    fi
    echo "Auto-detected mode: $MODE"
  fi
}

parse_mode "$@"
# ── Conda setup ──────────────────────────────────────────────────────
#######################################
# Create the conda environment if it is missing, activate it, and install
# PyTorch plus the project requirements into it with pip.
# Globals:   ENV_NAME, PYTHON_VERSION, TORCH_INDEX, SCRIPT_DIR (read)
# Arguments: none
# Outputs:   progress messages on stdout; errors on stderr
# Returns:   exits 1 when no `conda` command is available
#######################################
setup_conda() {
  echo "==> Setting up conda environment: $ENV_NAME"

  if ! command -v conda &>/dev/null; then
    echo "conda not found in PATH" >&2
    exit 1
  fi

  # conda's shell hook and activation scripts can reference unset variables,
  # which aborts a script running under `set -u`. Relax nounset while they
  # run and restore it afterwards (only if it was on to begin with).
  local had_nounset=0
  if [[ $- == *u* ]]; then
    had_nounset=1
    set +u
  fi

  # Load conda's shell integration so `conda activate` works in this shell.
  eval "$(conda shell.bash hook)"

  # Compare the environment name against the first column only: matching the
  # name anywhere on the line also hits install paths that contain it
  # (e.g. /home/user/8cut/miniforge3). The listing is captured first so no
  # early-exiting reader can make the pipeline fail under `pipefail`.
  local env_list listed_name _rest
  local env_exists=0
  env_list="$(conda env list)"
  while read -r listed_name _rest; do
    if [[ "$listed_name" == "$ENV_NAME" ]]; then
      env_exists=1
      break
    fi
  done <<<"$env_list"

  if (( env_exists )); then
    echo " Environment '$ENV_NAME' already exists, updating..."
  else
    echo " Creating environment '$ENV_NAME' with Python $PYTHON_VERSION..."
    conda create -y -n "$ENV_NAME" python="$PYTHON_VERSION"
  fi
  conda activate "$ENV_NAME"

  if (( had_nounset )); then
    set -u
  fi

  echo " Installing PyTorch + torchaudio (CUDA 12.8)..."
  pip install torch torchaudio --index-url "$TORCH_INDEX"
  echo " Installing project dependencies..."
  pip install -r "$SCRIPT_DIR/requirements.txt"
  echo ""
  echo "Done! Activate with:"
  echo " conda activate $ENV_NAME"
}
# ── Venv setup ───────────────────────────────────────────────────────
#######################################
# Create the Python virtual environment if it is missing or incomplete,
# activate it, and install PyTorch plus the project requirements with pip.
# Globals:   VENV_DIR, TORCH_INDEX, SCRIPT_DIR (read)
# Arguments: none
# Outputs:   progress messages on stdout
#######################################
setup_venv() {
  echo "==> Setting up Python venv at: $VENV_DIR"

  # Test for the activate script rather than the directory: an interrupted or
  # failed `python3 -m venv` can leave the directory behind without it, and
  # every later run would then skip creation and fail at `source`.
  if [[ ! -f "$VENV_DIR/bin/activate" ]]; then
    python3 -m venv "$VENV_DIR"
    echo " Created venv"
  else
    echo " Venv already exists, updating..."
  fi

  # shellcheck disable=SC1091 — the activate script is generated at runtime
  source "$VENV_DIR/bin/activate"

  echo " Installing PyTorch + torchaudio (CUDA 12.8)..."
  pip install torch torchaudio --index-url "$TORCH_INDEX"
  echo " Installing project dependencies..."
  pip install -r "$SCRIPT_DIR/requirements.txt"
  echo ""
  echo "Done! Activate with:"
  echo " source $VENV_DIR/bin/activate"
}
# ── Run ───────────────────────────────────────────────────────────────
# Run the installer that matches the selected mode.
if [[ "$MODE" == "conda" ]]; then
  setup_conda
elif [[ "$MODE" == "venv" ]]; then
  setup_venv
fi

# Print commands the user can run afterwards to check the installation.
# The here-doc delimiter is quoted so the text is emitted literally.
cat <<'EOF'

Verify with:
 python -c "import torch; print('PyTorch', torch.__version__, 'CUDA', torch.version.cuda)"
 python -c "import librosa, torchaudio, sklearn; print('All imports OK')"
EOF