feat: vid folder layout, changelog popup, shift-to-resize, DB migration

- Change export layout from clip_NNN group directories to vid_NNN per-video folders
- Add automatic DB migration that rewrites old paths and moves files on startup
- Per-video counter with DB cross-check to prevent overwrites
- Changelog popup on version bump with "don't show again" checkbox
- Scan region resize now requires Shift+drag to prevent accidental edits
- Recalculate vid folder and counter on file load
- Add EAT_LARGE embedding model variant
- Update tests for new flat export path structure

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-19 17:01:37 +02:00
parent d8b3972bdc
commit 6c1d42adfe
7 changed files with 509 additions and 816 deletions
+8 -5
View File
@@ -65,6 +65,7 @@ _EMBED_MODELS = {
"AST": 768,
"AST_ML": 3072, # 768 * 4
"EAT": 768,
"EAT_LARGE": 1024,
}
_DEFAULT_EMBED_MODEL = "WAV2VEC2_BASE"
@@ -104,11 +105,13 @@ def _get_w2v_model(model_name: str | None = None):
_ast_feature_extractor = ASTFeatureExtractor.from_pretrained(
"MIT/ast-finetuned-audioset-10-10-0.4593"
)
elif load_name == "EAT":
elif load_name in ("EAT", "EAT_LARGE"):
from transformers import AutoModel
eat_repo = ("worstchan/EAT-large_epoch20_finetune_AS2M"
if load_name == "EAT_LARGE"
else "worstchan/EAT-base_epoch30_finetune_AS2M")
_w2v_model = AutoModel.from_pretrained(
"worstchan/EAT-base_epoch30_finetune_AS2M",
trust_remote_code=True,
eat_repo, trust_remote_code=True,
).to(_w2v_device)
else:
import torchaudio
@@ -254,7 +257,7 @@ def _extract_w2v_windows(y: np.ndarray, sr: int = _SR,
model, device = _get_w2v_model(model_name)
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
# Auto-size batches based on available GPU memory
batch_size = 16
@@ -383,7 +386,7 @@ def _extract_w2v_targeted(y: np.ndarray, sr: int, gt_intense: list[float],
is_beats = (model_name or _DEFAULT_EMBED_MODEL) == "BEATS"
is_ast = (model_name or _DEFAULT_EMBED_MODEL) in ("AST", "AST_ML")
is_eat = (model_name or _DEFAULT_EMBED_MODEL) == "EAT"
is_eat = (model_name or _DEFAULT_EMBED_MODEL) in ("EAT", "EAT_LARGE")
ml_cfg = _ml_config(model_name or _DEFAULT_EMBED_MODEL)
for batch_start in range(0, len(valid_times), batch_size):