fix: bug audit — broken test imports, training data overlap, cleanup

- Fix test_utils.py importing build_annotation_json_path from main
  instead of core.annotations (all 59 tests pass now)
- Fix get_training_data double-counting clips whose start_time appears
  in both the positive and soft sets — subtract positive from soft
- Add cancel_flag to train_classifier so training can be interrupted
  between videos (TrainWorker passes self as cancel_flag)
- Remove orphaned core/export.py (was for deleted server API)
- Remove stale Dockerfile and docker-compose.yml (referenced server)
- Clean up leftover server/__pycache__ and client/ build artifacts
- Add torch to requirements.txt (was only mentioned in comments)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-18 12:55:58 +02:00
parent 7834b1d05c
commit e1789d4e71
8 changed files with 17 additions and 168 deletions
+6 -1
View File
@@ -240,7 +240,8 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
model_path: str | None = None,
tolerance: float = 12.0,
neg_margin: float = 120.0,
embed_model: str | None = None) -> dict:
embed_model: str | None = None,
cancel_flag: object = None) -> dict:
"""Train a classifier from labeled videos.
Args:
@@ -248,6 +249,7 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
model_path: if given, save model to this path
tolerance/neg_margin: labeling parameters
embed_model: embedding model name (e.g. "HUBERT_BASE", "BEATS"), defaults to WAV2VEC2_BASE
cancel_flag: optional object with a _cancel attribute; when that attribute is truthy, training stops before the next video and returns None
Returns:
dict with 'classifier', 'embed_model', and metadata, or None on failure or cancellation.
@@ -257,6 +259,9 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
all_X, all_y = [], []
for vi, (vpath, gt_intense, gt_soft) in enumerate(video_infos):
if cancel_flag and getattr(cancel_flag, '_cancel', False):
_log("audio_scan: training cancelled")
return None
_log(f"audio_scan: training [{vi+1}/{len(video_infos)}] {os.path.basename(vpath)}")
y, _ = librosa.load(vpath, sr=_SR, mono=True)