fix: bug audit — broken test imports, training data overlap, cleanup
- Fix test_utils.py importing build_annotation_json_path from main instead of core.annotations (all 59 tests pass now)
- Fix get_training_data double-counting clips at the same start_time in both positive and soft sets — subtract positive from soft
- Add cancel_flag to train_classifier so training can be interrupted between videos (TrainWorker passes self as cancel_flag)
- Remove orphaned core/export.py (was for deleted server API)
- Remove stale Dockerfile and docker-compose.yml (referenced server)
- Clean up leftover server/__pycache__ and client/ build artifacts
- Add torch to requirements.txt (was only mentioned in comments)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
+6
-1
@@ -240,7 +240,8 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
||||
model_path: str | None = None,
|
||||
tolerance: float = 12.0,
|
||||
neg_margin: float = 120.0,
|
||||
embed_model: str | None = None) -> dict:
|
||||
embed_model: str | None = None,
|
||||
cancel_flag: object = None) -> dict:
|
||||
"""Train a classifier from labeled videos.
|
||||
|
||||
Args:
|
||||
@@ -248,6 +249,7 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
||||
model_path: if given, save model to this path
|
||||
tolerance/neg_margin: labeling parameters
|
||||
embed_model: embedding model name (e.g. "HUBERT_BASE", "BEATS"), defaults to WAV2VEC2_BASE
|
||||
cancel_flag: object with _cancel attribute; if set, training aborts early
|
||||
|
||||
Returns:
|
||||
dict with 'classifier', 'embed_model', and metadata, or None on failure.
|
||||
@@ -257,6 +259,9 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
||||
all_X, all_y = [], []
|
||||
|
||||
for vi, (vpath, gt_intense, gt_soft) in enumerate(video_infos):
|
||||
if cancel_flag and getattr(cancel_flag, '_cancel', False):
|
||||
_log("audio_scan: training cancelled")
|
||||
return None
|
||||
_log(f"audio_scan: training [{vi+1}/{len(video_infos)}] {os.path.basename(vpath)}")
|
||||
y, _ = librosa.load(vpath, sr=_SR, mono=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user