From f5361a963e6427f6aba2fccc2f2fbaaa7cc2cda9 Mon Sep 17 00:00:00 2001
From: Ethanfel
Date: Sun, 19 Apr 2026 14:00:38 +0200
Subject: [PATCH] feat: calibrate classifier probabilities with isotonic regression

Co-Authored-By: Claude Opus 4.6
---
 core/audio_scan.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/core/audio_scan.py b/core/audio_scan.py
index 833e7b2..2267abe 100644
--- a/core/audio_scan.py
+++ b/core/audio_scan.py
@@ -560,6 +560,17 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
     clf.fit(X[train_idx], y_arr[train_idx])
     _log("audio_scan: classifier trained")
 
+    # Calibrate probabilities for better threshold behavior
+    from sklearn.calibration import CalibratedClassifierCV
+    min_class = min(int(n_pos), int(n_neg_sample))
+    if min_class >= 6:
+        cal_clf = CalibratedClassifierCV(clf, cv=3, method='isotonic')
+        cal_clf.fit(X[train_idx], y_arr[train_idx])
+        clf = cal_clf
+        _log("audio_scan: classifier calibrated (isotonic, 3-fold)")
+    else:
+        _log(f"audio_scan: skipping calibration (min class size {min_class} < 6)")
+
     model = {"classifier": clf,
              "n_features": X.shape[1],
              "embed_model": embed_model or _DEFAULT_EMBED_MODEL}