feat: calibrate classifier probabilities with isotonic regression
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -560,6 +560,17 @@ def train_classifier(video_infos: list[tuple[str, list[float], list[float]]],
|
|||||||
clf.fit(X[train_idx], y_arr[train_idx])
|
clf.fit(X[train_idx], y_arr[train_idx])
|
||||||
_log("audio_scan: classifier trained")
|
_log("audio_scan: classifier trained")
|
||||||
|
|
||||||
|
# Calibrate probabilities for better threshold behavior
|
||||||
|
from sklearn.calibration import CalibratedClassifierCV
|
||||||
|
min_class = min(int(n_pos), int(n_neg_sample))
|
||||||
|
if min_class >= 6:
|
||||||
|
cal_clf = CalibratedClassifierCV(clf, cv=3, method='isotonic')
|
||||||
|
cal_clf.fit(X[train_idx], y_arr[train_idx])
|
||||||
|
clf = cal_clf
|
||||||
|
_log("audio_scan: classifier calibrated (isotonic, 3-fold)")
|
||||||
|
else:
|
||||||
|
_log(f"audio_scan: skipping calibration (min class size {min_class} < 6)")
|
||||||
|
|
||||||
model = {"classifier": clf, "n_features": X.shape[1],
|
model = {"classifier": clf, "n_features": X.shape[1],
|
||||||
"embed_model": embed_model or _DEFAULT_EMBED_MODEL}
|
"embed_model": embed_model or _DEFAULT_EMBED_MODEL}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user