From 8ab5bdba77e18d6f84e947993303ce4a5575360e Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Fri, 17 Apr 2026 09:27:11 +0200 Subject: [PATCH] fix: use mean+std MFCC vectors (40-dim) for better discrimination Mean-only vectors were too similar across different audio segments, causing everything to match even at threshold 0.99. Adding std captures temporal dynamics and makes the similarity scores much more spread out. Co-Authored-By: Claude Opus 4.6 --- core/audio_scan.py | 10 +++++++--- tests/test_audio_scan.py | 6 +++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/core/audio_scan.py b/core/audio_scan.py index 43eeff0..bc615e8 100644 --- a/core/audio_scan.py +++ b/core/audio_scan.py @@ -10,10 +10,14 @@ _SR = 22050 def _extract_mfcc(path: str, sr: int = _SR) -> np.ndarray: - """Load audio from a file and return a mean MFCC vector (20-dim).""" + """Load audio from a file and return an MFCC feature vector (40-dim). + + Concatenates mean + std of each coefficient over time. + Mean captures average spectral content; std captures dynamics. + """ y, _ = librosa.load(path, sr=sr, mono=True) mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=_N_MFCC) - return mfcc.mean(axis=1) # average over time → (20,) + return np.concatenate([mfcc.mean(axis=1), mfcc.std(axis=1)]) def build_profile(clip_paths: list[str]) -> dict | None: @@ -95,7 +99,7 @@ def scan_video( chunk = y[pos : pos + win_samples] mfcc = librosa.feature.mfcc(y=chunk, sr=sr, n_mfcc=_N_MFCC) - vec = mfcc.mean(axis=1) + vec = np.concatenate([mfcc.mean(axis=1), mfcc.std(axis=1)]) if mode == "nearest": score = max( diff --git a/tests/test_audio_scan.py b/tests/test_audio_scan.py index 52e534c..b7961aa 100644 --- a/tests/test_audio_scan.py +++ b/tests/test_audio_scan.py @@ -16,7 +16,7 @@ def test_extract_mfcc_returns_1d_vector(): _make_wav(f.name) try: vec = _extract_mfcc(f.name) - assert vec.shape == (20,) + assert vec.shape == (40,) assert not np.isnan(vec).any() finally: os.unlink(f.name) @@ -29,7 +29,7 @@ def test_build_profile_single_clip(): profile = build_profile([f.name]) assert "mean_vector" in profile assert "clip_vectors" in profile - assert profile["mean_vector"].shape == (20,) + assert profile["mean_vector"].shape == (40,) assert len(profile["clip_vectors"]) == 1 finally: os.unlink(f.name) @@ -49,7 +49,7 @@ def test_build_profile_multiple_clips(): profile = build_profile(paths) assert len(profile["clip_vectors"]) == 3 - assert profile["mean_vector"].shape == (20,) + assert profile["mean_vector"].shape == (40,) finally: for p in paths: os.unlink(p)