feat: add audio_scan module with build_profile

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-17 08:48:18 +02:00
parent b1980de6d1
commit e17d8f67aa
2 changed files with 110 additions and 0 deletions
@@ -0,0 +1,40 @@
+"""Audio similarity scanning — MFCC-based profile matching."""
+
+import numpy as np
+import librosa
+
+from .paths import _log
+
+_N_MFCC = 20
+_SR = 22050
+
+
+def _extract_mfcc(path: str, sr: int = _SR) -> np.ndarray:
+    """Load an audio file and return its time-averaged MFCC vector.
+
+    The file is loaded as mono at ``sr`` Hz, ``_N_MFCC`` coefficients are
+    computed, and the result is averaged along the time axis.
+
+    Args:
+        path: Location of the audio file to analyse.
+        sr: Sample rate passed to the loader and to the MFCC computation.
+
+    Returns:
+        A 1-D array of length ``_N_MFCC``.
+
+    Raises:
+        ValueError: If the file decodes to zero samples.
+    """
+    y, _ = librosa.load(path, sr=sr, mono=True)
+    if y.size == 0:
+        # No samples means no spectral content to describe; fail loudly so
+        # callers can skip the clip instead of keeping a meaningless vector.
+        raise ValueError(f"no audio samples in {path}")
+    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=_N_MFCC)
+    return mfcc.mean(axis=1)  # average over time → (_N_MFCC,)
+
+
+def build_profile(clip_paths: list[str]) -> dict | None:
+    """Build a reference profile from a set of audio clips.
+
+    Each path is run through ``_extract_mfcc``; clips that raise are logged
+    and left out.
+
+    Returns a dict with:
+      - mean_vector: element-wise mean of the per-clip MFCC vectors
+      - clip_vectors: list of the per-clip MFCC vectors, in input order
+    Returns None when no clip produced a vector.
+    """
+    loaded = []
+    for p in clip_paths:
+        try:
+            features = _extract_mfcc(p)
+        except Exception as e:
+            # Best-effort: one unreadable clip must not abort the whole profile.
+            _log(f"audio_scan: skip {p}: {e}")
+            continue
+        loaded.append(features)
+
+    if len(loaded) == 0:
+        return None
+
+    stacked = np.stack(loaded)
+    profile = {}
+    profile["mean_vector"] = np.mean(stacked, axis=0)
+    profile["clip_vectors"] = loaded
+    return profile
@@ -0,0 +1,70 @@
+import tempfile, os
+import numpy as np
+from core.audio_scan import build_profile, _extract_mfcc
+
+
+def _make_wav(path: str, duration: float = 8.0, sr: int = 22050,
+              freq: float = 440.0):
+    """Write a sine-wave WAV file for testing.
+
+    Args:
+        path: Destination file path.
+        duration: Length of the tone in seconds.
+        sr: Sample rate in Hz.
+        freq: Frequency of the sine tone in Hz (defaults to 440).
+    """
+    # Imported locally so the module can be collected without soundfile
+    # until a test actually needs to synthesise audio.
+    import soundfile as sf
+    t = np.linspace(0, duration, int(sr * duration), endpoint=False)
+    audio = 0.5 * np.sin(2 * np.pi * freq * t)  # half amplitude, no clipping
+    sf.write(path, audio, sr)
+
+
+def test_extract_mfcc_returns_1d_vector():
+    """A valid clip yields a NaN-free 1-D vector of 20 coefficients."""
+    # A temporary directory removes the clip even if WAV creation fails, and
+    # avoids writing to a path that is still held open by another handle.
+    with tempfile.TemporaryDirectory() as tmp:
+        wav = os.path.join(tmp, "clip.wav")
+        _make_wav(wav)
+        vec = _extract_mfcc(wav)
+        assert vec.shape == (20,)
+        assert not np.isnan(vec).any()
+
+
+def test_build_profile_single_clip():
+    """One readable clip produces a profile holding exactly one vector."""
+    # A temporary directory removes the clip even if WAV creation fails, and
+    # avoids writing to a path that is still held open by another handle.
+    with tempfile.TemporaryDirectory() as tmp:
+        wav = os.path.join(tmp, "clip.wav")
+        _make_wav(wav)
+        profile = build_profile([wav])
+        # Fail with a clear message rather than a TypeError on `in None`.
+        assert profile is not None
+        assert "mean_vector" in profile
+        assert "clip_vectors" in profile
+        assert profile["mean_vector"].shape == (20,)
+        assert len(profile["clip_vectors"]) == 1
+
+
+def test_build_profile_multiple_clips():
+    """Three clips at different pitches give three vectors and one mean."""
+    import soundfile as sf
+
+    # The time axis is identical for every clip, so build it once.
+    t = np.linspace(0, 8.0, 22050 * 8, endpoint=False)
+    # A temporary directory cleans up every clip, including ones written
+    # before a later write fails, and leaves no open handles behind.
+    with tempfile.TemporaryDirectory() as tmp:
+        paths = []
+        for i in range(3):
+            freq = 440 + i * 200
+            wav = os.path.join(tmp, f"clip_{i}.wav")
+            sf.write(wav, 0.5 * np.sin(2 * np.pi * freq * t), 22050)
+            paths.append(wav)
+
+        profile = build_profile(paths)
+        assert profile is not None
+        assert len(profile["clip_vectors"]) == 3
+        assert profile["mean_vector"].shape == (20,)
+
+
+def test_build_profile_skips_missing_files():
+    """A nonexistent path is skipped while the readable clip is kept."""
+    with tempfile.TemporaryDirectory() as tmp:
+        wav = os.path.join(tmp, "clip.wav")
+        # Never created, so it is guaranteed absent on every platform.
+        missing = os.path.join(tmp, "no_such_file.wav")
+        _make_wav(wav)
+        profile = build_profile([wav, missing])
+        assert profile is not None
+        assert len(profile["clip_vectors"]) == 1
+
+
+def test_build_profile_empty_returns_none():
+    """With no clip paths at all, build_profile reports None."""
+    assert build_profile([]) is None