From 558fa23da4740f9f6ae9f3962b8a47de79cc9b60 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Mon, 6 Apr 2026 12:42:03 +0200 Subject: [PATCH] feat: ProcessedDB and _normalize_filename with tests Co-Authored-By: Claude Sonnet 4.6 --- main.py | 60 ++++++++++++++++++++++++++++++++++++++++++++ tests/test_utils.py | 61 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+) diff --git a/main.py b/main.py index 2203c51..0c2f463 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,10 @@ import sys import os +import re +import sqlite3 import subprocess +from datetime import datetime +from difflib import SequenceMatcher from pathlib import Path from PyQt6.QtWidgets import ( @@ -39,6 +43,62 @@ def build_ffmpeg_command(input_path: str, start: float, output_path: str) -> lis ] +def _normalize_filename(filename: str) -> str: + """Strip extension and common resolution/quality tags for fuzzy comparison.""" + name = os.path.splitext(filename)[0].lower() + name = re.sub( + r'(? None: + if not self._enabled: + return + self._con.execute( + "INSERT INTO processed (filename, processed_at) VALUES (?, ?)", + (filename, datetime.utcnow().isoformat()), + ) + self._con.commit() + + def find_similar(self, filename: str) -> str | None: + if not self._enabled: + return None + rows = self._con.execute( + "SELECT DISTINCT filename FROM processed" + ).fetchall() + norm_new = _normalize_filename(filename) + best_ratio, best_match = 0.0, None + for (stored,) in rows: + ratio = SequenceMatcher( + None, norm_new, _normalize_filename(stored) + ).ratio() + if ratio >= 0.75 and ratio > best_ratio: + best_ratio, best_match = ratio, stored + return best_match + + class ExportWorker(QThread): finished = pyqtSignal(str) # output path error = pyqtSignal(str) # error message diff --git a/tests/test_utils.py b/tests/test_utils.py index 4ad872e..d162034 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,4 +1,6 @@ +import tempfile, os from main import build_export_path, format_time, build_ffmpeg_command +from main import _normalize_filename, ProcessedDB def test_build_export_path_first(): @@ -32,3 +34,62 @@ def test_ffmpeg_command(): assert "-t" in cmd assert "8" in cmd assert cmd[-1] == "/out/clip_001.mp4" + + +# --- _normalize_filename --- + +def test_normalize_strips_extension(): + assert _normalize_filename("clip.mp4") == "clip" + +def test_normalize_strips_resolution(): + assert _normalize_filename("clip_2160p.mp4") == "clip" + +def test_normalize_strips_1080p(): + assert _normalize_filename("clip_1080p.mkv") == "clip" + +def test_normalize_strips_multiple_tags(): + assert _normalize_filename("show_1080p_HDR.mkv") == "show" + +def test_normalize_lowercases(): + assert _normalize_filename("MyVideo_4K.mp4") == "myvideo" + +def test_normalize_collapses_separators(): + assert _normalize_filename("my__video--2160p.mp4") == "my_video" + + +# --- ProcessedDB --- + +def test_db_add_and_find_exact(): + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + path = f.name + try: + db = ProcessedDB(path) + db.add("video.mp4") + assert db.find_similar("video.mp4") == "video.mp4" + finally: + os.unlink(path) + +def test_db_find_similar_resolution_variant(): + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + path = f.name + try: + db = ProcessedDB(path) + db.add("episode_s01e01_2160p.mkv") + assert db.find_similar("episode_s01e01_1080p.mkv") == "episode_s01e01_2160p.mkv" + finally: + os.unlink(path) + +def test_db_find_similar_no_match(): + with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: + path = f.name + try: + db = ProcessedDB(path) + db.add("alpha.mp4") + assert db.find_similar("completely_different_zzzz.mp4") is None + finally: + os.unlink(path) + +def test_db_disabled_survives_bad_path(): + db = ProcessedDB("/no/such/directory/8cut.db") + db.add("x.mp4") # must not raise + assert db.find_similar("x.mp4") is None # gracefully returns None