feat: scan result history — keep N versions per (file, model)

Add scan_timestamp column to scan_results. save_scan_results now inserts with a timestamp and prunes versions beyond max_versions (default 5). get_scan_results returns only the latest version by default, with optional scan_timestamp parameter for loading specific versions. New get_scan_versions method returns available versions for a (file, profile, model) tuple. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-19 15:18:28 +02:00
parent 2614a765d5
commit 4fb2ae144f
2 changed files with 104 additions and 20 deletions
@@ -94,7 +94,8 @@ class ProcessedDB:
            "  score           REAL NOT NULL,"
            "  disabled        INTEGER NOT NULL DEFAULT 0,"
            "  orig_start_time REAL,"
-            "  orig_end_time   REAL"
+            "  orig_end_time   REAL,"
+            "  scan_timestamp  TEXT NOT NULL DEFAULT ''"
            ")"
        )
        # Migrate: add new columns to existing scan_results tables
@@ -106,6 +107,7 @@ class ProcessedDB:
            ("disabled",        "INTEGER NOT NULL DEFAULT 0"),
            ("orig_start_time", "REAL"),
            ("orig_end_time",   "REAL"),
+            ("scan_timestamp",  "TEXT NOT NULL DEFAULT ''"),
        ]:
            if col not in sr_cols:
                self._con.execute(
@@ -480,45 +482,100 @@ class ProcessedDB:
    # ── Scan results ─────────────────────────────────────────────

    def save_scan_results(self, filename: str, profile: str, model: str,
-                          regions: list[tuple[float, float, float]]) -> None:
-        """Replace scan results for (filename, profile, model) with new regions.
+                          regions: list[tuple[float, float, float]],
+                          max_versions: int = 5) -> None:
+        """Save scan results as a new version for (filename, profile, model).

        regions: list of (start_time, end_time, score).
+        Keeps up to max_versions; oldest are pruned automatically.
        """
        if not self._enabled:
            return
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        with self._lock:
-            self._con.execute(
-                "DELETE FROM scan_results"
-                " WHERE filename = ? AND profile = ? AND model = ?",
-                (filename, profile, model),
-            )
            self._con.executemany(
                "INSERT INTO scan_results"
                " (filename, profile, model, start_time, end_time, score,"
-                "  orig_start_time, orig_end_time)"
-                " VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
-                [(filename, profile, model, s, e, sc, s, e) for s, e, sc in regions],
+                "  orig_start_time, orig_end_time, scan_timestamp)"
+                " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)",
+                [(filename, profile, model, s, e, sc, s, e, ts)
+                 for s, e, sc in regions],
            )
+            # Prune old versions beyond max_versions
+            versions = self._con.execute(
+                "SELECT DISTINCT scan_timestamp FROM scan_results"
+                " WHERE filename = ? AND profile = ? AND model = ?"
+                " ORDER BY scan_timestamp DESC",
+                (filename, profile, model),
+            ).fetchall()
+            if len(versions) > max_versions:
+                old_ts = [v[0] for v in versions[max_versions:]]
+                self._con.execute(
+                    "DELETE FROM scan_results"
+                    " WHERE filename = ? AND profile = ? AND model = ?"
+                    f" AND scan_timestamp IN ({','.join('?' * len(old_ts))})",
+                    (filename, profile, model, *old_ts),
+                )
            self._con.commit()

-    def get_scan_results(self, filename: str, profile: str
+    def get_scan_versions(self, filename: str, profile: str, model: str
+                          ) -> list[dict]:
+        """Return list of scan versions for (filename, profile, model).
+
+        Returns [{timestamp, count, max_score}, ...] ordered newest first.
+        """
+        if not self._enabled:
+            return []
+        rows = self._con.execute(
+            "SELECT scan_timestamp, COUNT(*), MAX(score)"
+            " FROM scan_results"
+            " WHERE filename = ? AND profile = ? AND model = ?"
+            "   AND scan_timestamp != ''"
+            " GROUP BY scan_timestamp"
+            " ORDER BY scan_timestamp DESC",
+            (filename, profile, model),
+        ).fetchall()
+        return [{"timestamp": ts, "count": cnt, "max_score": sc}
+                for ts, cnt, sc in rows]
+
+    def get_scan_results(self, filename: str, profile: str,
+                         scan_timestamp: str | None = None
                         ) -> dict[str, list[tuple[int, float, float, float, bool, float, float]]]:
        """Return scan results grouped by model.

+        If scan_timestamp is given, returns only that version's rows.
+        Otherwise returns the latest version per model.
+
        Returns {model: [(row_id, start, end, score, disabled, orig_start, orig_end), ...]}
        sorted by start_time.
        """
        if not self._enabled:
            return {}
-        rows = self._con.execute(
-            "SELECT id, model, start_time, end_time, score, disabled,"
-            "       orig_start_time, orig_end_time"
-            " FROM scan_results"
-            " WHERE filename = ? AND profile = ?"
-            " ORDER BY model, start_time",
-            (filename, profile),
-        ).fetchall()
+        if scan_timestamp:
+            rows = self._con.execute(
+                "SELECT id, model, start_time, end_time, score, disabled,"
+                "       orig_start_time, orig_end_time"
+                " FROM scan_results"
+                " WHERE filename = ? AND profile = ? AND scan_timestamp = ?"
+                " ORDER BY model, start_time",
+                (filename, profile, scan_timestamp),
+            ).fetchall()
+        else:
+            # For each model, get rows from the latest timestamp only
+            rows = self._con.execute(
+                "SELECT r.id, r.model, r.start_time, r.end_time, r.score,"
+                "       r.disabled, r.orig_start_time, r.orig_end_time"
+                " FROM scan_results r"
+                " INNER JOIN ("
+                "   SELECT model, MAX(scan_timestamp) AS latest"
+                "   FROM scan_results"
+                "   WHERE filename = ? AND profile = ?"
+                "   GROUP BY model"
+                " ) m ON r.model = m.model AND r.scan_timestamp = m.latest"
+                " WHERE r.filename = ? AND r.profile = ?"
+                " ORDER BY r.model, r.start_time",
+                (filename, profile, filename, profile),
+            ).fetchall()
        result: dict[str, list[tuple[int, float, float, float, bool, float, float]]] = {}
        for row_id, model, s, e, sc, dis, os_, oe in rows:
            # Fall back to current bounds for legacy rows without orig