Add in-memory metadata cache to avoid redundant disk I/O

List, prune, and delete operations now use a lightweight in-memory cache
of snapshot metadata (everything except graphData). Only get_full_record()
and update_meta() hit disk after warm-up, keeping sidebar loads and
auto-capture prune cycles fast.

Key changes:
- snapshot_storage.py: cache layer (_cache, _cache_warmed, _extract_meta,
  _ensure_cached), new get_full_record() and update_meta() functions,
  all existing functions updated to use cache
- snapshot_routes.py: new /get and /update-meta endpoints
- snapshot_manager.js: db_getFullRecord() and db_updateMeta() helpers,
  lazy graphData fetch in restore/swap/diff/preview/tooltip, label/notes/
  lock use update_meta instead of full put to preserve graphData on disk

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-26 10:57:41 +01:00
parent ab3bbc7f71
commit 7518821447
3 changed files with 267 additions and 56 deletions

View File

@@ -132,6 +132,38 @@ async function db_getAllWorkflowKeys() {
} }
} }
async function db_updateMeta(workflowKey, id, fields) {
  // Merge metadata fields (e.g. label/notes/locked) into a stored snapshot via
  // the backend, leaving graphData on disk untouched.
  // Returns true on success, false on failure (after notifying the user), so
  // callers can skip a refresh when the write did not actually happen.
  try {
    const resp = await api.fetchApi("/snapshot-manager/update-meta", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ workflowKey, id, fields }),
    });
    if (!resp.ok) {
      // The error body may not be JSON (e.g. a proxy error page);
      // fall back to statusText instead of letting resp.json() throw.
      let message = resp.statusText;
      try {
        const err = await resp.json();
        message = err.error || message;
      } catch {
        /* non-JSON error body — keep statusText */
      }
      throw new Error(message);
    }
    return true;
  } catch (err) {
    console.warn(`[${EXTENSION_NAME}] Update meta failed:`, err);
    showToast("Failed to update snapshot", "error");
    return false;
  }
}
async function db_getFullRecord(workflowKey, id) {
  // Fetch one complete snapshot record (including graphData) from the backend.
  // Returns the parsed record object, or null when the request fails or the
  // snapshot does not exist.
  const payload = JSON.stringify({ workflowKey, id });
  try {
    const resp = await api.fetchApi("/snapshot-manager/get", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: payload,
    });
    if (resp.ok) {
      return await resp.json();
    }
    return null;
  } catch (err) {
    console.warn(`[${EXTENSION_NAME}] Get full record failed:`, err);
    return null;
  }
}
async function pruneSnapshots(workflowKey) { async function pruneSnapshots(workflowKey) {
try { try {
const resp = await api.fetchApi("/snapshot-manager/prune", { const resp = await api.fetchApi("/snapshot-manager/prune", {
@@ -1115,7 +1147,13 @@ function showDiffModal(baseLabel, targetLabel, diff, allNodes, baseGraphData, ta
// ─── Preview Modal ────────────────────────────────────────────────── // ─── Preview Modal ──────────────────────────────────────────────────
function showPreviewModal(record) { async function showPreviewModal(record) {
if (!record.graphData) {
const full = await db_getFullRecord(record.workflowKey, record.id);
if (!full) { showToast("Failed to load snapshot data", "error"); return; }
record = full;
}
const overlay = document.createElement("div"); const overlay = document.createElement("div");
overlay.className = "snap-preview-overlay"; overlay.className = "snap-preview-overlay";
@@ -1281,6 +1319,11 @@ function scheduleCaptureSnapshot() {
// ─── Restore ───────────────────────────────────────────────────────── // ─── Restore ─────────────────────────────────────────────────────────
async function restoreSnapshot(record) { async function restoreSnapshot(record) {
if (!record.graphData) {
const full = await db_getFullRecord(record.workflowKey, record.id);
if (!full) { showToast("Failed to load snapshot data", "error"); return; }
record = full;
}
await withRestoreLock(async () => { await withRestoreLock(async () => {
if (!validateSnapshotData(record.graphData)) { if (!validateSnapshotData(record.graphData)) {
showToast("Invalid snapshot data", "error"); showToast("Invalid snapshot data", "error");
@@ -1316,6 +1359,12 @@ async function swapSnapshot(record) {
currentSnapshotId = capturedId || prevCurrentId; currentSnapshotId = capturedId || prevCurrentId;
} }
if (!record.graphData) {
const full = await db_getFullRecord(record.workflowKey, record.id);
if (!full) { showToast("Failed to load snapshot data", "error"); return; }
record = full;
}
await withRestoreLock(async () => { await withRestoreLock(async () => {
if (!validateSnapshotData(record.graphData)) { if (!validateSnapshotData(record.graphData)) {
showToast("Invalid snapshot data", "error"); showToast("Invalid snapshot data", "error");
@@ -2435,7 +2484,7 @@ async function buildSidebar(el) {
const newLabel = input.value.trim() || originalLabel; const newLabel = input.value.trim() || originalLabel;
if (newLabel !== originalLabel) { if (newLabel !== originalLabel) {
rec.label = newLabel; rec.label = newLabel;
await db_put(rec); await db_updateMeta(rec.workflowKey, rec.id, { label: newLabel });
await refresh(); await refresh();
} else { } else {
labelDiv.textContent = originalLabel; labelDiv.textContent = originalLabel;
@@ -2515,7 +2564,7 @@ async function buildSidebar(el) {
saved = true; saved = true;
const newNotes = textarea.value.trim(); const newNotes = textarea.value.trim();
rec.notes = newNotes || undefined; rec.notes = newNotes || undefined;
await db_put(rec); await db_updateMeta(rec.workflowKey, rec.id, { notes: newNotes || null });
await refresh(); await refresh();
}; };
textarea.addEventListener("keydown", (ev) => { textarea.addEventListener("keydown", (ev) => {
@@ -2533,7 +2582,7 @@ async function buildSidebar(el) {
lockBtn.title = rec.locked ? "Unlock snapshot" : "Lock snapshot"; lockBtn.title = rec.locked ? "Unlock snapshot" : "Lock snapshot";
lockBtn.addEventListener("click", async () => { lockBtn.addEventListener("click", async () => {
rec.locked = !rec.locked; rec.locked = !rec.locked;
await db_put(rec); await db_updateMeta(rec.workflowKey, rec.id, { locked: rec.locked });
await refresh(); await refresh();
}); });
@@ -2592,26 +2641,31 @@ async function buildSidebar(el) {
refresh(); refresh();
return; return;
} }
// Normal click // Normal click — async to allow lazy graphData fetch
let baseGraph, targetGraph, baseLabel, targetLabel; (async () => {
if (diffBaseSnapshot && diffBaseSnapshot.id !== rec.id) { let baseGraph, targetGraph, baseLabel, targetLabel;
// Two-snapshot compare: base vs this if (diffBaseSnapshot && diffBaseSnapshot.id !== rec.id) {
baseGraph = diffBaseSnapshot.graphData || {}; // Two-snapshot compare: base vs this
targetGraph = rec.graphData || {}; const baseFull = diffBaseSnapshot.graphData ? diffBaseSnapshot : await db_getFullRecord(diffBaseSnapshot.workflowKey, diffBaseSnapshot.id);
baseLabel = diffBaseSnapshot.label; const targetFull = rec.graphData ? rec : await db_getFullRecord(rec.workflowKey, rec.id);
targetLabel = rec.label; baseGraph = (baseFull && baseFull.graphData) || {};
diffBaseSnapshot = null; targetGraph = (targetFull && targetFull.graphData) || {};
refresh(); // clear highlight baseLabel = diffBaseSnapshot.label;
} else { targetLabel = rec.label;
// Compare this snapshot vs current live workflow diffBaseSnapshot = null;
baseGraph = rec.graphData || {}; refresh(); // clear highlight
targetGraph = getGraphData() || {}; } else {
baseLabel = rec.label; // Compare this snapshot vs current live workflow
targetLabel = "Current Workflow"; const full = rec.graphData ? rec : await db_getFullRecord(rec.workflowKey, rec.id);
} baseGraph = (full && full.graphData) || {};
const diff = computeDetailedDiff(baseGraph, targetGraph); targetGraph = getGraphData() || {};
const allNodes = buildNodeLookup(baseGraph, targetGraph); baseLabel = rec.label;
showDiffModal(baseLabel, targetLabel, diff, allNodes, baseGraph, targetGraph); targetLabel = "Current Workflow";
}
const diff = computeDetailedDiff(baseGraph, targetGraph);
const allNodes = buildNodeLookup(baseGraph, targetGraph);
showDiffModal(baseLabel, targetLabel, diff, allNodes, baseGraph, targetGraph);
})();
}); });
const previewBtn = document.createElement("button"); const previewBtn = document.createElement("button");
@@ -2633,8 +2687,16 @@ async function buildSidebar(el) {
// Hover tooltip // Hover tooltip
item.addEventListener("mouseenter", () => { item.addEventListener("mouseenter", () => {
tooltipTimer = setTimeout(() => { tooltipTimer = setTimeout(async () => {
const svg = getCachedSVG(rec.id, rec.graphData, { width: 240, height: 180 }); const svgCacheKey = `${rec.id}:240x180`;
let graphData = rec.graphData;
if (!graphData && !svgCache.has(svgCacheKey)) {
const full = await db_getFullRecord(rec.workflowKey, rec.id);
if (!full || !tooltipTimer) return; // abort if mouse already left
graphData = full.graphData;
}
if (!tooltipTimer) return; // abort if mouse left during fetch
const svg = getCachedSVG(rec.id, graphData, { width: 240, height: 180 });
if (!svg) return; if (!svg) return;
tooltip.innerHTML = ""; tooltip.innerHTML = "";
tooltip.appendChild(svg); tooltip.appendChild(svg);

View File

@@ -40,6 +40,43 @@ async def list_snapshots(request):
return web.json_response({"error": str(e)}, status=500) return web.json_response({"error": str(e)}, status=500)
@routes.post("/snapshot-manager/get")
async def get_snapshot(request):
    """Return one full snapshot record (including graphData) as JSON.

    Expects a JSON body with ``workflowKey`` and ``id``.
    Responds 400 on missing input or malformed id, 404 when the snapshot
    does not exist, 500 on unexpected errors.
    """
    try:
        body = await request.json()
        workflow_key = body.get("workflowKey")
        snapshot_id = body.get("id")
        if not (workflow_key and snapshot_id):
            return web.json_response({"error": "Missing workflowKey or id"}, status=400)
        record = storage.get_full_record(workflow_key, snapshot_id)
        if record is not None:
            return web.json_response(record)
        return web.json_response({"error": "Not found"}, status=404)
    except ValueError as e:
        # storage validation raises ValueError for malformed snapshot ids
        return web.json_response({"error": str(e)}, status=400)
    except Exception as e:
        return web.json_response({"error": str(e)}, status=500)
@routes.post("/snapshot-manager/update-meta")
async def update_snapshot_meta(request):
    """Merge metadata fields into a stored snapshot without touching graphData.

    Expects a JSON body with ``workflowKey``, ``id``, and a ``fields`` dict.
    Responds 400 on missing/malformed input, 404 when the snapshot does not
    exist, 500 on unexpected errors; ``{"ok": true}`` on success.
    """
    try:
        body = await request.json()
        workflow_key = body.get("workflowKey")
        snapshot_id = body.get("id")
        fields = body.get("fields")
        valid = workflow_key and snapshot_id and isinstance(fields, dict)
        if not valid:
            return web.json_response({"error": "Missing workflowKey, id, or fields"}, status=400)
        if storage.update_meta(workflow_key, snapshot_id, fields):
            return web.json_response({"ok": True})
        return web.json_response({"error": "Not found"}, status=404)
    except ValueError as e:
        # storage validation raises ValueError for malformed snapshot ids
        return web.json_response({"error": str(e)}, status=400)
    except Exception as e:
        return web.json_response({"error": str(e)}, status=500)
@routes.post("/snapshot-manager/delete") @routes.post("/snapshot-manager/delete")
async def delete_snapshot(request): async def delete_snapshot(request):
try: try:

View File

@@ -5,6 +5,9 @@ Stores each snapshot as an individual JSON file under:
<extension_dir>/data/snapshots/<encoded_workflow_key>/<id>.json <extension_dir>/data/snapshots/<encoded_workflow_key>/<id>.json
Workflow keys are percent-encoded for filesystem safety. Workflow keys are percent-encoded for filesystem safety.
An in-memory metadata cache avoids redundant disk reads for list/prune/delete
operations. Only get_full_record() reads a file from disk after warm-up.
""" """
import json import json
@@ -13,6 +16,40 @@ import urllib.parse
_DATA_DIR = os.path.join(os.path.dirname(__file__), "data", "snapshots") _DATA_DIR = os.path.join(os.path.dirname(__file__), "data", "snapshots")
# ─── In-memory metadata cache ────────────────────────────────────────
# Maps workflow_key -> list of metadata dicts (sorted by timestamp asc).
# Metadata is everything *except* graphData.
# NOTE(review): plain dict/set with no locking — assumes single-threaded
# (asyncio event loop) access to storage; confirm no worker threads call in.
_cache = {}
_cache_warmed = set()  # workflow keys whose cache list was loaded from disk
def _extract_meta(record):
"""Return a lightweight copy of *record* without graphData."""
return {k: v for k, v in record.items() if k != "graphData"}
def _ensure_cached(workflow_key):
    """Warm the cache for *workflow_key* if not already loaded. Return cached list.

    The returned list is the live cache entry (metadata dicts, sorted
    ascending by timestamp); callers handing entries out should copy them.
    """
    if workflow_key in _cache_warmed:
        return _cache.get(workflow_key, [])

    loaded = []
    directory = _workflow_dir(workflow_key)
    if os.path.isdir(directory):
        for name in os.listdir(directory):
            if not name.endswith(".json"):
                continue
            file_path = os.path.join(directory, name)
            try:
                with open(file_path, "r", encoding="utf-8") as fh:
                    loaded.append(_extract_meta(json.load(fh)))
            except (json.JSONDecodeError, OSError):
                # Skip unreadable/corrupt snapshot files rather than
                # failing the whole listing.
                continue
    loaded.sort(key=lambda entry: entry.get("timestamp", 0))
    _cache[workflow_key] = loaded
    _cache_warmed.add(workflow_key)
    return _cache.get(workflow_key, [])
# ─── Helpers ─────────────────────────────────────────────────────────
def _workflow_dir(workflow_key): def _workflow_dir(workflow_key):
encoded = urllib.parse.quote(workflow_key, safe="") encoded = urllib.parse.quote(workflow_key, safe="")
@@ -24,8 +61,10 @@ def _validate_id(snapshot_id):
raise ValueError(f"Invalid snapshot id: {snapshot_id!r}") raise ValueError(f"Invalid snapshot id: {snapshot_id!r}")
# ─── Public API ──────────────────────────────────────────────────────
def put(record): def put(record):
"""Write one snapshot record to disk.""" """Write one snapshot record to disk and update the cache."""
snapshot_id = record["id"] snapshot_id = record["id"]
workflow_key = record["workflowKey"] workflow_key = record["workflowKey"]
_validate_id(snapshot_id) _validate_id(snapshot_id)
@@ -35,33 +74,82 @@ def put(record):
with open(path, "w", encoding="utf-8") as f: with open(path, "w", encoding="utf-8") as f:
json.dump(record, f, separators=(",", ":")) json.dump(record, f, separators=(",", ":"))
# Update cache only if already warmed; otherwise _ensure_cached will
# pick up the new file from disk on next read.
if workflow_key in _cache_warmed:
meta = _extract_meta(record)
cached = _cache[workflow_key]
cached[:] = [e for e in cached if e.get("id") != snapshot_id]
cached.append(meta)
cached.sort(key=lambda r: r.get("timestamp", 0))
def get_all_for_workflow(workflow_key): def get_all_for_workflow(workflow_key):
"""Return all snapshots for a workflow, sorted ascending by timestamp.""" """Return all snapshot metadata for a workflow (no graphData), sorted ascending by timestamp."""
d = _workflow_dir(workflow_key) return [dict(e) for e in _ensure_cached(workflow_key)]
if not os.path.isdir(d):
return []
results = [] def get_full_record(workflow_key, snapshot_id):
for fname in os.listdir(d): """Read a single snapshot file from disk (with graphData). Returns dict or None."""
if not fname.endswith(".json"): _validate_id(snapshot_id)
continue path = os.path.join(_workflow_dir(workflow_key), f"{snapshot_id}.json")
path = os.path.join(d, fname) if not os.path.isfile(path):
try: return None
with open(path, "r", encoding="utf-8") as f: try:
results.append(json.load(f)) with open(path, "r", encoding="utf-8") as f:
except (json.JSONDecodeError, OSError): return json.load(f)
continue except (json.JSONDecodeError, OSError):
results.sort(key=lambda r: r.get("timestamp", 0)) return None
return results
def update_meta(workflow_key, snapshot_id, fields):
    """Merge *fields* into an existing snapshot on disk without touching graphData.

    ``None`` values in *fields* delete the corresponding key. ``graphData``
    is ignored entirely — both on disk and in the cache — so a metadata
    update can never clobber the stored graph (the previous disk-merge loop
    would have written ``fields["graphData"]``, contradicting this contract).
    The warmed in-memory cache entry, if present, is updated in lockstep.

    Returns True on success, False if the file does not exist or cannot be
    read. Raises ValueError for a malformed *snapshot_id*.
    """
    _validate_id(snapshot_id)
    path = os.path.join(_workflow_dir(workflow_key), f"{snapshot_id}.json")
    if not os.path.isfile(path):
        return False
    try:
        with open(path, "r", encoding="utf-8") as f:
            record = json.load(f)
    except (json.JSONDecodeError, OSError):
        # Corrupt/unreadable snapshot: treat like a missing record, matching
        # how the read paths tolerate bad files, instead of raising a 500.
        return False
    # Merge fields; None values remove the key. graphData is never merged.
    for k, v in fields.items():
        if k == "graphData":
            continue
        if v is None:
            record.pop(k, None)
        else:
            record[k] = v
    with open(path, "w", encoding="utf-8") as f:
        json.dump(record, f, separators=(",", ":"))
    # Mirror the change into the warmed cache entry, if one exists.
    for entry in _cache.get(workflow_key, []):
        if entry.get("id") == snapshot_id:
            for k, v in fields.items():
                if k == "graphData":
                    continue
                if v is None:
                    entry.pop(k, None)
                else:
                    entry[k] = v
            break
    return True
def delete(workflow_key, snapshot_id): def delete(workflow_key, snapshot_id):
"""Remove one snapshot file. Cleans up empty workflow dir.""" """Remove one snapshot file and its cache entry. Cleans up empty workflow dir."""
_validate_id(snapshot_id) _validate_id(snapshot_id)
d = _workflow_dir(workflow_key) d = _workflow_dir(workflow_key)
path = os.path.join(d, f"{snapshot_id}.json") path = os.path.join(d, f"{snapshot_id}.json")
if os.path.isfile(path): if os.path.isfile(path):
os.remove(path) os.remove(path)
# Update cache
if workflow_key in _cache:
_cache[workflow_key] = [e for e in _cache[workflow_key] if e.get("id") != snapshot_id]
if not _cache[workflow_key]:
del _cache[workflow_key]
_cache_warmed.discard(workflow_key)
# Clean up empty directory # Clean up empty directory
if os.path.isdir(d) and not os.listdir(d): if os.path.isdir(d) and not os.listdir(d):
os.rmdir(d) os.rmdir(d)
@@ -69,18 +157,28 @@ def delete(workflow_key, snapshot_id):
def delete_all_for_workflow(workflow_key): def delete_all_for_workflow(workflow_key):
"""Delete all unlocked snapshots for a workflow. Returns {lockedCount}.""" """Delete all unlocked snapshots for a workflow. Returns {lockedCount}."""
records = get_all_for_workflow(workflow_key) entries = _ensure_cached(workflow_key)
locked = []
locked_count = 0 locked_count = 0
for rec in records: d = _workflow_dir(workflow_key)
for rec in entries:
if rec.get("locked"): if rec.get("locked"):
locked_count += 1 locked_count += 1
locked.append(rec)
else: else:
_validate_id(rec["id"]) _validate_id(rec["id"])
path = os.path.join(_workflow_dir(workflow_key), f"{rec['id']}.json") path = os.path.join(d, f"{rec['id']}.json")
if os.path.isfile(path): if os.path.isfile(path):
os.remove(path) os.remove(path)
# Update cache to locked-only
if locked:
_cache[workflow_key] = locked
else:
_cache.pop(workflow_key, None)
_cache_warmed.discard(workflow_key)
# Clean up empty directory # Clean up empty directory
d = _workflow_dir(workflow_key)
if os.path.isdir(d) and not os.listdir(d): if os.path.isdir(d) and not os.listdir(d):
os.rmdir(d) os.rmdir(d)
return {"lockedCount": locked_count} return {"lockedCount": locked_count}
@@ -95,11 +193,11 @@ def get_all_workflow_keys():
subdir = os.path.join(_DATA_DIR, encoded_name) subdir = os.path.join(_DATA_DIR, encoded_name)
if not os.path.isdir(subdir): if not os.path.isdir(subdir):
continue continue
count = sum(1 for f in os.listdir(subdir) if f.endswith(".json"))
if count == 0:
continue
workflow_key = urllib.parse.unquote(encoded_name) workflow_key = urllib.parse.unquote(encoded_name)
results.append({"workflowKey": workflow_key, "count": count}) entries = _ensure_cached(workflow_key)
if not entries:
continue
results.append({"workflowKey": workflow_key, "count": len(entries)})
results.sort(key=lambda r: r["workflowKey"]) results.sort(key=lambda r: r["workflowKey"])
return results return results
@@ -112,22 +210,36 @@ def prune(workflow_key, max_snapshots, source=None):
- "regular": only prune records where source is absent or not "node" - "regular": only prune records where source is absent or not "node"
- None: prune all unlocked (existing behavior) - None: prune all unlocked (existing behavior)
""" """
records = get_all_for_workflow(workflow_key) entries = _ensure_cached(workflow_key)
if source == "node": if source == "node":
candidates = [r for r in records if not r.get("locked") and r.get("source") == "node"] candidates = [r for r in entries if not r.get("locked") and r.get("source") == "node"]
elif source == "regular": elif source == "regular":
candidates = [r for r in records if not r.get("locked") and r.get("source") != "node"] candidates = [r for r in entries if not r.get("locked") and r.get("source") != "node"]
else: else:
candidates = [r for r in records if not r.get("locked")] candidates = [r for r in entries if not r.get("locked")]
if len(candidates) <= max_snapshots: if len(candidates) <= max_snapshots:
return 0 return 0
to_delete = candidates[: len(candidates) - max_snapshots] to_delete = candidates[: len(candidates) - max_snapshots]
d = _workflow_dir(workflow_key) d = _workflow_dir(workflow_key)
deleted = 0 deleted = 0
delete_ids = set()
for rec in to_delete: for rec in to_delete:
_validate_id(rec["id"]) _validate_id(rec["id"])
path = os.path.join(d, f"{rec['id']}.json") path = os.path.join(d, f"{rec['id']}.json")
if os.path.isfile(path): if os.path.isfile(path):
os.remove(path) os.remove(path)
deleted += 1 deleted += 1
delete_ids.add(rec["id"])
# Update cache
if delete_ids and workflow_key in _cache:
_cache[workflow_key] = [e for e in _cache[workflow_key] if e.get("id") not in delete_ids]
if not _cache[workflow_key]:
del _cache[workflow_key]
_cache_warmed.discard(workflow_key)
# Clean up empty directory
if os.path.isdir(d) and not os.listdir(d):
os.rmdir(d)
return deleted return deleted