Fix RAM leak: strip history snapshots from memory, load on demand

History tree nodes stored full data snapshots in memory (5-50MB each),
accumulating with every save. Now:

- New `history_snapshots` DB table stores node data separately
- `save_history_tree` and `sync_to_db` extract snapshots before saving
- In-memory tree nodes only hold metadata (id, parent, note, timestamp)
- Restore and preview load snapshots from DB on demand
- `save_and_snap` uses a JSON round-trip instead of `deepcopy` (one copy instead of two)
- `_src_cache` moved to AppState, cleared on file switch
- `strip_snapshots()` method on HistoryTree for explicit cleanup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-19 09:48:47 +01:00
parent 79e1426036
commit eac4e4f08b
7 changed files with 173 additions and 23 deletions
+74 -4
View File
@@ -48,8 +48,18 @@ CREATE TABLE IF NOT EXISTS history_trees (
updated_at REAL NOT NULL
);
CREATE TABLE IF NOT EXISTS history_snapshots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
data_file_id INTEGER NOT NULL REFERENCES data_files(id) ON DELETE CASCADE,
node_id TEXT NOT NULL,
snapshot_data TEXT NOT NULL,
updated_at REAL NOT NULL,
UNIQUE(data_file_id, node_id)
);
CREATE INDEX IF NOT EXISTS idx_data_files_project_id ON data_files(project_id);
CREATE INDEX IF NOT EXISTS idx_sequences_data_file_id ON sequences(data_file_id);
CREATE INDEX IF NOT EXISTS idx_history_snapshots_df ON history_snapshots(data_file_id);
"""
@@ -314,22 +324,64 @@ class ProjectDB:
# ------------------------------------------------------------------
def save_history_tree(self, data_file_id: int, tree_data: dict) -> None:
    """Save a history tree, moving node snapshots into ``history_snapshots``.

    Each node's ``"data"`` payload is upserted into ``history_snapshots``
    keyed by ``(data_file_id, node_id)``; the tree written to
    ``history_trees`` keeps every other node field but no ``"data"``.

    Nodes whose ``"data"`` is missing or falsy are skipped, so a snapshot
    stored earlier for that node is left untouched.  This lets a tree whose
    nodes were already stripped in memory be re-saved without wiping the
    stored snapshots.

    ``tree_data`` itself is not modified.

    Args:
        data_file_id: Row id of the owning data file.
        tree_data: Full tree dict; nodes are read from ``tree_data["nodes"]``
            (a mapping of node id -> node dict).
    """
    now = time.time()

    # Loop-invariant upsert statement, built once.
    snapshot_sql = (
        "INSERT INTO history_snapshots (data_file_id, node_id, snapshot_data, updated_at) "
        "VALUES (?, ?, ?, ?) "
        "ON CONFLICT(data_file_id, node_id) DO UPDATE SET "
        "snapshot_data=excluded.snapshot_data, updated_at=excluded.updated_at"
    )

    slim_nodes = {}
    for nid, node in tree_data.get("nodes", {}).items():
        snap = node.get("data")
        if snap:
            # Serialize one snapshot at a time so only a single JSON string
            # is alive at any moment.
            self.conn.execute(
                snapshot_sql,
                (data_file_id, nid, json.dumps(snap), now),
            )
        # Keep the node's metadata, drop the heavy payload.
        slim_nodes[nid] = {k: v for k, v in node.items() if k != "data"}

    # Shallow copy with the slimmed nodes swapped in; the caller's dict
    # is left as it was.
    slim_tree = {**tree_data, "nodes": slim_nodes}

    # Only the slim tree is serialized here -- never the original
    # ``tree_data``, which would put the snapshots back into the row.
    self.conn.execute(
        "INSERT INTO history_trees (data_file_id, tree_data, updated_at) "
        "VALUES (?, ?, ?) "
        "ON CONFLICT(data_file_id) DO UPDATE SET tree_data=excluded.tree_data, updated_at=excluded.updated_at",
        (data_file_id, json.dumps(slim_tree), now),
    )
    self.conn.commit()
def get_history_tree(self, data_file_id: int) -> dict | None:
"""Load history tree metadata (without snapshot data)."""
row = self.conn.execute(
"SELECT tree_data FROM history_trees WHERE data_file_id = ?",
(data_file_id,),
).fetchone()
return json.loads(row["tree_data"]) if row else None
# ------------------------------------------------------------------
# History snapshots (per-node data, loaded on demand)
# ------------------------------------------------------------------
def get_node_snapshot(self, data_file_id: int, node_id: str) -> dict | None:
"""Load a single node's snapshot data on demand."""
row = self.conn.execute(
"SELECT snapshot_data FROM history_snapshots WHERE data_file_id = ? AND node_id = ?",
(data_file_id, node_id),
).fetchone()
return json.loads(row["snapshot_data"]) if row else None
def delete_node_snapshots(self, data_file_id: int, node_ids: set) -> None:
    """Delete the stored snapshots for the given nodes of one data file.

    Rows are removed from ``history_snapshots`` where ``data_file_id``
    matches and ``node_id`` is in ``node_ids``.  An empty ``node_ids`` is
    a no-op (nothing is executed or committed).

    The ids are deleted in fixed-size chunks: each id is a bound parameter,
    and SQLite limits the number of parameters per statement (999 by
    default on builds older than 3.32), so one ``IN (...)`` list for a
    large prune could fail with ``OperationalError``.

    Args:
        data_file_id: Row id of the owning data file.
        node_ids: Ids of the nodes whose snapshots should be removed.
    """
    if not node_ids:
        return

    # Materialize once so the placeholders and the bound values are
    # guaranteed to come from the same sequence.
    ids = list(node_ids)

    # Stays well under the lowest common SQLite parameter limit,
    # leaving room for the data_file_id parameter.
    chunk_size = 500
    for start in range(0, len(ids), chunk_size):
        chunk = ids[start:start + chunk_size]
        placeholders = ",".join("?" * len(chunk))
        self.conn.execute(
            f"DELETE FROM history_snapshots WHERE data_file_id = ? AND node_id IN ({placeholders})",
            (data_file_id, *chunk),
        )
    self.conn.commit()
# ------------------------------------------------------------------
# Import
# ------------------------------------------------------------------
@@ -385,15 +437,30 @@ class ProjectDB:
(df_id, seq_num, json.dumps(item), now),
)
# Import history tree
# Import history tree (extract snapshots into separate table)
history_tree = data.get(KEY_HISTORY_TREE)
if history_tree and isinstance(history_tree, dict):
now = time.time()
nodes = history_tree.get("nodes", {})
slim_tree = dict(history_tree)
slim_nodes = {}
for nid, node in nodes.items():
snap = node.get("data")
if snap:
self.conn.execute(
"INSERT INTO history_snapshots (data_file_id, node_id, snapshot_data, updated_at) "
"VALUES (?, ?, ?, ?) "
"ON CONFLICT(data_file_id, node_id) DO UPDATE SET "
"snapshot_data=excluded.snapshot_data, updated_at=excluded.updated_at",
(df_id, nid, json.dumps(snap), now),
)
slim_nodes[nid] = {k: v for k, v in node.items() if k != "data"}
slim_tree["nodes"] = slim_nodes
self.conn.execute(
"INSERT INTO history_trees (data_file_id, tree_data, updated_at) "
"VALUES (?, ?, ?) "
"ON CONFLICT(data_file_id) DO UPDATE SET tree_data=excluded.tree_data, updated_at=excluded.updated_at",
(df_id, json.dumps(history_tree), now),
(df_id, json.dumps(slim_tree), now),
)
self.conn.execute("COMMIT")
@@ -445,9 +512,12 @@ class ProjectDB:
data["batch_data"] = batch_data
t2 = time.time()
# Load history tree
# Load history tree (metadata only, no snapshot data)
tree = self.get_history_tree(df["id"])
if tree:
# Strip any residual snapshot data from nodes
for node in tree.get("nodes", {}).values():
node.pop("data", None)
data["history_tree"] = tree
t3 = time.time()