Fix RAM leak: strip history snapshots from memory, load on demand

History tree nodes stored full data snapshots in memory (5-50MB each),
accumulating with every save. Now:

- New `history_snapshots` DB table stores node data separately
- `save_history_tree` and `sync_to_db` extract snapshots before saving
- In-memory tree nodes only hold metadata (id, parent, note, timestamp)
- Restore and preview load snapshots from DB on demand
- `save_and_snap` uses a JSON roundtrip instead of deepcopy (one copy instead of two)
- `_src_cache` moved to AppState, cleared on file switch
- `strip_snapshots()` method on HistoryTree for explicit cleanup

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-19 09:48:47 +01:00
parent 79e1426036
commit eac4e4f08b
7 changed files with 173 additions and 23 deletions
+26 -2
View File
@@ -210,6 +210,15 @@ def save_json(path: str | Path, data: dict[str, Any]) -> None:
os.replace(tmp, path)
logger.info("save_json %s: %.3fs", path.name, time.time() - t0)
def snapshot_data(data: dict[str, Any]) -> dict[str, Any]:
    """Return an independent deep copy of ``data`` built by a JSON roundtrip.

    Call this on the main thread *before* handing the result to
    ``asyncio.to_thread``; copying up front avoids 'dict changed size during
    iteration' errors when the UI mutates ``data`` while a worker reads it.

    Because the copy goes through JSON, ``data`` must be JSON-serializable,
    and values come back as their JSON equivalents (for example, tuples
    become lists and non-string keys become strings).
    """
    # Serialize first, then parse: the parsed result shares no objects
    # with the input.
    serialized = json.dumps(data)
    return json.loads(serialized)
def get_file_mtime(path: str | Path) -> float:
"""Returns the modification time of a file, or 0 if it doesn't exist."""
path = Path(path)
@@ -279,14 +288,29 @@ def sync_to_db(db, project_name: str, file_path: Path, data: dict) -> None:
else:
db.conn.execute("DELETE FROM sequences WHERE data_file_id = ?", (df_id,))
# Sync history tree
# Sync history tree (extract node snapshots into separate table)
history_tree = data.get(KEY_HISTORY_TREE)
if history_tree and isinstance(history_tree, dict):
nodes = history_tree.get("nodes", {})
slim_tree = dict(history_tree)
slim_nodes = {}
for nid, node in nodes.items():
snap = node.get("data")
if snap:
db.conn.execute(
"INSERT INTO history_snapshots (data_file_id, node_id, snapshot_data, updated_at) "
"VALUES (?, ?, ?, ?) "
"ON CONFLICT(data_file_id, node_id) DO UPDATE SET "
"snapshot_data=excluded.snapshot_data, updated_at=excluded.updated_at",
(df_id, nid, json.dumps(snap), now),
)
slim_nodes[nid] = {k: v for k, v in node.items() if k != "data"}
slim_tree["nodes"] = slim_nodes
db.conn.execute(
"INSERT INTO history_trees (data_file_id, tree_data, updated_at) "
"VALUES (?, ?, ?) "
"ON CONFLICT(data_file_id) DO UPDATE SET tree_data=excluded.tree_data, updated_at=excluded.updated_at",
(df_id, json.dumps(history_tree), now),
(df_id, json.dumps(slim_tree), now),
)
db.conn.execute("COMMIT")