diff --git a/db.py b/db.py index 1a4b564..aeb44c7 100644 --- a/db.py +++ b/db.py @@ -48,8 +48,18 @@ CREATE TABLE IF NOT EXISTS history_trees ( updated_at REAL NOT NULL ); +CREATE TABLE IF NOT EXISTS history_snapshots ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + data_file_id INTEGER NOT NULL REFERENCES data_files(id) ON DELETE CASCADE, + node_id TEXT NOT NULL, + snapshot_data TEXT NOT NULL, + updated_at REAL NOT NULL, + UNIQUE(data_file_id, node_id) +); + CREATE INDEX IF NOT EXISTS idx_data_files_project_id ON data_files(project_id); CREATE INDEX IF NOT EXISTS idx_sequences_data_file_id ON sequences(data_file_id); +CREATE INDEX IF NOT EXISTS idx_history_snapshots_df ON history_snapshots(data_file_id); """ @@ -314,22 +324,64 @@ class ProjectDB: # ------------------------------------------------------------------ def save_history_tree(self, data_file_id: int, tree_data: dict) -> None: + """Save history tree, extracting node snapshots into separate table.""" now = time.time() + # Extract snapshot data from nodes into history_snapshots table + nodes = tree_data.get("nodes", {}) + slim_tree = dict(tree_data) + slim_nodes = {} + for nid, node in nodes.items(): + snap = node.get("data") + if snap: + self.conn.execute( + "INSERT INTO history_snapshots (data_file_id, node_id, snapshot_data, updated_at) " + "VALUES (?, ?, ?, ?) " + "ON CONFLICT(data_file_id, node_id) DO UPDATE SET " + "snapshot_data=excluded.snapshot_data, updated_at=excluded.updated_at", + (data_file_id, nid, json.dumps(snap), now), + ) + # Store node without data in tree + slim_nodes[nid] = {k: v for k, v in node.items() if k != "data"} + slim_tree["nodes"] = slim_nodes self.conn.execute( "INSERT INTO history_trees (data_file_id, tree_data, updated_at) " "VALUES (?, ?, ?) " "ON CONFLICT(data_file_id) DO UPDATE SET tree_data=excluded.tree_data, updated_at=excluded.updated_at", - (data_file_id, json.dumps(tree_data), now), + (data_file_id, json.dumps(slim_tree), now), ) self.conn.commit() def get_history_tree(self, data_file_id: int) -> dict | None: + """Load history tree metadata (without snapshot data).""" row = self.conn.execute( "SELECT tree_data FROM history_trees WHERE data_file_id = ?", (data_file_id,), ).fetchone() return json.loads(row["tree_data"]) if row else None + # ------------------------------------------------------------------ + # History snapshots (per-node data, loaded on demand) + # ------------------------------------------------------------------ + + def get_node_snapshot(self, data_file_id: int, node_id: str) -> dict | None: + """Load a single node's snapshot data on demand.""" + row = self.conn.execute( + "SELECT snapshot_data FROM history_snapshots WHERE data_file_id = ? AND node_id = ?", + (data_file_id, node_id), + ).fetchone() + return json.loads(row["snapshot_data"]) if row else None + + def delete_node_snapshots(self, data_file_id: int, node_ids: set) -> None: + """Delete snapshots for removed nodes.""" + if not node_ids: + return + placeholders = ",".join("?" for _ in node_ids) + self.conn.execute( + f"DELETE FROM history_snapshots WHERE data_file_id = ? AND node_id IN ({placeholders})", + (data_file_id, *node_ids), + ) + self.conn.commit() + # ------------------------------------------------------------------ # Import # ------------------------------------------------------------------ @@ -385,15 +437,30 @@ class ProjectDB: (df_id, seq_num, json.dumps(item), now), ) - # Import history tree + # Import history tree (extract snapshots into separate table) history_tree = data.get(KEY_HISTORY_TREE) if history_tree and isinstance(history_tree, dict): now = time.time() + nodes = history_tree.get("nodes", {}) + slim_tree = dict(history_tree) + slim_nodes = {} + for nid, node in nodes.items(): + snap = node.get("data") + if snap: + self.conn.execute( + "INSERT INTO history_snapshots (data_file_id, node_id, snapshot_data, updated_at) " + "VALUES (?, ?, ?, ?) " + "ON CONFLICT(data_file_id, node_id) DO UPDATE SET " + "snapshot_data=excluded.snapshot_data, updated_at=excluded.updated_at", + (df_id, nid, json.dumps(snap), now), + ) + slim_nodes[nid] = {k: v for k, v in node.items() if k != "data"} + slim_tree["nodes"] = slim_nodes self.conn.execute( "INSERT INTO history_trees (data_file_id, tree_data, updated_at) " "VALUES (?, ?, ?) " "ON CONFLICT(data_file_id) DO UPDATE SET tree_data=excluded.tree_data, updated_at=excluded.updated_at", - (df_id, json.dumps(history_tree), now), + (df_id, json.dumps(slim_tree), now), ) self.conn.execute("COMMIT") @@ -445,9 +512,12 @@ class ProjectDB: data["batch_data"] = batch_data t2 = time.time() - # Load history tree + # Load history tree (metadata only, no snapshot data) tree = self.get_history_tree(df["id"]) if tree: + # Strip any residual snapshot data from nodes + for node in tree.get("nodes", {}).values(): + node.pop("data", None) data["history_tree"] = tree t3 = time.time() diff --git a/history_tree.py b/history_tree.py index 77db105..e605bfb 100644 --- a/history_tree.py +++ b/history_tree.py @@ -76,6 +76,11 @@ class HistoryTree: return self.nodes[node_id]["data"] return None + def strip_snapshots(self) -> None: + """Remove snapshot data from all nodes to free memory.""" + for node in self.nodes.values(): + node.pop("data", None) + def to_dict(self) -> dict[str, Any]: return {"nodes": self.nodes, "branches": self.branches, "head_id": self.head_id} diff --git a/main.py b/main.py index e787965..e9ed3d6 100644 --- a/main.py +++ b/main.py @@ -11,7 +11,7 @@ from utils import ( load_config, save_config, load_snippets, save_snippets, load_json, save_json, generate_templates, DEFAULTS, KEY_BATCH_DATA, KEY_SEQUENCE_NUMBER, - resolve_path_case_insensitive, + resolve_path_case_insensitive, sync_to_db, ) from tab_batch_ng import render_batch_processor from tab_timeline_ng import render_timeline_tab @@ -290,11 +290,19 @@ def index(): pane_state.db.load_full_data, pane_state.current_project, file_stem) if data is None: data, _ = await asyncio.to_thread(load_json, fp) + if pane_state.db and pane_state.db_enabled and pane_state.current_project: + await asyncio.to_thread( + sync_to_db, pane_state.db, pane_state.current_project, fp, data) + tree = data.get('history_tree') + if tree and isinstance(tree, dict): + for node in tree.get('nodes', {}).values(): + node.pop('data', None) pane_state.data_cache = data pane_state.last_mtime = fp.stat().st_mtime if fp.exists() else 0 pane_state.loaded_file = str(fp) pane_state.file_path = fp pane_state.restored_indicator = None + pane_state._src_cache = {'data': None, 'batch': [], 'name': None} _render_batch_tab_content.refresh() logger.info("on_select END (%.3fs)", _time.perf_counter() - _t0) @@ -320,11 +328,21 @@ def index(): state.db.load_full_data, state.current_project, file_stem) if data is None: data, _ = await asyncio.to_thread(load_json, fp) + # When loading from JSON fallback and DB is enabled, sync to DB + # so snapshots are persisted, then strip from memory + if state.db and state.db_enabled and state.current_project: + await asyncio.to_thread( + sync_to_db, state.db, state.current_project, fp, data) + tree = data.get('history_tree') + if tree and isinstance(tree, dict): + for node in tree.get('nodes', {}).values(): + node.pop('data', None) state.data_cache = data state.last_mtime = fp.stat().st_mtime if fp.exists() else 0 state.loaded_file = str(fp) state.file_path = fp state.restored_indicator = None + state._src_cache = {'data': None, 'batch': [], 'name': None} if state._main_rendered: render_main_content.refresh() logger.info("load_file END (%.3fs)", _time.perf_counter() - _t0) diff --git a/state.py b/state.py index bef8818..4f8d7a4 100644 --- a/state.py +++ b/state.py @@ -28,6 +28,7 @@ class AppState: _main_rendered: bool = False _live_checkboxes: dict = field(default_factory=dict) _live_refreshables: dict = field(default_factory=dict) + _src_cache: dict = field(default_factory=lambda: {'data': None, 'batch': [], 'name': None}) def create_secondary(self) -> 'AppState': return AppState( diff --git a/tab_batch_ng.py b/tab_batch_ng.py index d29ef70..50f6c01 100644 --- a/tab_batch_ng.py +++ b/tab_batch_ng.py @@ -247,8 +247,8 @@ def render_batch_processor(state: AppState): src_seq_select = ui.select([], label='Source Sequence:').classes('w-64') - # Track loaded source data - _src_cache = {'data': None, 'batch': [], 'name': None} + # Track loaded source data (on state so it's cleared on file switch) + _src_cache = state._src_cache def _update_src(): name = src_file_select.value @@ -359,11 +359,14 @@ def render_batch_processor(state: AppState): data[KEY_BATCH_DATA] = batch_list tree_data = data.get(KEY_HISTORY_TREE, {}) htree = HistoryTree(tree_data) - t1 = time.perf_counter() - snapshot_payload = {k: copy.deepcopy(v) for k, v in data.items() - if k != KEY_HISTORY_TREE} - logger.info("save_and_snap deepcopy %.3fs", time.perf_counter() - t1) note = commit_input.value if commit_input.value else _auto_change_note(htree, batch_list) + # Single serialization: json roundtrip gives us an isolated snapshot + # without the expensive deepcopy + t1 = time.perf_counter() + snapshot_json = json.dumps({k: v for k, v in data.items() + if k != KEY_HISTORY_TREE}) + snapshot_payload = json.loads(snapshot_json) + logger.info("save_and_snap snapshot %.3fs", time.perf_counter() - t1) try: htree.commit(snapshot_payload, note=note) except ValueError as e: @@ -371,13 +374,16 @@ def render_batch_processor(state: AppState): return data[KEY_HISTORY_TREE] = htree.to_dict() t1 = time.perf_counter() - snapshot = json.loads(json.dumps(data)) - await asyncio.to_thread(save_json, file_path, snapshot) + save_snapshot = json.loads(json.dumps(data)) + await asyncio.to_thread(save_json, file_path, save_snapshot) logger.info("save_and_snap save_json %.3fs", time.perf_counter() - t1) if state.db_enabled and state.current_project and state.db: t1 = time.perf_counter() - await asyncio.to_thread(sync_to_db, state.db, state.current_project, file_path, snapshot) + await asyncio.to_thread(sync_to_db, state.db, state.current_project, file_path, save_snapshot) logger.info("save_and_snap sync_to_db %.3fs", time.perf_counter() - t1) + # Free snapshot data from memory — it's persisted in DB now + htree.strip_snapshots() + data[KEY_HISTORY_TREE] = htree.to_dict() state.restored_indicator = None commit_input.set_value('') logger.info("save_and_snap END (%.3fs)", time.perf_counter() - t_ss) diff --git a/tab_timeline_ng.py b/tab_timeline_ng.py index 5574bf6..d1c9882 100644 --- a/tab_timeline_ng.py +++ b/tab_timeline_ng.py @@ -13,7 +13,7 @@ from utils import save_json, sync_to_db, KEY_BATCH_DATA, KEY_HISTORY_TREE logger = logging.getLogger(__name__) -def _delete_nodes(htree, data, file_path, node_ids): +def _delete_nodes(htree, data, file_path, node_ids, state=None): """Delete nodes with backup, branch cleanup, re-parenting, and head fallback.""" if 'history_tree_backup' not in data: data['history_tree_backup'] = [] @@ -52,6 +52,11 @@ def _delete_nodes(htree, data, file_path, node_ids): else: htree.head_id = None data[KEY_HISTORY_TREE] = htree.to_dict() + # Clean up DB snapshots for deleted nodes + if state and state.db_enabled and state.db and state.current_project: + df = state.db.get_data_file_by_names(state.current_project, file_path.stem) + if df: + state.db.delete_node_snapshots(df['id'], set(node_ids)) def _render_selection_picker(all_nodes, htree, state, refresh_fn): @@ -159,7 +164,7 @@ def _render_batch_delete(htree, data, file_path, state, refresh_fn): async def do_batch_delete(): current_valid = state.timeline_selected_nodes & set(htree.nodes.keys()) - _delete_nodes(htree, data, file_path, current_valid) + _delete_nodes(htree, data, file_path, current_valid, state=state) snapshot = json.loads(json.dumps(data)) await asyncio.to_thread(save_json, file_path, snapshot) if state.db_enabled and state.current_project and state.db: @@ -344,7 +349,7 @@ def _render_node_manager(all_nodes, htree, data, file_path, restore_fn, refresh_ async def delete_selected(): if sel_id in htree.nodes: - _delete_nodes(htree, data, file_path, {sel_id}) + _delete_nodes(htree, data, file_path, {sel_id}, state=state) snapshot = json.loads(json.dumps(data)) await asyncio.to_thread(save_json, file_path, snapshot) if state and state.db_enabled and state.current_project and state.db: @@ -361,7 +366,7 @@ def _render_node_manager(all_nodes, htree, data, file_path, restore_fn, refresh_ # Data preview with ui.expansion('Data Preview', icon='preview').classes('w-full q-mt-sm'): - _render_data_preview(sel_id, htree) + _render_data_preview(sel_id, htree, state=state, file_path=file_path) render_branch_nodes() @@ -566,7 +571,20 @@ async def _restore_node(data, node, htree, file_path, state: AppState): """Restore a history node as the current version (full replace, not merge).""" t0 = time.perf_counter() logger.info("_restore_node START: %s", node.get('note', 'Step')) - node_data = json.loads(json.dumps(node.get('data', {}))) + # Load snapshot from DB on demand (nodes no longer hold data in memory) + raw_snap = node.get('data') + if not raw_snap and state.db_enabled and state.db and state.current_project: + df = state.db.get_data_file_by_names(state.current_project, file_path.stem) + if df: + raw_snap = await asyncio.to_thread( + state.db.get_node_snapshot, df['id'], node['id']) + if not raw_snap: + # Last resort: read from JSON file on disk + from utils import load_json as _load_json + raw_file, _ = await asyncio.to_thread(_load_json, file_path) + tree_on_disk = raw_file.get(KEY_HISTORY_TREE, {}) + raw_snap = tree_on_disk.get('nodes', {}).get(node['id'], {}).get('data', {}) + node_data = json.loads(json.dumps(raw_snap)) if raw_snap else {} # Preserve the history tree before clearing preserved_tree = data.get(KEY_HISTORY_TREE) preserved_backup = data.get('history_tree_backup') @@ -589,13 +607,21 @@ async def _restore_node(data, node, htree, file_path, state: AppState): ui.notify('Restored!', type='positive') -def _render_data_preview(nid, htree): +def _render_data_preview(nid, htree, state: AppState = None, file_path=None): """Render a read-only preview of the selected node's data.""" if not nid or nid not in htree.nodes: ui.label('No node selected.').classes('text-caption') return - node_data = htree.nodes[nid].get('data', {}) + # Load snapshot from DB on demand (not stored in memory) + node_data = htree.nodes[nid].get('data') + if not node_data and state and state.db_enabled and state.db and state.current_project and file_path: + df = state.db.get_data_file_by_names(state.current_project, file_path.stem) + if df: + node_data = state.db.get_node_snapshot(df['id'], nid) + if not node_data: + ui.label('Snapshot data not available.').classes('text-caption text-warning') + return batch_list = node_data.get(KEY_BATCH_DATA, []) if batch_list and isinstance(batch_list, list) and len(batch_list) > 0: diff --git a/utils.py b/utils.py index c73e693..56146f8 100644 --- a/utils.py +++ b/utils.py @@ -210,6 +210,15 @@ def save_json(path: str | Path, data: dict[str, Any]) -> None: os.replace(tmp, path) logger.info("save_json %s: %.3fs", path.name, time.time() - t0) + +def snapshot_data(data: dict[str, Any]) -> dict[str, Any]: + """Create a thread-safe deep copy via JSON roundtrip. + + Must be called on the main thread before passing data to asyncio.to_thread, + to avoid 'dict changed size during iteration' when the UI mutates data. + """ + return json.loads(json.dumps(data)) + def get_file_mtime(path: str | Path) -> float: """Returns the modification time of a file, or 0 if it doesn't exist.""" path = Path(path) @@ -279,14 +288,29 @@ def sync_to_db(db, project_name: str, file_path: Path, data: dict) -> None: else: db.conn.execute("DELETE FROM sequences WHERE data_file_id = ?", (df_id,)) - # Sync history tree + # Sync history tree (extract node snapshots into separate table) history_tree = data.get(KEY_HISTORY_TREE) if history_tree and isinstance(history_tree, dict): + nodes = history_tree.get("nodes", {}) + slim_tree = dict(history_tree) + slim_nodes = {} + for nid, node in nodes.items(): + snap = node.get("data") + if snap: + db.conn.execute( + "INSERT INTO history_snapshots (data_file_id, node_id, snapshot_data, updated_at) " + "VALUES (?, ?, ?, ?) " + "ON CONFLICT(data_file_id, node_id) DO UPDATE SET " + "snapshot_data=excluded.snapshot_data, updated_at=excluded.updated_at", + (df_id, nid, json.dumps(snap), now), + ) + slim_nodes[nid] = {k: v for k, v in node.items() if k != "data"} + slim_tree["nodes"] = slim_nodes db.conn.execute( "INSERT INTO history_trees (data_file_id, tree_data, updated_at) " "VALUES (?, ?, ?) " "ON CONFLICT(data_file_id) DO UPDATE SET tree_data=excluded.tree_data, updated_at=excluded.updated_at", - (df_id, json.dumps(history_tree), now), + (df_id, json.dumps(slim_tree), now), ) db.conn.execute("COMMIT")