diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..067db60 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,61 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Turbo Sorter Pro v12.5 - A dual-interface image organization tool combining Streamlit (admin dashboard) and NiceGUI (gallery interface) for managing large image collections through time-sync matching, ID collision resolution, category-based sorting, and gallery tagging with pairing capabilities. + +## Commands + +```bash +# Install dependencies +pip install -r requirements.txt + +# Run Streamlit dashboard (port 8501) +streamlit run app.py --server.port=8501 --server.address=0.0.0.0 + +# Run NiceGUI gallery (port 8080) +python3 gallery_app.py + +# Both services (container startup) +./start.sh + +# Syntax check all Python files +python3 -m py_compile *.py +``` + +## Architecture + +### Dual-Framework Design +- **Streamlit (app.py, port 8501)**: Administrative dashboard with 5 modular tabs for management workflows +- **NiceGUI (gallery_app.py, port 8080)**: Modern gallery interface for image tagging and pairing operations +- **Shared Backend**: Both UIs use `SorterEngine` (engine.py) and the same SQLite database + +### Core Components + +| File | Purpose | +|------|---------| +| `engine.py` | Static `SorterEngine` class - all DB operations, file handling, image compression | +| `gallery_app.py` | NiceGUI gallery with `AppState` class for centralized state management | +| `app.py` | Streamlit entry point, loads tab modules | +| `tab_*.py` | Independent tab modules for each workflow | + +### Database +SQLite at `/app/sorter_database.db` with tables: profiles, folder_ids, categories, staging_area, processed_log, folder_tags, profile_categories, pairing_settings. + +### Tab Workflows +1. **Time-Sync Discovery** - Match images by timestamp across folders +2. 
**ID Review** - Resolve ID collisions between target/control folders +3. **Unused Archive** - Manage rejected image pairs +4. **Category Sorter** - One-to-many categorization +5. **Gallery Staged** - Grid-based tagging with Gallery/Pairing dual modes + +## Key Patterns + +- **ID Format**: `id001_`, `id002_` (zero-padded 3-digit prefix) +- **Staging Pattern**: Two-phase commit (stage → commit) with undo support +- **Image Formats**: .jpg, .jpeg, .png, .webp, .bmp, .tiff +- **Compression**: WebP with ThreadPoolExecutor (8 workers) +- **Permissions**: chmod 0o777 applied to committed files +- **Default Paths**: `/storage` when not configured diff --git a/__pycache__/engine.cpython-312.pyc b/__pycache__/engine.cpython-312.pyc new file mode 100644 index 0000000..fc73241 Binary files /dev/null and b/__pycache__/engine.cpython-312.pyc differ diff --git a/__pycache__/gallery_app.cpython-312.pyc b/__pycache__/gallery_app.cpython-312.pyc new file mode 100644 index 0000000..ea14328 Binary files /dev/null and b/__pycache__/gallery_app.cpython-312.pyc differ diff --git a/engine.py b/engine.py index 930ca00..c34910c 100644 --- a/engine.py +++ b/engine.py @@ -1,12 +1,28 @@ import os import shutil import sqlite3 +from contextlib import contextmanager from PIL import Image from io import BytesIO class SorterEngine: DB_PATH = "/app/sorter_database.db" + @staticmethod + @contextmanager + def get_db(): + """Context manager for database connections. + Ensures proper commit/rollback and always closes connection.""" + conn = sqlite3.connect(SorterEngine.DB_PATH) + try: + yield conn + conn.commit() + except Exception: + conn.rollback() + raise + finally: + conn.close() + # --- 1. 
DATABASE INITIALIZATION --- @staticmethod def init_db(): @@ -51,7 +67,15 @@ class SorterEngine: if cursor.fetchone()[0] == 0: for cat in ["_TRASH", "control", "Default", "Action", "Solo"]: cursor.execute("INSERT OR IGNORE INTO categories VALUES (?)", (cat,)) - + + # --- PERFORMANCE INDEXES --- + # Index for staging_area queries filtered by category + cursor.execute("CREATE INDEX IF NOT EXISTS idx_staging_category ON staging_area(target_category)") + # Index for folder_tags queries filtered by profile and folder_path + cursor.execute("CREATE INDEX IF NOT EXISTS idx_folder_tags_profile ON folder_tags(profile, folder_path)") + # Index for profile_categories lookups + cursor.execute("CREATE INDEX IF NOT EXISTS idx_profile_categories ON profile_categories(profile)") + conn.commit() conn.close() @@ -146,42 +170,48 @@ class SorterEngine: @staticmethod def load_profiles(): - """Loads all workspace presets including pairing settings.""" + """Loads all workspace presets including pairing settings. 
+ Uses LEFT JOIN to fetch all data in a single query (fixes N+1 problem).""" conn = sqlite3.connect(SorterEngine.DB_PATH) cursor = conn.cursor() - cursor.execute("SELECT * FROM profiles") - rows = cursor.fetchall() - - # Ensure pairing_settings table exists - cursor.execute('''CREATE TABLE IF NOT EXISTS pairing_settings - (profile TEXT PRIMARY KEY, - adjacent_folder TEXT, - main_category TEXT, - adj_category TEXT, - main_output TEXT, - adj_output TEXT, + + # Ensure pairing_settings table exists before JOIN + cursor.execute('''CREATE TABLE IF NOT EXISTS pairing_settings + (profile TEXT PRIMARY KEY, + adjacent_folder TEXT, + main_category TEXT, + adj_category TEXT, + main_output TEXT, + adj_output TEXT, time_window INTEGER)''') - + + # Single query with LEFT JOIN - eliminates N+1 queries + cursor.execute(''' + SELECT p.name, p.tab1_target, p.tab2_target, p.tab2_control, + p.tab4_source, p.tab4_out, p.mode, p.tab5_source, p.tab5_out, + ps.adjacent_folder, ps.main_category, ps.adj_category, + ps.main_output, ps.adj_output, ps.time_window + FROM profiles p + LEFT JOIN pairing_settings ps ON p.name = ps.profile + ''') + rows = cursor.fetchall() + profiles = {} for r in rows: profile_name = r[0] profiles[profile_name] = { - "tab1_target": r[1], "tab2_target": r[2], "tab2_control": r[3], + "tab1_target": r[1], "tab2_target": r[2], "tab2_control": r[3], "tab4_source": r[4], "tab4_out": r[5], "mode": r[6], - "tab5_source": r[7], "tab5_out": r[8] + "tab5_source": r[7], "tab5_out": r[8], + # Pairing settings from JOIN (with defaults for NULL) + "pair_adjacent_folder": r[9] or "", + "pair_main_category": r[10] or "control", + "pair_adj_category": r[11] or "control", + "pair_main_output": r[12] or "/storage", + "pair_adj_output": r[13] or "/storage", + "pair_time_window": r[14] or 60 } - - # Load pairing settings for this profile - cursor.execute("SELECT * FROM pairing_settings WHERE profile = ?", (profile_name,)) - pair_row = cursor.fetchone() - if pair_row: - 
profiles[profile_name]["pair_adjacent_folder"] = pair_row[1] or "" - profiles[profile_name]["pair_main_category"] = pair_row[2] or "control" - profiles[profile_name]["pair_adj_category"] = pair_row[3] or "control" - profiles[profile_name]["pair_main_output"] = pair_row[4] or "/storage" - profiles[profile_name]["pair_adj_output"] = pair_row[5] or "/storage" - profiles[profile_name]["pair_time_window"] = pair_row[6] or 60 - + conn.close() return profiles @@ -354,40 +384,33 @@ class SorterEngine: @staticmethod def stage_image(original_path, category, new_name): """Records a pending rename/move in the database.""" - conn = sqlite3.connect(SorterEngine.DB_PATH) - cursor = conn.cursor() - cursor.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)", (original_path, category, new_name)) - conn.commit() - conn.close() + with SorterEngine.get_db() as conn: + cursor = conn.cursor() + cursor.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)", (original_path, category, new_name)) @staticmethod def clear_staged_item(original_path): """Removes an item from the pending staging area.""" - conn = sqlite3.connect(SorterEngine.DB_PATH) - cursor = conn.cursor() - cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,)) - conn.commit() - conn.close() + with SorterEngine.get_db() as conn: + cursor = conn.cursor() + cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,)) @staticmethod def clear_staging_area(): """Clears all items from the staging area.""" - conn = sqlite3.connect(SorterEngine.DB_PATH) - cursor = conn.cursor() - cursor.execute("DELETE FROM staging_area") - conn.commit() - conn.close() + with SorterEngine.get_db() as conn: + cursor = conn.cursor() + cursor.execute("DELETE FROM staging_area") @staticmethod def get_staged_data(): """Retrieves current tagged/staged images.""" - conn = sqlite3.connect(SorterEngine.DB_PATH) - cursor = conn.cursor() - cursor.execute("SELECT * FROM staging_area") 
- rows = cursor.fetchall() - conn.close() - # FIXED: Added "marked": r[3] to the dictionary - return {r[0]: {"cat": r[1], "name": r[2], "marked": r[3]} for r in rows} + with SorterEngine.get_db() as conn: + cursor = conn.cursor() + cursor.execute("SELECT * FROM staging_area") + rows = cursor.fetchall() + # FIXED: Added "marked": r[3] to the dictionary + return {r[0]: {"cat": r[1], "name": r[2], "marked": r[3]} for r in rows} @staticmethod def commit_global(output_root, cleanup_mode, operation="Copy", source_root=None, profile=None): diff --git a/gallery_app.py b/gallery_app.py index c03bc38..b4efe3b 100644 --- a/gallery_app.py +++ b/gallery_app.py @@ -1,7 +1,8 @@ import os import math import asyncio -from typing import Optional, List, Dict, Set +from typing import Optional, List, Dict, Set, Tuple +from functools import partial from nicegui import ui, app, run from fastapi import Response from engine import SorterEngine @@ -48,6 +49,12 @@ class AppState: self.staged_data: Dict = {} self.green_dots: Set[int] = set() self.index_map: Dict[int, str] = {} + + # Performance caches (Phase 1 optimizations) + self._cached_tagged_count: int = 0 # Cached count for get_stats() + self._green_dots_dirty: bool = True # Lazy green dots calculation + self._last_disk_scan_key: str = "" # Track output_dir + category for lazy disk scan + self._disk_index_map: Dict[int, str] = {} # Cached disk scan results # UI Containers (populated later) self.sidebar_container = None @@ -59,7 +66,7 @@ class AppState: self.pair_time_window = 60 # seconds +/- for matching self.pair_current_idx = 0 # Current image index in pairing mode self.pair_adjacent_folder = "" # Path to adjacent folder - self.pair_adjacent_images: List[str] = [] # Images from adjacent folder + self.pair_adjacent_data: List[Tuple[str, float]] = [] # (path, timestamp) tuples; timestamps cached at load time so matching needs no per-image stat calls self.pair_matches: List[str] = [] # Current matches for selected image self.pair_selected_match = None # Currently selected match 
self.pairing_container = None # UI container for pairing mode @@ -165,11 +172,23 @@ class AppState: return filtered[start : start + self.page_size] def get_stats(self) -> Dict: - """Get image statistics for display.""" + """Get image statistics for display. Uses cached tagged count.""" total = len(self.all_images) - tagged = len([img for img in self.all_images if img in self.staged_data]) + tagged = self._cached_tagged_count return {"total": total, "tagged": tagged, "untagged": total - tagged} + def get_green_dots(self) -> Set[int]: + """Lazily calculate green dots (pages with tagged images). + Only recalculates when _green_dots_dirty is True.""" + if self._green_dots_dirty: + self.green_dots.clear() + staged_keys = set(self.staged_data.keys()) + for idx, img_path in enumerate(self.all_images): + if img_path in staged_keys: + self.green_dots.add(idx // self.page_size) + self._green_dots_dirty = False + return self.green_dots + state = AppState() # ========================================== @@ -237,36 +256,46 @@ def get_file_timestamp(filepath: str) -> Optional[float]: return None def load_adjacent_folder(): - """Load images from adjacent folder for pairing, excluding main folder.""" + """Load images from adjacent folder for pairing, excluding main folder. 
+ Caches timestamps at load time to avoid repeated syscalls during navigation.""" if not state.pair_adjacent_folder or not os.path.exists(state.pair_adjacent_folder): - state.pair_adjacent_images = [] + state.pair_adjacent_data = [] ui.notify("Adjacent folder path is empty or doesn't exist", type='warning') return - + # Exclude the main source folder to avoid duplicates exclude = [state.source_dir] if state.source_dir else [] - - state.pair_adjacent_images = SorterEngine.get_images( - state.pair_adjacent_folder, - recursive=True, + + images = SorterEngine.get_images( + state.pair_adjacent_folder, + recursive=True, exclude_paths=exclude ) - ui.notify(f"Loaded {len(state.pair_adjacent_images)} images from adjacent folder", type='info') + + # Cache timestamps at load time (one-time cost instead of per-navigation) + state.pair_adjacent_data = [] + for img_path in images: + ts = get_file_timestamp(img_path) + if ts is not None: + state.pair_adjacent_data.append((img_path, ts)) + + ui.notify(f"Loaded {len(state.pair_adjacent_data)} images from adjacent folder", type='info') def find_time_matches(source_image: str) -> List[str]: - """Find images in adjacent folder within time window of source image.""" + """Find images in adjacent folder within time window of source image. 
+ Uses cached timestamps from pair_adjacent_data: one O(n) scan with no per-candidate syscalls.""" source_time = get_file_timestamp(source_image) if source_time is None: return [] - + + window = state.pair_time_window matches = [] - for adj_image in state.pair_adjacent_images: - adj_time = get_file_timestamp(adj_image) - if adj_time is not None: - time_diff = abs(source_time - adj_time) - if time_diff <= state.pair_time_window: - matches.append((adj_image, time_diff)) - + # Use pre-cached timestamps - no syscalls needed + for adj_path, adj_time in state.pair_adjacent_data: + time_diff = abs(source_time - adj_time) + if time_diff <= window: + matches.append((adj_path, time_diff)) + # Sort by time difference (closest first) matches.sort(key=lambda x: x[1]) return [m[0] for m in matches] @@ -459,47 +488,62 @@ def select_match(match_path: str): state.pair_selected_match = match_path render_pairing_view() -def refresh_staged_info(): - """Update staged data and index maps.""" +def refresh_staged_info(force_disk_scan: bool = False): + """Update staged data and index maps. + + Args: + force_disk_scan: If True, rescan disk even if output_dir and category haven't changed. + Set this after APPLY operations that modify files. 
+ """ state.staged_data = SorterEngine.get_staged_data() - - # Update green dots (pages with staged images) - state.green_dots.clear() staged_keys = set(state.staged_data.keys()) - for idx, img_path in enumerate(state.all_images): - if img_path in staged_keys: - state.green_dots.add(idx // state.page_size) - + + # Update cached tagged count (O(n) but simpler than set intersection) + state._cached_tagged_count = sum(1 for img in state.all_images if img in staged_keys) + + # Mark green dots as dirty (lazy calculation) + state._green_dots_dirty = True + # Build index map for active category (gallery mode) state.index_map.clear() - + # Add staged images for orig_path, info in state.staged_data.items(): if info['cat'] == state.active_cat: idx = _extract_index(info['name']) if idx is not None: state.index_map[idx] = orig_path - - # Add committed images from disk - cat_path = os.path.join(state.output_dir, state.active_cat) - if os.path.exists(cat_path): - for filename in os.listdir(cat_path): - if filename.startswith(state.active_cat): - idx = _extract_index(filename) - if idx is not None and idx not in state.index_map: - state.index_map[idx] = os.path.join(cat_path, filename) - + + # Lazy disk scan: only rescan when output_dir+category changes or forced + disk_scan_key = f"{state.output_dir}:{state.active_cat}" + cache_valid = state._last_disk_scan_key == disk_scan_key + if not cache_valid or force_disk_scan: + state._last_disk_scan_key = disk_scan_key + state._disk_index_map.clear() + cat_path = os.path.join(state.output_dir, state.active_cat) + if os.path.exists(cat_path): + for filename in os.listdir(cat_path): + if filename.startswith(state.active_cat): + idx = _extract_index(filename) + if idx is not None: + state._disk_index_map[idx] = os.path.join(cat_path, filename) + + # Merge disk results into index_map (staged takes precedence) + for idx, path in state._disk_index_map.items(): + if idx not in state.index_map: + state.index_map[idx] = path + # Build pairing 
mode index map (both categories) state.pair_index_map.clear() - + for orig_path, info in state.staged_data.items(): idx = _extract_index(info['name']) if idx is None: continue - + if idx not in state.pair_index_map: state.pair_index_map[idx] = {"main": None, "adj": None} - + # Check if this is from main or adjacent category if info['cat'] == state.pair_main_category: state.pair_index_map[idx]["main"] = orig_path @@ -543,13 +587,15 @@ def action_tag(img_path: str, manual_idx: Optional[int] = None): state.undo_stack.pop(0) SorterEngine.stage_image(img_path, state.active_cat, name) - + # Only auto-increment if we used the default next_index (not manual) if manual_idx is None: state.next_index = idx + 1 - + refresh_staged_info() - refresh_ui() + # Use targeted refresh - sidebar index grid needs update, but skip heavy rebuild + render_sidebar() # Update index grid to show new tag + refresh_grid_only() # Just grid + pagination stats def action_untag(img_path: str): """Remove staging from an image.""" @@ -568,7 +614,9 @@ def action_untag(img_path: str): SorterEngine.clear_staged_item(img_path) refresh_staged_info() - refresh_ui() + # Use targeted refresh - sidebar index grid needs update + render_sidebar() # Update index grid to show removed tag + refresh_grid_only() # Just grid + pagination stats def action_delete(img_path: str): """Delete image to trash.""" @@ -632,9 +680,11 @@ def action_apply_page(): if not batch: ui.notify("No images on current page", type='warning') return - + SorterEngine.commit_batch(batch, state.output_dir, state.cleanup_mode, state.batch_mode) ui.notify(f"Page processed ({state.batch_mode})", type='positive') + # Force disk rescan since files were committed + state._last_disk_scan_key = "" load_images() async def action_apply_global(): @@ -648,6 +698,8 @@ async def action_apply_global(): state.source_dir, state.profile_name ) + # Force disk rescan since files were committed + state._last_disk_scan_key = "" load_images() ui.notify("Global apply 
complete!", type='positive') @@ -991,42 +1043,51 @@ def render_gallery(): for img_path in batch: render_image_card(img_path) +def _set_hovered(path: str): + """Helper for hover tracking - bound to an image path with functools.partial.""" + state.hovered_image = path + +def _clear_hovered(): + """Helper for hover tracking - passed directly as the mouseleave handler.""" + state.hovered_image = None + def render_image_card(img_path: str): - """Render individual image card.""" + """Render individual image card. + Uses functools.partial instead of lambdas for better memory efficiency.""" is_staged = img_path in state.staged_data thumb_size = 800 - + card = ui.card().classes('p-2 bg-gray-900 border border-gray-700 no-shadow hover:border-green-500 transition-colors') - + with card: - # Track hover for keyboard shortcuts - card.on('mouseenter', lambda p=img_path: setattr(state, 'hovered_image', p)) - card.on('mouseleave', lambda: setattr(state, 'hovered_image', None)) - + # Track hover for keyboard shortcuts - using partial instead of lambda + card.on('mouseenter', partial(_set_hovered, img_path)) + card.on('mouseleave', _clear_hovered) + # Header with filename and actions with ui.row().classes('w-full justify-between no-wrap mb-1'): ui.label(os.path.basename(img_path)[:15]).classes('text-xs text-gray-400 truncate') with ui.row().classes('gap-0'): ui.button( icon='zoom_in', - on_click=lambda p=img_path: open_zoom_dialog(p) + on_click=partial(open_zoom_dialog, img_path) ).props('flat size=sm dense color=white') ui.button( icon='delete', - on_click=lambda p=img_path: action_delete(p) + on_click=partial(action_delete, img_path) ).props('flat size=sm dense color=red') - + # Thumbnail with double-click to tag img = ui.image(f"/thumbnail?path={img_path}&size={thumb_size}&q={state.preview_quality}") \ .classes('w-full h-64 bg-black rounded cursor-pointer') \ .props('fit=contain no-spinner') - - # Double-click to tag (if not already 
tagged) - using partial if not is_staged: - img.on('dblclick', lambda p=img_path: action_tag(p)) + img.on('dblclick', partial(action_tag, img_path)) else: - img.on('dblclick', lambda p=img_path: action_untag(p)) - + img.on('dblclick', partial(action_untag, img_path)) + # Tagging UI if is_staged: info = state.staged_data[img_path] @@ -1035,12 +1096,13 @@ def render_image_card(img_path: str): ui.label(f"🏷️ {info['cat']}").classes('text-center text-green-400 text-xs py-1 w-full') ui.button( f"Untag (#{idx_str})", - on_click=lambda p=img_path: action_untag(p) + on_click=partial(action_untag, img_path) ).props('flat color=grey-5 dense').classes('w-full') else: with ui.row().classes('w-full no-wrap mt-2 gap-1'): local_idx = ui.number(value=state.next_index, precision=0) \ .props('dense dark outlined').classes('w-1/3') + # Note: This one still needs lambda due to dynamic local_idx.value access ui.button( 'Tag', on_click=lambda p=img_path, i=local_idx: action_tag(p, int(i.value)) @@ -1108,8 +1170,9 @@ def render_pagination(): start = max(0, state.page - 2) end = min(state.total_pages, state.page + 3) + green_dots = state.get_green_dots() # Lazy calculation for p in range(start, end): - dot = " 🟢" if p in state.green_dots else "" + dot = " 🟢" if p in green_dots else "" color = "white" if p == state.page else "grey-6" ui.button( f"{p+1}{dot}", @@ -1131,6 +1194,12 @@ def refresh_ui(): render_pagination() render_gallery() +def refresh_grid_only(): + """Refresh only the grid and pagination stats - skip sidebar rebuild. + Use for tag/untag operations where sidebar doesn't need full rebuild.""" + render_pagination() + render_gallery() + def handle_keyboard(e): """Handle keyboard navigation and shortcuts (fallback).""" if not e.action.keydown: