This commit is contained in:
2026-01-28 15:42:14 +01:00
parent 7580036c9d
commit bf1134e47f
5 changed files with 269 additions and 116 deletions

61
CLAUDE.md Normal file
View File

@@ -0,0 +1,61 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
Turbo Sorter Pro v12.5 - A dual-interface image organization tool combining Streamlit (admin dashboard) and NiceGUI (gallery interface) for managing large image collections through time-sync matching, ID collision resolution, category-based sorting, and gallery tagging with pairing capabilities.
## Commands
```bash
# Install dependencies
pip install -r requirements.txt
# Run Streamlit dashboard (port 8501)
streamlit run app.py --server.port=8501 --server.address=0.0.0.0
# Run NiceGUI gallery (port 8080)
python3 gallery_app.py
# Both services (container startup)
./start.sh
# Syntax check all Python files
python3 -m py_compile *.py
```
## Architecture
### Dual-Framework Design
- **Streamlit (app.py, port 8501)**: Administrative dashboard with 5 modular tabs for management workflows
- **NiceGUI (gallery_app.py, port 8080)**: Modern gallery interface for image tagging and pairing operations
- **Shared Backend**: Both UIs use `SorterEngine` (engine.py) and the same SQLite database
### Core Components
| File | Purpose |
|------|---------|
| `engine.py` | Static `SorterEngine` class - all DB operations, file handling, image compression |
| `gallery_app.py` | NiceGUI gallery with `AppState` class for centralized state management |
| `app.py` | Streamlit entry point, loads tab modules |
| `tab_*.py` | Independent tab modules for each workflow |
### Database
SQLite at `/app/sorter_database.db` with tables: profiles, folder_ids, categories, staging_area, processed_log, folder_tags, profile_categories, pairing_settings.
### Tab Workflows
1. **Time-Sync Discovery** - Match images by timestamp across folders
2. **ID Review** - Resolve ID collisions between target/control folders
3. **Unused Archive** - Manage rejected image pairs
4. **Category Sorter** - One-to-many categorization
5. **Gallery Staged** - Grid-based tagging with Gallery/Pairing dual modes
## Key Patterns
- **ID Format**: `id001_`, `id002_` (zero-padded 3-digit prefix)
- **Staging Pattern**: Two-phase commit (stage → commit) with undo support
- **Image Formats**: .jpg, .jpeg, .png, .webp, .bmp, .tiff
- **Compression**: WebP with ThreadPoolExecutor (8 workers)
- **Permissions**: chmod 0o777 applied to committed files
- **Default Paths**: `/storage` when not configured

Binary file not shown.

Binary file not shown.

123
engine.py
View File

@@ -1,12 +1,28 @@
import os
import shutil
import sqlite3
from contextlib import contextmanager
from PIL import Image
from io import BytesIO
class SorterEngine:
DB_PATH = "/app/sorter_database.db"
@staticmethod
@contextmanager
def get_db():
    """Context manager for database connections.
    Ensures proper commit/rollback and always closes connection."""
    # One fresh connection per call, pointed at the shared DB_PATH.
    conn = sqlite3.connect(SorterEngine.DB_PATH)
    try:
        yield conn
        # Commit only if the caller's `with` body finished without raising.
        conn.commit()
    except Exception:
        # Undo any partial writes, then re-raise so the caller sees the error.
        conn.rollback()
        raise
    finally:
        # Closed on every path: success, exception, or early exit.
        conn.close()
# --- 1. DATABASE INITIALIZATION ---
@staticmethod
def init_db():
@@ -51,7 +67,15 @@ class SorterEngine:
if cursor.fetchone()[0] == 0:
for cat in ["_TRASH", "control", "Default", "Action", "Solo"]:
cursor.execute("INSERT OR IGNORE INTO categories VALUES (?)", (cat,))
# --- PERFORMANCE INDEXES ---
# Index for staging_area queries filtered by category
cursor.execute("CREATE INDEX IF NOT EXISTS idx_staging_category ON staging_area(target_category)")
# Index for folder_tags queries filtered by profile and folder_path
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folder_tags_profile ON folder_tags(profile, folder_path)")
# Index for profile_categories lookups
cursor.execute("CREATE INDEX IF NOT EXISTS idx_profile_categories ON profile_categories(profile)")
conn.commit()
conn.close()
@@ -146,42 +170,48 @@ class SorterEngine:
@staticmethod
def load_profiles():
"""Loads all workspace presets including pairing settings."""
"""Loads all workspace presets including pairing settings.
Uses LEFT JOIN to fetch all data in a single query (fixes N+1 problem)."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM profiles")
rows = cursor.fetchall()
# Ensure pairing_settings table exists
cursor.execute('''CREATE TABLE IF NOT EXISTS pairing_settings
(profile TEXT PRIMARY KEY,
adjacent_folder TEXT,
main_category TEXT,
adj_category TEXT,
main_output TEXT,
adj_output TEXT,
# Ensure pairing_settings table exists before JOIN
cursor.execute('''CREATE TABLE IF NOT EXISTS pairing_settings
(profile TEXT PRIMARY KEY,
adjacent_folder TEXT,
main_category TEXT,
adj_category TEXT,
main_output TEXT,
adj_output TEXT,
time_window INTEGER)''')
# Single query with LEFT JOIN - eliminates N+1 queries
cursor.execute('''
SELECT p.name, p.tab1_target, p.tab2_target, p.tab2_control,
p.tab4_source, p.tab4_out, p.mode, p.tab5_source, p.tab5_out,
ps.adjacent_folder, ps.main_category, ps.adj_category,
ps.main_output, ps.adj_output, ps.time_window
FROM profiles p
LEFT JOIN pairing_settings ps ON p.name = ps.profile
''')
rows = cursor.fetchall()
profiles = {}
for r in rows:
profile_name = r[0]
profiles[profile_name] = {
"tab1_target": r[1], "tab2_target": r[2], "tab2_control": r[3],
"tab1_target": r[1], "tab2_target": r[2], "tab2_control": r[3],
"tab4_source": r[4], "tab4_out": r[5], "mode": r[6],
"tab5_source": r[7], "tab5_out": r[8]
"tab5_source": r[7], "tab5_out": r[8],
# Pairing settings from JOIN (with defaults for NULL)
"pair_adjacent_folder": r[9] or "",
"pair_main_category": r[10] or "control",
"pair_adj_category": r[11] or "control",
"pair_main_output": r[12] or "/storage",
"pair_adj_output": r[13] or "/storage",
"pair_time_window": r[14] or 60
}
# Load pairing settings for this profile
cursor.execute("SELECT * FROM pairing_settings WHERE profile = ?", (profile_name,))
pair_row = cursor.fetchone()
if pair_row:
profiles[profile_name]["pair_adjacent_folder"] = pair_row[1] or ""
profiles[profile_name]["pair_main_category"] = pair_row[2] or "control"
profiles[profile_name]["pair_adj_category"] = pair_row[3] or "control"
profiles[profile_name]["pair_main_output"] = pair_row[4] or "/storage"
profiles[profile_name]["pair_adj_output"] = pair_row[5] or "/storage"
profiles[profile_name]["pair_time_window"] = pair_row[6] or 60
conn.close()
return profiles
@@ -354,40 +384,33 @@ class SorterEngine:
@staticmethod
def stage_image(original_path, category, new_name):
"""Records a pending rename/move in the database."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)", (original_path, category, new_name))
conn.commit()
conn.close()
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)", (original_path, category, new_name))
@staticmethod
def clear_staged_item(original_path):
"""Removes an item from the pending staging area."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,))
conn.commit()
conn.close()
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,))
@staticmethod
def clear_staging_area():
"""Clears all items from the staging area."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area")
conn.commit()
conn.close()
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area")
@staticmethod
def get_staged_data():
"""Retrieves current tagged/staged images."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM staging_area")
rows = cursor.fetchall()
conn.close()
# FIXED: Added "marked": r[3] to the dictionary
return {r[0]: {"cat": r[1], "name": r[2], "marked": r[3]} for r in rows}
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("SELECT * FROM staging_area")
rows = cursor.fetchall()
# FIXED: Added "marked": r[3] to the dictionary
return {r[0]: {"cat": r[1], "name": r[2], "marked": r[3]} for r in rows}
@staticmethod
def commit_global(output_root, cleanup_mode, operation="Copy", source_root=None, profile=None):

View File

@@ -1,7 +1,8 @@
import os
import math
import asyncio
from typing import Optional, List, Dict, Set
from typing import Optional, List, Dict, Set, Tuple
from functools import partial
from nicegui import ui, app, run
from fastapi import Response
from engine import SorterEngine
@@ -48,6 +49,12 @@ class AppState:
self.staged_data: Dict = {}
self.green_dots: Set[int] = set()
self.index_map: Dict[int, str] = {}
# Performance caches (Phase 1 optimizations)
self._cached_tagged_count: int = 0 # Cached count for get_stats()
self._green_dots_dirty: bool = True # Lazy green dots calculation
self._last_disk_scan_key: str = "" # Track output_dir + category for lazy disk scan
self._disk_index_map: Dict[int, str] = {} # Cached disk scan results
# UI Containers (populated later)
self.sidebar_container = None
@@ -59,7 +66,7 @@ class AppState:
self.pair_time_window = 60 # seconds +/- for matching
self.pair_current_idx = 0 # Current image index in pairing mode
self.pair_adjacent_folder = "" # Path to adjacent folder
self.pair_adjacent_images: List[str] = [] # Images from adjacent folder
self.pair_adjacent_data: List[Tuple[str, float]] = [] # (path, timestamp) tuples for O(1) lookup
self.pair_matches: List[str] = [] # Current matches for selected image
self.pair_selected_match = None # Currently selected match
self.pairing_container = None # UI container for pairing mode
@@ -165,11 +172,23 @@ class AppState:
return filtered[start : start + self.page_size]
def get_stats(self) -> Dict:
"""Get image statistics for display."""
"""Get image statistics for display. Uses cached tagged count."""
total = len(self.all_images)
tagged = len([img for img in self.all_images if img in self.staged_data])
tagged = self._cached_tagged_count
return {"total": total, "tagged": tagged, "untagged": total - tagged}
def get_green_dots(self) -> Set[int]:
    """Return the set of page numbers containing at least one tagged image.

    The result is cached in ``self.green_dots``; it is rebuilt only while
    ``self._green_dots_dirty`` is set, then the flag is cleared.
    """
    if self._green_dots_dirty:
        tagged = set(self.staged_data.keys())
        # Page number of every tagged image, computed in one pass.
        pages = {
            pos // self.page_size
            for pos, path in enumerate(self.all_images)
            if path in tagged
        }
        # Mutate the existing set in place so external references stay valid.
        self.green_dots.clear()
        self.green_dots.update(pages)
        self._green_dots_dirty = False
    return self.green_dots
state = AppState()
# ==========================================
@@ -237,36 +256,46 @@ def get_file_timestamp(filepath: str) -> Optional[float]:
return None
def load_adjacent_folder():
"""Load images from adjacent folder for pairing, excluding main folder."""
"""Load images from adjacent folder for pairing, excluding main folder.
Caches timestamps at load time to avoid repeated syscalls during navigation."""
if not state.pair_adjacent_folder or not os.path.exists(state.pair_adjacent_folder):
state.pair_adjacent_images = []
state.pair_adjacent_data = []
ui.notify("Adjacent folder path is empty or doesn't exist", type='warning')
return
# Exclude the main source folder to avoid duplicates
exclude = [state.source_dir] if state.source_dir else []
state.pair_adjacent_images = SorterEngine.get_images(
state.pair_adjacent_folder,
recursive=True,
images = SorterEngine.get_images(
state.pair_adjacent_folder,
recursive=True,
exclude_paths=exclude
)
ui.notify(f"Loaded {len(state.pair_adjacent_images)} images from adjacent folder", type='info')
# Cache timestamps at load time (one-time cost instead of per-navigation)
state.pair_adjacent_data = []
for img_path in images:
ts = get_file_timestamp(img_path)
if ts is not None:
state.pair_adjacent_data.append((img_path, ts))
ui.notify(f"Loaded {len(state.pair_adjacent_data)} images from adjacent folder", type='info')
def find_time_matches(source_image: str) -> List[str]:
"""Find images in adjacent folder within time window of source image."""
"""Find images in adjacent folder within time window of source image.
Uses cached timestamps from pair_adjacent_data for O(n) without syscalls."""
source_time = get_file_timestamp(source_image)
if source_time is None:
return []
window = state.pair_time_window
matches = []
for adj_image in state.pair_adjacent_images:
adj_time = get_file_timestamp(adj_image)
if adj_time is not None:
time_diff = abs(source_time - adj_time)
if time_diff <= state.pair_time_window:
matches.append((adj_image, time_diff))
# Use pre-cached timestamps - no syscalls needed
for adj_path, adj_time in state.pair_adjacent_data:
time_diff = abs(source_time - adj_time)
if time_diff <= window:
matches.append((adj_path, time_diff))
# Sort by time difference (closest first)
matches.sort(key=lambda x: x[1])
return [m[0] for m in matches]
@@ -459,47 +488,62 @@ def select_match(match_path: str):
state.pair_selected_match = match_path
render_pairing_view()
def refresh_staged_info():
"""Update staged data and index maps."""
def refresh_staged_info(force_disk_scan: bool = False):
"""Update staged data and index maps.
Args:
force_disk_scan: If True, rescan disk even if category hasn't changed.
Set this after APPLY operations that modify files.
"""
state.staged_data = SorterEngine.get_staged_data()
# Update green dots (pages with staged images)
state.green_dots.clear()
staged_keys = set(state.staged_data.keys())
for idx, img_path in enumerate(state.all_images):
if img_path in staged_keys:
state.green_dots.add(idx // state.page_size)
# Update cached tagged count (O(n) but simpler than set intersection)
state._cached_tagged_count = sum(1 for img in state.all_images if img in staged_keys)
# Mark green dots as dirty (lazy calculation)
state._green_dots_dirty = True
# Build index map for active category (gallery mode)
state.index_map.clear()
# Add staged images
for orig_path, info in state.staged_data.items():
if info['cat'] == state.active_cat:
idx = _extract_index(info['name'])
if idx is not None:
state.index_map[idx] = orig_path
# Add committed images from disk
cat_path = os.path.join(state.output_dir, state.active_cat)
if os.path.exists(cat_path):
for filename in os.listdir(cat_path):
if filename.startswith(state.active_cat):
idx = _extract_index(filename)
if idx is not None and idx not in state.index_map:
state.index_map[idx] = os.path.join(cat_path, filename)
# Lazy disk scan: only rescan when output_dir+category changes or forced
disk_scan_key = f"{state.output_dir}:{state.active_cat}"
cache_valid = state._last_disk_scan_key == disk_scan_key
if not cache_valid or force_disk_scan:
state._last_disk_scan_key = disk_scan_key
state._disk_index_map.clear()
cat_path = os.path.join(state.output_dir, state.active_cat)
if os.path.exists(cat_path):
for filename in os.listdir(cat_path):
if filename.startswith(state.active_cat):
idx = _extract_index(filename)
if idx is not None:
state._disk_index_map[idx] = os.path.join(cat_path, filename)
# Merge disk results into index_map (staged takes precedence)
for idx, path in state._disk_index_map.items():
if idx not in state.index_map:
state.index_map[idx] = path
# Build pairing mode index map (both categories)
state.pair_index_map.clear()
for orig_path, info in state.staged_data.items():
idx = _extract_index(info['name'])
if idx is None:
continue
if idx not in state.pair_index_map:
state.pair_index_map[idx] = {"main": None, "adj": None}
# Check if this is from main or adjacent category
if info['cat'] == state.pair_main_category:
state.pair_index_map[idx]["main"] = orig_path
@@ -543,13 +587,15 @@ def action_tag(img_path: str, manual_idx: Optional[int] = None):
state.undo_stack.pop(0)
SorterEngine.stage_image(img_path, state.active_cat, name)
# Only auto-increment if we used the default next_index (not manual)
if manual_idx is None:
state.next_index = idx + 1
refresh_staged_info()
refresh_ui()
# Use targeted refresh - sidebar index grid needs update, but skip heavy rebuild
render_sidebar() # Update index grid to show new tag
refresh_grid_only() # Just grid + pagination stats
def action_untag(img_path: str):
"""Remove staging from an image."""
@@ -568,7 +614,9 @@ def action_untag(img_path: str):
SorterEngine.clear_staged_item(img_path)
refresh_staged_info()
refresh_ui()
# Use targeted refresh - sidebar index grid needs update
render_sidebar() # Update index grid to show removed tag
refresh_grid_only() # Just grid + pagination stats
def action_delete(img_path: str):
"""Delete image to trash."""
@@ -632,9 +680,11 @@ def action_apply_page():
if not batch:
ui.notify("No images on current page", type='warning')
return
SorterEngine.commit_batch(batch, state.output_dir, state.cleanup_mode, state.batch_mode)
ui.notify(f"Page processed ({state.batch_mode})", type='positive')
# Force disk rescan since files were committed
state._last_disk_scan_key = ""
load_images()
async def action_apply_global():
@@ -648,6 +698,8 @@ async def action_apply_global():
state.source_dir,
state.profile_name
)
# Force disk rescan since files were committed
state._last_disk_scan_key = ""
load_images()
ui.notify("Global apply complete!", type='positive')
@@ -991,42 +1043,51 @@ def render_gallery():
for img_path in batch:
render_image_card(img_path)
def _set_hovered(path: str):
    """Helper for hover tracking - used with partial for memory efficiency."""
    # Remember which image the pointer is over; keyboard shortcuts read this.
    state.hovered_image = path
def _clear_hovered():
    """Helper for hover tracking - used with partial for memory efficiency."""
    # Pointer left the card: no image is currently targeted.
    state.hovered_image = None
def render_image_card(img_path: str):
"""Render individual image card."""
"""Render individual image card.
Uses functools.partial instead of lambdas for better memory efficiency."""
is_staged = img_path in state.staged_data
thumb_size = 800
card = ui.card().classes('p-2 bg-gray-900 border border-gray-700 no-shadow hover:border-green-500 transition-colors')
with card:
# Track hover for keyboard shortcuts
card.on('mouseenter', lambda p=img_path: setattr(state, 'hovered_image', p))
card.on('mouseleave', lambda: setattr(state, 'hovered_image', None))
# Track hover for keyboard shortcuts - using partial instead of lambda
card.on('mouseenter', partial(_set_hovered, img_path))
card.on('mouseleave', _clear_hovered)
# Header with filename and actions
with ui.row().classes('w-full justify-between no-wrap mb-1'):
ui.label(os.path.basename(img_path)[:15]).classes('text-xs text-gray-400 truncate')
with ui.row().classes('gap-0'):
ui.button(
icon='zoom_in',
on_click=lambda p=img_path: open_zoom_dialog(p)
on_click=partial(open_zoom_dialog, img_path)
).props('flat size=sm dense color=white')
ui.button(
icon='delete',
on_click=lambda p=img_path: action_delete(p)
on_click=partial(action_delete, img_path)
).props('flat size=sm dense color=red')
# Thumbnail with double-click to tag
img = ui.image(f"/thumbnail?path={img_path}&size={thumb_size}&q={state.preview_quality}") \
.classes('w-full h-64 bg-black rounded cursor-pointer') \
.props('fit=contain no-spinner')
# Double-click to tag (if not already tagged)
# Double-click to tag (if not already tagged) - using partial
if not is_staged:
img.on('dblclick', lambda p=img_path: action_tag(p))
img.on('dblclick', partial(action_tag, img_path))
else:
img.on('dblclick', lambda p=img_path: action_untag(p))
img.on('dblclick', partial(action_untag, img_path))
# Tagging UI
if is_staged:
info = state.staged_data[img_path]
@@ -1035,12 +1096,13 @@ def render_image_card(img_path: str):
ui.label(f"🏷️ {info['cat']}").classes('text-center text-green-400 text-xs py-1 w-full')
ui.button(
f"Untag (#{idx_str})",
on_click=lambda p=img_path: action_untag(p)
on_click=partial(action_untag, img_path)
).props('flat color=grey-5 dense').classes('w-full')
else:
with ui.row().classes('w-full no-wrap mt-2 gap-1'):
local_idx = ui.number(value=state.next_index, precision=0) \
.props('dense dark outlined').classes('w-1/3')
# Note: This one still needs lambda due to dynamic local_idx.value access
ui.button(
'Tag',
on_click=lambda p=img_path, i=local_idx: action_tag(p, int(i.value))
@@ -1108,8 +1170,9 @@ def render_pagination():
start = max(0, state.page - 2)
end = min(state.total_pages, state.page + 3)
green_dots = state.get_green_dots() # Lazy calculation
for p in range(start, end):
dot = " 🟢" if p in state.green_dots else ""
dot = " 🟢" if p in green_dots else ""
color = "white" if p == state.page else "grey-6"
ui.button(
f"{p+1}{dot}",
@@ -1131,6 +1194,12 @@ def refresh_ui():
render_pagination()
render_gallery()
def refresh_grid_only():
    """Refresh only the grid and pagination stats - skip sidebar rebuild.
    Use for tag/untag operations where sidebar doesn't need full rebuild."""
    # Lighter alternative to refresh_ui(): redraws pagination and the
    # gallery grid but avoids the more expensive sidebar re-render.
    render_pagination()
    render_gallery()
def handle_keyboard(e):
"""Handle keyboard navigation and shortcuts (fallback)."""
if not e.action.keydown: