This commit is contained in:
2026-01-28 15:42:14 +01:00
parent 7580036c9d
commit bf1134e47f
5 changed files with 269 additions and 116 deletions

61
CLAUDE.md Normal file
View File

@@ -0,0 +1,61 @@
# CLAUDE.md
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
## Project Overview
Turbo Sorter Pro v12.5 - A dual-interface image organization tool combining Streamlit (admin dashboard) and NiceGUI (gallery interface) for managing large image collections through time-sync matching, ID collision resolution, category-based sorting, and gallery tagging with pairing capabilities.
## Commands
```bash
# Install dependencies
pip install -r requirements.txt
# Run Streamlit dashboard (port 8501)
streamlit run app.py --server.port=8501 --server.address=0.0.0.0
# Run NiceGUI gallery (port 8080)
python3 gallery_app.py
# Both services (container startup)
./start.sh
# Syntax check all Python files
python3 -m py_compile *.py
```
## Architecture
### Dual-Framework Design
- **Streamlit (app.py, port 8501)**: Administrative dashboard with 5 modular tabs for management workflows
- **NiceGUI (gallery_app.py, port 8080)**: Modern gallery interface for image tagging and pairing operations
- **Shared Backend**: Both UIs use `SorterEngine` (engine.py) and the same SQLite database
### Core Components
| File | Purpose |
|------|---------|
| `engine.py` | Static `SorterEngine` class - all DB operations, file handling, image compression |
| `gallery_app.py` | NiceGUI gallery with `AppState` class for centralized state management |
| `app.py` | Streamlit entry point, loads tab modules |
| `tab_*.py` | Independent tab modules for each workflow |
### Database
SQLite at `/app/sorter_database.db` with tables: profiles, folder_ids, categories, staging_area, processed_log, folder_tags, profile_categories, pairing_settings.
### Tab Workflows
1. **Time-Sync Discovery** - Match images by timestamp across folders
2. **ID Review** - Resolve ID collisions between target/control folders
3. **Unused Archive** - Manage rejected image pairs
4. **Category Sorter** - One-to-many categorization
5. **Gallery Staged** - Grid-based tagging with Gallery/Pairing dual modes
## Key Patterns
- **ID Format**: `id001_`, `id002_` (zero-padded 3-digit prefix)
- **Staging Pattern**: Two-phase commit (stage → commit) with undo support
- **Image Formats**: .jpg, .jpeg, .png, .webp, .bmp, .tiff
- **Compression**: WebP with ThreadPoolExecutor (8 workers)
- **Permissions**: chmod 0o777 applied to committed files
- **Default Paths**: `/storage` when not configured

Binary file not shown.

Binary file not shown.

123
engine.py
View File

@@ -1,12 +1,28 @@
import os
import shutil
import sqlite3
from contextlib import contextmanager
from PIL import Image
from io import BytesIO
class SorterEngine:
DB_PATH = "/app/sorter_database.db"
@staticmethod
@contextmanager
def get_db():
    """Context manager for database connections.
    Ensures proper commit/rollback and always closes connection."""
    # One fresh connection per call, pointed at the shared DB_PATH.
    conn = sqlite3.connect(SorterEngine.DB_PATH)
    try:
        yield conn
        # Commit only if the caller's `with` body finished without raising.
        conn.commit()
    except Exception:
        # Undo any partial writes, then re-raise so the caller sees the error.
        conn.rollback()
        raise
    finally:
        # Closed on every path: success, exception, or early exit.
        conn.close()
# --- 1. DATABASE INITIALIZATION ---
@staticmethod
def init_db():
@@ -51,7 +67,15 @@ class SorterEngine:
if cursor.fetchone()[0] == 0:
for cat in ["_TRASH", "control", "Default", "Action", "Solo"]:
cursor.execute("INSERT OR IGNORE INTO categories VALUES (?)", (cat,))
# --- PERFORMANCE INDEXES ---
# Index for staging_area queries filtered by category
cursor.execute("CREATE INDEX IF NOT EXISTS idx_staging_category ON staging_area(target_category)")
# Index for folder_tags queries filtered by profile and folder_path
cursor.execute("CREATE INDEX IF NOT EXISTS idx_folder_tags_profile ON folder_tags(profile, folder_path)")
# Index for profile_categories lookups
cursor.execute("CREATE INDEX IF NOT EXISTS idx_profile_categories ON profile_categories(profile)")
conn.commit()
conn.close()
@@ -146,42 +170,48 @@ class SorterEngine:
@staticmethod
def load_profiles():
"""Loads all workspace presets including pairing settings."""
"""Loads all workspace presets including pairing settings.
Uses LEFT JOIN to fetch all data in a single query (fixes N+1 problem)."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM profiles")
rows = cursor.fetchall()
# Ensure pairing_settings table exists
cursor.execute('''CREATE TABLE IF NOT EXISTS pairing_settings
(profile TEXT PRIMARY KEY,
adjacent_folder TEXT,
main_category TEXT,
adj_category TEXT,
main_output TEXT,
adj_output TEXT,
# Ensure pairing_settings table exists before JOIN
cursor.execute('''CREATE TABLE IF NOT EXISTS pairing_settings
(profile TEXT PRIMARY KEY,
adjacent_folder TEXT,
main_category TEXT,
adj_category TEXT,
main_output TEXT,
adj_output TEXT,
time_window INTEGER)''')
# Single query with LEFT JOIN - eliminates N+1 queries
cursor.execute('''
SELECT p.name, p.tab1_target, p.tab2_target, p.tab2_control,
p.tab4_source, p.tab4_out, p.mode, p.tab5_source, p.tab5_out,
ps.adjacent_folder, ps.main_category, ps.adj_category,
ps.main_output, ps.adj_output, ps.time_window
FROM profiles p
LEFT JOIN pairing_settings ps ON p.name = ps.profile
''')
rows = cursor.fetchall()
profiles = {}
for r in rows:
profile_name = r[0]
profiles[profile_name] = {
"tab1_target": r[1], "tab2_target": r[2], "tab2_control": r[3],
"tab1_target": r[1], "tab2_target": r[2], "tab2_control": r[3],
"tab4_source": r[4], "tab4_out": r[5], "mode": r[6],
"tab5_source": r[7], "tab5_out": r[8]
"tab5_source": r[7], "tab5_out": r[8],
# Pairing settings from JOIN (with defaults for NULL)
"pair_adjacent_folder": r[9] or "",
"pair_main_category": r[10] or "control",
"pair_adj_category": r[11] or "control",
"pair_main_output": r[12] or "/storage",
"pair_adj_output": r[13] or "/storage",
"pair_time_window": r[14] or 60
}
# Load pairing settings for this profile
cursor.execute("SELECT * FROM pairing_settings WHERE profile = ?", (profile_name,))
pair_row = cursor.fetchone()
if pair_row:
profiles[profile_name]["pair_adjacent_folder"] = pair_row[1] or ""
profiles[profile_name]["pair_main_category"] = pair_row[2] or "control"
profiles[profile_name]["pair_adj_category"] = pair_row[3] or "control"
profiles[profile_name]["pair_main_output"] = pair_row[4] or "/storage"
profiles[profile_name]["pair_adj_output"] = pair_row[5] or "/storage"
profiles[profile_name]["pair_time_window"] = pair_row[6] or 60
conn.close()
return profiles
@@ -354,40 +384,33 @@ class SorterEngine:
@staticmethod
def stage_image(original_path, category, new_name):
"""Records a pending rename/move in the database."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)", (original_path, category, new_name))
conn.commit()
conn.close()
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)", (original_path, category, new_name))
@staticmethod
def clear_staged_item(original_path):
"""Removes an item from the pending staging area."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,))
conn.commit()
conn.close()
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,))
@staticmethod
def clear_staging_area():
"""Clears all items from the staging area."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area")
conn.commit()
conn.close()
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("DELETE FROM staging_area")
@staticmethod
def get_staged_data():
"""Retrieves current tagged/staged images."""
conn = sqlite3.connect(SorterEngine.DB_PATH)
cursor = conn.cursor()
cursor.execute("SELECT * FROM staging_area")
rows = cursor.fetchall()
conn.close()
# FIXED: Added "marked": r[3] to the dictionary
return {r[0]: {"cat": r[1], "name": r[2], "marked": r[3]} for r in rows}
with SorterEngine.get_db() as conn:
cursor = conn.cursor()
cursor.execute("SELECT * FROM staging_area")
rows = cursor.fetchall()
# FIXED: Added "marked": r[3] to the dictionary
return {r[0]: {"cat": r[1], "name": r[2], "marked": r[3]} for r in rows}
@staticmethod
def commit_global(output_root, cleanup_mode, operation="Copy", source_root=None, profile=None):

View File

@@ -1,7 +1,8 @@
import os
import math
import asyncio
from typing import Optional, List, Dict, Set
from typing import Optional, List, Dict, Set, Tuple
from functools import partial
from nicegui import ui, app, run
from fastapi import Response
from engine import SorterEngine
@@ -48,6 +49,12 @@ class AppState:
self.staged_data: Dict = {}
self.green_dots: Set[int] = set()
self.index_map: Dict[int, str] = {}
# Performance caches (Phase 1 optimizations)
self._cached_tagged_count: int = 0 # Cached count for get_stats()
self._green_dots_dirty: bool = True # Lazy green dots calculation
self._last_disk_scan_key: str = "" # Track output_dir + category for lazy disk scan
self._disk_index_map: Dict[int, str] = {} # Cached disk scan results
# UI Containers (populated later)
self.sidebar_container = None
@@ -59,7 +66,7 @@ class AppState:
self.pair_time_window = 60 # seconds +/- for matching
self.pair_current_idx = 0 # Current image index in pairing mode
self.pair_adjacent_folder = "" # Path to adjacent folder
self.pair_adjacent_images: List[str] = [] # Images from adjacent folder
self.pair_adjacent_data: List[Tuple[str, float]] = [] # (path, timestamp) tuples for O(1) lookup
self.pair_matches: List[str] = [] # Current matches for selected image
self.pair_selected_match = None # Currently selected match
self.pairing_container = None # UI container for pairing mode
@@ -165,11 +172,23 @@ class AppState:
return filtered[start : start + self.page_size]
def get_stats(self) -> Dict:
"""Get image statistics for display."""
"""Get image statistics for display. Uses cached tagged count."""
total = len(self.all_images)
tagged = len([img for img in self.all_images if img in self.staged_data])
tagged = self._cached_tagged_count
return {"total": total, "tagged": tagged, "untagged": total - tagged}
def get_green_dots(self) -> Set[int]:
    """Return the set of page numbers containing at least one tagged image.

    The result is cached in ``self.green_dots``; it is rebuilt only while
    ``self._green_dots_dirty`` is set, then the flag is cleared.
    """
    if self._green_dots_dirty:
        tagged = set(self.staged_data.keys())
        # Page number of every tagged image, computed in one pass.
        pages = {
            pos // self.page_size
            for pos, path in enumerate(self.all_images)
            if path in tagged
        }
        # Mutate the existing set in place so external references stay valid.
        self.green_dots.clear()
        self.green_dots.update(pages)
        self._green_dots_dirty = False
    return self.green_dots
state = AppState()
# ==========================================
@@ -237,36 +256,46 @@ def get_file_timestamp(filepath: str) -> Optional[float]:
return None
def load_adjacent_folder():
"""Load images from adjacent folder for pairing, excluding main folder."""
"""Load images from adjacent folder for pairing, excluding main folder.
Caches timestamps at load time to avoid repeated syscalls during navigation."""
if not state.pair_adjacent_folder or not os.path.exists(state.pair_adjacent_folder):
state.pair_adjacent_images = []
state.pair_adjacent_data = []
ui.notify("Adjacent folder path is empty or doesn't exist", type='warning')
return
# Exclude the main source folder to avoid duplicates
exclude = [state.source_dir] if state.source_dir else []
state.pair_adjacent_images = SorterEngine.get_images(
state.pair_adjacent_folder,
recursive=True,
images = SorterEngine.get_images(
state.pair_adjacent_folder,
recursive=True,
exclude_paths=exclude
)
ui.notify(f"Loaded {len(state.pair_adjacent_images)} images from adjacent folder", type='info')
# Cache timestamps at load time (one-time cost instead of per-navigation)
state.pair_adjacent_data = []
for img_path in images:
ts = get_file_timestamp(img_path)
if ts is not None:
state.pair_adjacent_data.append((img_path, ts))
ui.notify(f"Loaded {len(state.pair_adjacent_data)} images from adjacent folder", type='info')
def find_time_matches(source_image: str) -> List[str]:
"""Find images in adjacent folder within time window of source image."""
"""Find images in adjacent folder within time window of source image.
Uses cached timestamps from pair_adjacent_data for O(n) without syscalls."""
source_time = get_file_timestamp(source_image)
if source_time is None:
return []
window = state.pair_time_window
matches = []
for adj_image in state.pair_adjacent_images:
adj_time = get_file_timestamp(adj_image)
if adj_time is not None:
time_diff = abs(source_time - adj_time)
if time_diff <= state.pair_time_window:
matches.append((adj_image, time_diff))
# Use pre-cached timestamps - no syscalls needed
for adj_path, adj_time in state.pair_adjacent_data:
time_diff = abs(source_time - adj_time)
if time_diff <= window:
matches.append((adj_path, time_diff))
# Sort by time difference (closest first)
matches.sort(key=lambda x: x[1])
return [m[0] for m in matches]
@@ -459,47 +488,62 @@ def select_match(match_path: str):
state.pair_selected_match = match_path
render_pairing_view()
def refresh_staged_info():
"""Update staged data and index maps."""
def refresh_staged_info(force_disk_scan: bool = False):
"""Update staged data and index maps.
Args:
force_disk_scan: If True, rescan disk even if category hasn't changed.
Set this after APPLY operations that modify files.
"""
state.staged_data = SorterEngine.get_staged_data()
# Update green dots (pages with staged images)
state.green_dots.clear()
staged_keys = set(state.staged_data.keys())
for idx, img_path in enumerate(state.all_images):
if img_path in staged_keys:
state.green_dots.add(idx // state.page_size)
# Update cached tagged count (O(n) but simpler than set intersection)
state._cached_tagged_count = sum(1 for img in state.all_images if img in staged_keys)
# Mark green dots as dirty (lazy calculation)
state._green_dots_dirty = True
# Build index map for active category (gallery mode)
state.index_map.clear()
# Add staged images
for orig_path, info in state.staged_data.items():
if info['cat'] == state.active_cat:
idx = _extract_index(info['name'])
if idx is not None:
state.index_map[idx] = orig_path
# Add committed images from disk
cat_path = os.path.join(state.output_dir, state.active_cat)
if os.path.exists(cat_path):
for filename in os.listdir(cat_path):
if filename.startswith(state.active_cat):
idx = _extract_index(filename)
if idx is not None and idx not in state.index_map:
state.index_map[idx] = os.path.join(cat_path, filename)
# Lazy disk scan: only rescan when output_dir+category changes or forced
disk_scan_key = f"{state.output_dir}:{state.active_cat}"
cache_valid = state._last_disk_scan_key == disk_scan_key
if not cache_valid or force_disk_scan:
state._last_disk_scan_key = disk_scan_key
state._disk_index_map.clear()
cat_path = os.path.join(state.output_dir, state.active_cat)
if os.path.exists(cat_path):
for filename in os.listdir(cat_path):
if filename.startswith(state.active_cat):
idx = _extract_index(filename)
if idx is not None:
state._disk_index_map[idx] = os.path.join(cat_path, filename)
# Merge disk results into index_map (staged takes precedence)
for idx, path in state._disk_index_map.items():
if idx not in state.index_map:
state.index_map[idx] = path
# Build pairing mode index map (both categories)
state.pair_index_map.clear()
for orig_path, info in state.staged_data.items():
idx = _extract_index(info['name'])
if idx is None:
continue
if idx not in state.pair_index_map:
state.pair_index_map[idx] = {"main": None, "adj": None}
# Check if this is from main or adjacent category
if info['cat'] == state.pair_main_category:
state.pair_index_map[idx]["main"] = orig_path
@@ -543,13 +587,15 @@ def action_tag(img_path: str, manual_idx: Optional[int] = None):
state.undo_stack.pop(0)
SorterEngine.stage_image(img_path, state.active_cat, name)
# Only auto-increment if we used the default next_index (not manual)
if manual_idx is None:
state.next_index = idx + 1
refresh_staged_info()
refresh_ui()
# Use targeted refresh - sidebar index grid needs update, but skip heavy rebuild
render_sidebar() # Update index grid to show new tag
refresh_grid_only() # Just grid + pagination stats
def action_untag(img_path: str):
"""Remove staging from an image."""
@@ -568,7 +614,9 @@ def action_untag(img_path: str):
SorterEngine.clear_staged_item(img_path)
refresh_staged_info()
refresh_ui()
# Use targeted refresh - sidebar index grid needs update
render_sidebar() # Update index grid to show removed tag
refresh_grid_only() # Just grid + pagination stats
def action_delete(img_path: str):
"""Delete image to trash."""
@@ -632,9 +680,11 @@ def action_apply_page():
if not batch:
ui.notify("No images on current page", type='warning')
return
SorterEngine.commit_batch(batch, state.output_dir, state.cleanup_mode, state.batch_mode)
ui.notify(f"Page processed ({state.batch_mode})", type='positive')
# Force disk rescan since files were committed
state._last_disk_scan_key = ""
load_images()
async def action_apply_global():
@@ -648,6 +698,8 @@ async def action_apply_global():
state.source_dir,
state.profile_name
)
# Force disk rescan since files were committed
state._last_disk_scan_key = ""
load_images()
ui.notify("Global apply complete!", type='positive')
@@ -991,42 +1043,51 @@ def render_gallery():
for img_path in batch:
render_image_card(img_path)
def _set_hovered(path: str):
    """Helper for hover tracking - used with partial for memory efficiency."""
    # Remember which image the pointer is over; keyboard shortcuts read this.
    state.hovered_image = path
def _clear_hovered():
    """Helper for hover tracking - used with partial for memory efficiency."""
    # Pointer left the card: no image is currently targeted.
    state.hovered_image = None
def render_image_card(img_path: str):
"""Render individual image card."""
"""Render individual image card.
Uses functools.partial instead of lambdas for better memory efficiency."""
is_staged = img_path in state.staged_data
thumb_size = 800
card = ui.card().classes('p-2 bg-gray-900 border border-gray-700 no-shadow hover:border-green-500 transition-colors')
with card:
# Track hover for keyboard shortcuts
card.on('mouseenter', lambda p=img_path: setattr(state, 'hovered_image', p))
card.on('mouseleave', lambda: setattr(state, 'hovered_image', None))
# Track hover for keyboard shortcuts - using partial instead of lambda
card.on('mouseenter', partial(_set_hovered, img_path))
card.on('mouseleave', _clear_hovered)
# Header with filename and actions
with ui.row().classes('w-full justify-between no-wrap mb-1'):
ui.label(os.path.basename(img_path)[:15]).classes('text-xs text-gray-400 truncate')
with ui.row().classes('gap-0'):
ui.button(
icon='zoom_in',
on_click=lambda p=img_path: open_zoom_dialog(p)
on_click=partial(open_zoom_dialog, img_path)
).props('flat size=sm dense color=white')
ui.button(
icon='delete',
on_click=lambda p=img_path: action_delete(p)
on_click=partial(action_delete, img_path)
).props('flat size=sm dense color=red')
# Thumbnail with double-click to tag
img = ui.image(f"/thumbnail?path={img_path}&size={thumb_size}&q={state.preview_quality}") \
.classes('w-full h-64 bg-black rounded cursor-pointer') \
.props('fit=contain no-spinner')
# Double-click to tag (if not already tagged)
# Double-click to tag (if not already tagged) - using partial
if not is_staged:
img.on('dblclick', lambda p=img_path: action_tag(p))
img.on('dblclick', partial(action_tag, img_path))
else:
img.on('dblclick', lambda p=img_path: action_untag(p))
img.on('dblclick', partial(action_untag, img_path))
# Tagging UI
if is_staged:
info = state.staged_data[img_path]
@@ -1035,12 +1096,13 @@ def render_image_card(img_path: str):
ui.label(f"🏷️ {info['cat']}").classes('text-center text-green-400 text-xs py-1 w-full')
ui.button(
f"Untag (#{idx_str})",
on_click=lambda p=img_path: action_untag(p)
on_click=partial(action_untag, img_path)
).props('flat color=grey-5 dense').classes('w-full')
else:
with ui.row().classes('w-full no-wrap mt-2 gap-1'):
local_idx = ui.number(value=state.next_index, precision=0) \
.props('dense dark outlined').classes('w-1/3')
# Note: This one still needs lambda due to dynamic local_idx.value access
ui.button(
'Tag',
on_click=lambda p=img_path, i=local_idx: action_tag(p, int(i.value))
@@ -1108,8 +1170,9 @@ def render_pagination():
start = max(0, state.page - 2)
end = min(state.total_pages, state.page + 3)
green_dots = state.get_green_dots() # Lazy calculation
for p in range(start, end):
dot = " 🟢" if p in state.green_dots else ""
dot = " 🟢" if p in green_dots else ""
color = "white" if p == state.page else "grey-6"
ui.button(
f"{p+1}{dot}",
@@ -1131,6 +1194,12 @@ def refresh_ui():
render_pagination()
render_gallery()
def refresh_grid_only():
    """Refresh only the grid and pagination stats - skip sidebar rebuild.
    Use for tag/untag operations where sidebar doesn't need full rebuild."""
    # Lighter alternative to refresh_ui(): redraws pagination and the
    # gallery grid but avoids the more expensive sidebar re-render.
    render_pagination()
    render_gallery()
def handle_keyboard(e):
"""Handle keyboard navigation and shortcuts (fallback)."""
if not e.action.keydown: