Files
sorting-sorted/engine.py
2026-01-20 13:11:48 +01:00

623 lines
26 KiB
Python

import os
import shutil
import sqlite3
from PIL import Image
from io import BytesIO
from contextlib import contextmanager
from functools import lru_cache
import hashlib
class SorterEngine:
    """Static helpers for the image sorter: SQLite persistence and file operations.

    Every method is a ``@staticmethod``; the class is used as a namespace and
    is never instantiated by the code visible here.
    """

    # Path of the SQLite database file opened by get_connection().
    DB_PATH = "/app/sorter_database.db"
    # NOTE(review): no code visible here reads or writes this dict, and
    # get_connection() opens a fresh connection on every call, so no pooling
    # actually happens. Kept so external references keep working.
    _connection_pool = {}
# --- CONNECTION MANAGEMENT (Performance Optimization) ---
@staticmethod
@contextmanager
def get_connection():
    """Yield a SQLite connection to ``SorterEngine.DB_PATH`` and always close it.

    The parent directory of the database file is created when missing. Each
    call opens a new connection, switches it to WAL journaling with
    ``synchronous=NORMAL`` and a 2000-page cache, and closes it when the
    ``with`` block exits. Nothing is committed automatically: callers must
    call ``conn.commit()`` themselves.

    Yields:
        sqlite3.Connection: an open connection (``check_same_thread=False``).
    """
    db_dir = os.path.dirname(SorterEngine.DB_PATH)
    if db_dir and not os.path.exists(db_dir):
        os.makedirs(db_dir, exist_ok=True)
    conn = sqlite3.connect(SorterEngine.DB_PATH, check_same_thread=False)
    try:
        # The PRAGMAs live inside the try so that a failure here (e.g. a
        # locked or corrupt database) still closes the connection.
        conn.execute("PRAGMA journal_mode=WAL")    # write-ahead logging
        conn.execute("PRAGMA synchronous=NORMAL")  # fewer fsyncs than FULL
        conn.execute("PRAGMA cache_size=2000")     # positive value = pages
        yield conn
    finally:
        conn.close()
# --- 1. DATABASE INITIALIZATION ---
@staticmethod
def init_db():
    """Create all tables and indexes if absent and seed the default categories.

    Tables: ``profiles``, ``folder_ids``, ``categories``, ``staging_area``,
    ``processed_log`` and ``folder_tags`` (tags keyed by folder hash plus
    relative path, used to restore staging after a reload). The four default
    categories are inserted only when the ``categories`` table is empty.
    """
    schema_statements = (
        '''CREATE TABLE IF NOT EXISTS profiles
           (name TEXT PRIMARY KEY, tab1_target TEXT, tab2_target TEXT, tab2_control TEXT,
           tab4_source TEXT, tab4_out TEXT, mode TEXT, tab5_source TEXT, tab5_out TEXT)''',
        '''CREATE TABLE IF NOT EXISTS folder_ids (path TEXT PRIMARY KEY, folder_id INTEGER)''',
        '''CREATE TABLE IF NOT EXISTS categories (name TEXT PRIMARY KEY)''',
        '''CREATE TABLE IF NOT EXISTS staging_area
           (original_path TEXT PRIMARY KEY, target_category TEXT, new_name TEXT, is_marked INTEGER DEFAULT 0)''',
        # History of committed files.
        '''CREATE TABLE IF NOT EXISTS processed_log
           (source_path TEXT PRIMARY KEY, category TEXT, action_type TEXT)''',
        # Folder-based tag persistence: folder_hash -> staging data.
        '''CREATE TABLE IF NOT EXISTS folder_tags
           (folder_hash TEXT NOT NULL,
           relative_path TEXT NOT NULL,
           target_category TEXT,
           new_name TEXT,
           is_marked INTEGER DEFAULT 0,
           PRIMARY KEY (folder_hash, relative_path))''',
        # Explicit index on folder_hash for per-folder lookups.
        '''CREATE INDEX IF NOT EXISTS idx_folder_tags_hash
           ON folder_tags(folder_hash)''',
    )
    default_categories = [("_TRASH",), ("Default",), ("Action",), ("Solo",)]

    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        for statement in schema_statements:
            cur.execute(statement)

        # Seed only a brand-new (empty) categories table.
        cur.execute("SELECT COUNT(*) FROM categories")
        (category_count,) = cur.fetchone()
        if category_count == 0:
            cur.executemany("INSERT OR IGNORE INTO categories VALUES (?)",
                            default_categories)
        conn.commit()
# --- FOLDER HASH UTILITY ---
@staticmethod
def _get_folder_hash(folder_path: str) -> str:
    """Return a short, stable key for ``folder_path``.

    The whole path is normalised with ``os.path.normpath`` and lower-cased,
    then hashed with MD5; the first 16 hex characters are returned. Because
    of the lower-casing, paths that differ only in letter case share a key.
    The hash is an identifier only, not a security measure.
    """
    canonical = os.path.normpath(folder_path).lower()
    digest = hashlib.md5(canonical.encode())
    return digest.hexdigest()[:16]
# --- 2. PROFILE & PATH MANAGEMENT ---
@staticmethod
def save_tab_paths(profile_name, t1_t=None, t2_t=None, t2_c=None, t4_s=None, t4_o=None, mode=None, t5_s=None, t5_o=None):
    """Insert or update one profile row, changing only the fields supplied.

    Arguments left as ``None`` keep the value already stored for
    ``profile_name``. When the profile does not exist yet, missing paths
    default to ``"/storage"`` and ``mode`` defaults to ``"id"``.
    """
    # Same order as columns 1..8 of the profiles table.
    overrides = (t1_t, t2_t, t2_c, t4_s, t4_o, mode, t5_s, t5_o)

    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT * FROM profiles WHERE name = ?", (profile_name,))
        current = cur.fetchone()
        if not current:
            current = (profile_name, "/storage", "/storage", "/storage", "/storage", "/storage", "id", "/storage", "/storage")

        merged = [profile_name]
        for column, supplied in enumerate(overrides, start=1):
            # The stored value is only looked up when no override was given.
            merged.append(current[column] if supplied is None else supplied)

        cur.execute("INSERT OR REPLACE INTO profiles VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)", tuple(merged))
        conn.commit()
@staticmethod
def load_batch_parallel(image_paths, quality):
    """Compress several images concurrently with ``compress_for_web``.

    Args:
        image_paths: sized collection of image paths; ``None`` or an empty
            collection yields an empty result without starting any thread.
        quality: quality value forwarded to ``SorterEngine.compress_for_web``.

    Returns:
        dict mapping each path to whatever ``compress_for_web`` returned for
        it. A path whose worker raised is left out of the dict.
    """
    import concurrent.futures

    # Nothing to do: avoid creating a thread pool for an empty/None batch.
    if not image_paths:
        return {}

    def process_one(path):
        return path, SorterEngine.compress_for_web(path, quality)

    results = {}
    # At most 8 threads, and never more threads than images.
    max_workers = min(8, len(image_paths))
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(process_one, p) for p in image_paths]
        for future in concurrent.futures.as_completed(futures):
            try:
                path, data = future.result()
            except Exception:
                # Deliberate best effort: one bad image must not abort the batch.
                continue
            results[path] = data
    return results
@staticmethod
def load_profiles():
    """Return every stored profile as ``{name: {field: value}}``.

    The inner dict has the keys ``tab1_target``, ``tab2_target``,
    ``tab2_control``, ``tab4_source``, ``tab4_out``, ``mode``,
    ``tab5_source`` and ``tab5_out``.
    """
    # Field names in the order of columns 1..8 of the profiles table.
    fields = ("tab1_target", "tab2_target", "tab2_control",
              "tab4_source", "tab4_out", "mode",
              "tab5_source", "tab5_out")

    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT * FROM profiles")
        profiles = {}
        for record in cur.fetchall():
            profiles[record[0]] = {
                field: record[column]
                for column, field in enumerate(fields, start=1)
            }
        return profiles
# --- 3. CATEGORY MANAGEMENT (Sorted A-Z) ---
@staticmethod
def get_categories():
    """Return all category names sorted A-Z, ignoring letter case."""
    query = "SELECT name FROM categories ORDER BY name COLLATE NOCASE ASC"
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute(query)
        names = []
        for record in cur.fetchall():
            names.append(record[0])
        return names
@staticmethod
def add_category(name):
    """Add ``name`` to the categories table; an existing name is left untouched."""
    statement = "INSERT OR IGNORE INTO categories VALUES (?)"
    with SorterEngine.get_connection() as conn:
        conn.cursor().execute(statement, (name,))
        conn.commit()
@staticmethod
def rename_category(old_name, new_name, output_base_path=None):
    """Rename a category in the database and, optionally, its folder on disk.

    The name is updated in ``categories``, ``staging_area`` and
    ``folder_tags``. When ``output_base_path`` is given and contains a folder
    named ``old_name`` but none named ``new_name``, that folder is renamed too.

    A ``sqlite3.IntegrityError`` (for example ``new_name`` already being a
    category) is swallowed; in that case nothing is committed.
    """
    rename_statements = (
        "UPDATE categories SET name = ? WHERE name = ?",
        "UPDATE staging_area SET target_category = ? WHERE target_category = ?",
        "UPDATE folder_tags SET target_category = ? WHERE target_category = ?",
    )
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        try:
            for statement in rename_statements:
                cur.execute(statement, (new_name, old_name))

            if output_base_path:
                current_dir = os.path.join(output_base_path, old_name)
                renamed_dir = os.path.join(output_base_path, new_name)
                # Never clobber an existing folder that already has the new name.
                if os.path.exists(current_dir) and not os.path.exists(renamed_dir):
                    os.rename(current_dir, renamed_dir)

            # Commit only after the folder rename did not raise.
            conn.commit()
        except sqlite3.IntegrityError:
            pass
@staticmethod
def sync_categories_from_disk(output_path):
    """Register every non-hidden subfolder of ``output_path`` as a category.

    Returns:
        The number of categories newly inserted; ``0`` when ``output_path``
        is empty or does not exist.
    """
    if not (output_path and os.path.exists(output_path)):
        return 0

    subfolders = []
    for entry in os.listdir(output_path):
        is_folder = os.path.isdir(os.path.join(output_path, entry))
        if is_folder and not entry.startswith("."):
            subfolders.append(entry)

    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        newly_added = 0
        for folder_name in subfolders:
            cur.execute("INSERT OR IGNORE INTO categories VALUES (?)", (folder_name,))
            # rowcount stays 0 when the row already existed and was ignored.
            if cur.rowcount > 0:
                newly_added += 1
        conn.commit()
    return newly_added
# --- 4. IMAGE & ID OPERATIONS ---
@staticmethod
def get_images(path, recursive=False):
    """Return the sorted list of image files found under ``path``.

    Args:
        path: directory to scan. An empty value, a missing path or a path
            that is not a directory yields ``[]``.
        recursive: when true, walk subdirectories as well, skipping every
            directory whose name contains ``"_DELETED"`` (the trash folders).

    Returns:
        Sorted list of file paths whose extension (case-insensitive) is one
        of .jpg, .jpeg, .png, .webp, .bmp, .tiff.
    """
    exts = {'.jpg', '.jpeg', '.png', '.webp', '.bmp', '.tiff'}

    # isdir (rather than exists) so that a path to a regular file returns []
    # instead of making os.scandir raise NotADirectoryError.
    if not path or not os.path.isdir(path):
        return []

    def is_image(filename):
        return os.path.splitext(filename.lower())[1] in exts

    image_list = []
    if recursive:
        for root, dirs, files in os.walk(path):
            # Prune trash folders in place so os.walk never descends into them.
            dirs[:] = [d for d in dirs if "_DELETED" not in d]
            image_list.extend(os.path.join(root, f) for f in files if is_image(f))
    else:
        try:
            with os.scandir(path) as entries:
                for entry in entries:
                    if entry.is_file() and is_image(entry.name):
                        image_list.append(entry.path)
        except PermissionError:
            # Unreadable directory: return whatever was collected so far.
            pass
    return sorted(image_list)
@staticmethod
def get_id_mapping(path):
    """Group the image file names directly inside ``path`` by their id prefix.

    Only names that start with ``"id"`` and contain an underscore are
    considered; the prefix is the text before the first underscore.

    Returns:
        dict mapping each prefix to the list of file names carrying it.
    """
    groups = {}
    for image_path in SorterEngine.get_images(path, recursive=False):
        filename = os.path.basename(image_path)
        if not (filename.startswith("id") and "_" in filename):
            continue
        id_prefix = filename.split('_')[0]
        groups.setdefault(id_prefix, []).append(filename)
    return groups
@staticmethod
def get_max_id_number(target_path):
    """Return the largest numeric id used by ``idNNN_*`` files in ``target_path``.

    Only regular files directly inside ``target_path`` whose name starts with
    ``"id"`` and contains an underscore are inspected; the number is the text
    between ``"id"`` and the first underscore.

    Returns:
        The highest id found, or ``0`` when the path is empty, missing, not a
        readable directory, or contains no matching file.
    """
    if not target_path or not os.path.exists(target_path):
        return 0

    max_id = 0
    try:
        with os.scandir(target_path) as entries:
            for entry in entries:
                name = entry.name
                if not (entry.is_file() and name.startswith("id") and "_" in name):
                    continue
                try:
                    num = int(name[2:].split('_')[0])
                except ValueError:
                    # Non-numeric id such as "idxx_foo.jpg": not part of the sequence.
                    continue
                max_id = max(max_id, num)
    except (PermissionError, NotADirectoryError):
        # Unreadable directory, or target_path is a regular file.
        pass
    return max_id
@staticmethod
def get_folder_id(source_path):
    """Return the persistent numeric id of ``source_path``, allocating one if new.

    A new folder receives the current maximum id plus one, or ``1`` when the
    ``folder_ids`` table has no usable maximum yet.
    """
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT folder_id FROM folder_ids WHERE path = ?", (source_path,))
        known = cur.fetchone()
        if known:
            return known[0]

        cur.execute("SELECT MAX(folder_id) FROM folder_ids")
        highest = cur.fetchone()
        next_id = 1
        # MAX() over an empty table is NULL, which arrives here as None.
        if highest and highest[0]:
            next_id = highest[0] + 1

        cur.execute("INSERT INTO folder_ids VALUES (?, ?)", (source_path, next_id))
        conn.commit()
        return next_id
# --- 5. GALLERY STAGING & DELETION (TAB 5) ---
@staticmethod
def delete_to_trash(file_path):
    """Move ``file_path`` into a sibling ``_DELETED`` folder so it can be restored.

    If the trash folder already holds a file with the same name, a numeric
    suffix (``name_1.ext``, ``name_2.ext``, ...) is appended instead of
    overwriting it, so earlier deletions stay recoverable.

    Returns:
        The path of the file inside ``_DELETED``, or ``None`` when
        ``file_path`` does not exist.
    """
    if not os.path.exists(file_path):
        return None

    trash_dir = os.path.join(os.path.dirname(file_path), "_DELETED")
    os.makedirs(trash_dir, exist_ok=True)

    filename = os.path.basename(file_path)
    dest = os.path.join(trash_dir, filename)
    if os.path.exists(dest):
        # Moving onto an existing file would silently replace it on POSIX.
        stem, ext = os.path.splitext(filename)
        counter = 1
        while os.path.exists(dest):
            dest = os.path.join(trash_dir, f"{stem}_{counter}{ext}")
            counter += 1

    shutil.move(file_path, dest)
    return dest
@staticmethod
def stage_image(original_path, category, new_name, source_root=None):
    """Stage a pending rename/move for ``original_path`` (stored as marked).

    The row is written to ``staging_area``. When ``source_root`` is given,
    the same tag is also stored in ``folder_tags`` under the folder hash and
    the path relative to ``source_root``.
    """
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("INSERT OR REPLACE INTO staging_area VALUES (?, ?, ?, 1)",
                    (original_path, category, new_name))

        if source_root:
            cache_row = (
                SorterEngine._get_folder_hash(source_root),
                os.path.relpath(original_path, source_root),
                category,
                new_name,
            )
            cur.execute("""INSERT OR REPLACE INTO folder_tags
                           VALUES (?, ?, ?, ?, 1)""", cache_row)
        conn.commit()
@staticmethod
def clear_staged_item(original_path, source_root=None):
    """Unstage ``original_path``.

    The row is removed from ``staging_area``; when ``source_root`` is given,
    the matching ``folder_tags`` entry is removed as well.
    """
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("DELETE FROM staging_area WHERE original_path = ?", (original_path,))

        if source_root:
            cache_key = (
                SorterEngine._get_folder_hash(source_root),
                os.path.relpath(original_path, source_root),
            )
            cur.execute("DELETE FROM folder_tags WHERE folder_hash = ? AND relative_path = ?",
                        cache_key)
        conn.commit()
@staticmethod
def get_staged_data():
    """Return the staging area as ``{original_path: {"cat", "name", "marked"}}``."""
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT * FROM staging_area")
        staged = {}
        for record in cur.fetchall():
            staged[record[0]] = {
                "cat": record[1],
                "name": record[2],
                "marked": record[3],
            }
        return staged
# --- NEW: FOLDER-BASED TAG RESTORATION ---
@staticmethod
def restore_folder_tags(source_root):
    """Re-stage the tags previously cached for ``source_root``.

    Each cached relative path is resolved against ``source_root``; entries
    whose file no longer exists are skipped.

    Returns:
        The number of tags written back into ``staging_area``.
    """
    folder_hash = SorterEngine._get_folder_hash(source_root)
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("""SELECT relative_path, target_category, new_name, is_marked
                       FROM folder_tags WHERE folder_hash = ?""", (folder_hash,))
        cached_rows = cur.fetchall()

        restored = 0
        for relative, category, new_name, is_marked in cached_rows:
            absolute = os.path.normpath(os.path.join(source_root, relative))
            if not os.path.exists(absolute):
                # The file was moved or deleted since the tag was cached.
                continue
            cur.execute("""INSERT OR REPLACE INTO staging_area
                           VALUES (?, ?, ?, ?)""", (absolute, category, new_name, is_marked))
            restored += 1

        conn.commit()
        return restored
@staticmethod
def save_all_tags_to_folder_cache(source_root):
    """Copy every staged tag that lives under ``source_root`` into ``folder_tags``.

    A staged file belongs to ``source_root`` only when its normalised path
    lies inside that directory. A plain string-prefix test is not enough:
    ``/data/img`` is a prefix of ``/data/img2/x.jpg``, which would be cached
    with a relative path that escapes the folder (``../img2/x.jpg``).
    """
    folder_hash = SorterEngine._get_folder_hash(source_root)
    staged = SorterEngine.get_staged_data()

    # os.path.join(root, "") appends exactly one trailing separator (and none
    # when root already ends with one, e.g. "/"), so only real descendants match.
    root_prefix = os.path.join(os.path.normpath(source_root), "")

    with SorterEngine.get_connection() as conn:
        cursor = conn.cursor()
        for abs_path, info in staged.items():
            if not os.path.normpath(abs_path).startswith(root_prefix):
                continue
            relative_path = os.path.relpath(abs_path, source_root)
            cursor.execute("""INSERT OR REPLACE INTO folder_tags
                              VALUES (?, ?, ?, ?, ?)""",
                           (folder_hash, relative_path, info['cat'], info['name'], info['marked']))
        conn.commit()
@staticmethod
def clear_folder_cache(source_root):
    """Delete every cached tag stored for ``source_root``."""
    cache_key = (SorterEngine._get_folder_hash(source_root),)
    with SorterEngine.get_connection() as conn:
        conn.cursor().execute("DELETE FROM folder_tags WHERE folder_hash = ?", cache_key)
        conn.commit()
@staticmethod
def commit_global(output_root, cleanup_mode, operation="Copy", source_root=None):
    """Commit every staged file to ``output_root`` and optionally clean the source.

    Args:
        output_root: destination directory (created when missing). Files are
            written directly into it under their staged ``name``; a numeric
            suffix is added when that name is already taken.
        cleanup_mode: ``"Keep"`` leaves unstaged images alone;
            ``"Move to Unused"`` moves them to ``<source_root>/unused``;
            ``"Delete"`` removes them. Cleanup only runs when ``source_root``
            is given.
        operation: ``"Copy"`` copies the staged file; any other value moves it.
        source_root: folder scanned recursively for cleanup and whose cached
            folder tags are cleared afterwards.

    Files written by this very commit are never touched by the cleanup step,
    even when ``output_root`` lies inside ``source_root``; without that guard
    a "Delete" cleanup would remove the freshly committed copies.
    """
    data = SorterEngine.get_staged_data()

    def unique_path(directory, filename):
        """Return a path for ``filename`` in ``directory`` that is not in use."""
        candidate = os.path.join(directory, filename)
        stem, ext = os.path.splitext(filename)
        counter = 1
        while os.path.exists(candidate):
            candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
            counter += 1
        return candidate

    def canonical(p):
        """Normalise ``p`` so paths built in different ways compare equal."""
        return os.path.normcase(os.path.abspath(p))

    with SorterEngine.get_connection() as conn:
        cursor = conn.cursor()
        if not os.path.exists(output_root):
            os.makedirs(output_root, exist_ok=True)

        # 1. Process all staged items.
        written = set()
        for old_p, info in data.items():
            if not os.path.exists(old_p):
                continue
            final_dst = unique_path(output_root, info['name'])
            if operation == "Copy":
                shutil.copy2(old_p, final_dst)
            else:
                shutil.move(old_p, final_dst)
            SorterEngine.fix_permissions(final_dst)
            written.add(canonical(final_dst))
            cursor.execute("INSERT OR REPLACE INTO processed_log VALUES (?, ?, ?)",
                           (old_p, info['cat'], operation))

        # 2. Global cleanup of images that were not staged.
        if cleanup_mode != "Keep" and source_root:
            unused_dir = os.path.join(source_root, "unused")
            unused_canonical = canonical(unused_dir)
            for img_p in SorterEngine.get_images(source_root, recursive=True):
                if img_p in data:
                    continue
                if canonical(img_p) in written:
                    # Output of this commit, not leftover source material.
                    continue
                if cleanup_mode == "Move to Unused":
                    if canonical(os.path.dirname(img_p)) == unused_canonical:
                        # Already sitting in the unused folder.
                        continue
                    os.makedirs(unused_dir, exist_ok=True)
                    # The scan is recursive, so equal basenames from different
                    # subfolders must not overwrite each other.
                    dest_unused = unique_path(unused_dir, os.path.basename(img_p))
                    shutil.move(img_p, dest_unused)
                    SorterEngine.fix_permissions(dest_unused)
                elif cleanup_mode == "Delete":
                    os.remove(img_p)

        # 3. Clear the staging area and this folder's tag cache.
        cursor.execute("DELETE FROM staging_area")
        if source_root:
            folder_hash = SorterEngine._get_folder_hash(source_root)
            cursor.execute("DELETE FROM folder_tags WHERE folder_hash = ?", (folder_hash,))
        conn.commit()
# --- 6. CORE UTILITIES ---
@staticmethod
def harmonize_names(t_p, c_p):
    """Rename the control file so its stem matches the target file's stem.

    The control file stays in its own directory and keeps its own extension.
    If the desired name is taken by another file, ``<stem>_alt<ext>`` is
    used; if that is taken too, ``_alt2``, ``_alt3``, ... are tried so an
    existing file is never overwritten.

    Args:
        t_p: path of the target file that provides the name.
        c_p: path of the control file to rename.

    Returns:
        The control file's path after renaming; ``c_p`` unchanged when either
        file is missing or the name already matches.
    """
    if not os.path.exists(t_p) or not os.path.exists(c_p):
        return c_p

    t_root = os.path.splitext(os.path.basename(t_p))[0]
    c_dir = os.path.dirname(c_p)
    c_ext = os.path.splitext(c_p)[1]

    new_c_p = os.path.join(c_dir, f"{t_root}{c_ext}")
    if new_c_p == c_p:
        return c_p

    if os.path.exists(new_c_p):
        new_c_p = os.path.join(c_dir, f"{t_root}_alt{c_ext}")
        attempt = 2
        # Stop on a free name, or on the control file's own current name
        # (renaming a file onto itself is harmless).
        while os.path.exists(new_c_p) and new_c_p != c_p:
            new_c_p = os.path.join(c_dir, f"{t_root}_alt{attempt}{c_ext}")
            attempt += 1

    os.rename(c_p, new_c_p)
    return new_c_p
@staticmethod
def re_id_file(old_path, new_id_prefix):
    """Rename a file so it carries ``new_id_prefix`` instead of its old id prefix.

    Everything up to and including the first underscore of the file name is
    dropped and ``new_id_prefix`` is put in front of the rest, so the prefix
    must bring its own separator (e.g. ``"id002_"``). A name without any
    underscore is kept whole and simply prefixed.

    Returns:
        The new path of the renamed file.
    """
    folder, filename = os.path.split(old_path)
    _, separator, remainder = filename.partition('_')
    kept_part = remainder if separator else filename
    renamed_path = os.path.join(folder, f"{new_id_prefix}{kept_part}")
    os.rename(old_path, renamed_path)
    return renamed_path
@staticmethod
def move_to_unused_synced(t_p, c_p, t_root, c_root):
    """Move a target/control pair into the ``unused`` folder of each root.

    Both files end up under the target file's base name, so the pair stays
    name-aligned.

    Returns:
        ``(target_path, control_path)`` inside the respective ``unused`` folders.
    """
    shared_name = os.path.basename(t_p)
    t_un = os.path.join(t_root, "unused", shared_name)
    c_un = os.path.join(c_root, "unused", shared_name)

    for destination in (t_un, c_un):
        os.makedirs(os.path.dirname(destination), exist_ok=True)

    for source, destination in ((t_p, t_un), (c_p, c_un)):
        shutil.move(source, destination)

    return t_un, c_un
@staticmethod
def restore_from_unused(t_p, c_p, t_root, c_root):
    """Move a pair out of ``unused`` into the ``selected_*`` folders.

    The target goes to ``<t_root>/selected_target`` and the control to
    ``<c_root>/selected_control``, both under the target file's base name.

    Returns:
        ``(target_path, control_path)`` at their new locations.
    """
    shared_name = os.path.basename(t_p)
    t_dst = os.path.join(t_root, "selected_target", shared_name)
    c_dst = os.path.join(c_root, "selected_control", shared_name)

    for destination in (t_dst, c_dst):
        os.makedirs(os.path.dirname(destination), exist_ok=True)

    for source, destination in ((t_p, t_dst), (c_p, c_dst)):
        shutil.move(source, destination)

    return t_dst, c_dst
@staticmethod
def compress_for_web(path, quality, target_size=None):
    """Encode the image at ``path`` as WebP and return the encoded bytes.

    Args:
        path: image file to read.
        quality: WebP quality value passed to the encoder.
        target_size: optional maximum edge length in pixels. An image larger
            than this in either dimension is shrunk to fit, preserving aspect
            ratio: BILINEAR when the longest edge is more than three times
            ``target_size``, LANCZOS otherwise.

    Returns:
        The WebP data as ``bytes``, or ``None`` if anything goes wrong
        (unreadable file, unsupported format, encoder error, ...).
    """
    try:
        with Image.open(path) as source:
            # Anything that is not RGB/RGBA is converted to RGB before encoding.
            if source.mode in ('RGB', 'RGBA'):
                frame = source
            else:
                frame = source.convert("RGB")

            if target_size and (frame.width > target_size or frame.height > target_size):
                longest_edge = max(frame.width, frame.height)
                if longest_edge > target_size * 3:
                    resampling = Image.Resampling.BILINEAR
                else:
                    resampling = Image.Resampling.LANCZOS
                frame.thumbnail((target_size, target_size), resampling)

            encoded = BytesIO()
            frame.save(encoded, format="WEBP", quality=quality, method=4)
            return encoded.getvalue()
    except Exception:
        # Best effort by design: callers treat None as "could not be loaded".
        return None
@staticmethod
def revert_action(action):
    """Undo a recorded move by moving files from ``*_dst`` back to ``*_src``.

    ``action`` is a dict with a ``'type'`` key:

    * ``'move'``: restores ``t_dst`` to ``t_src``.
    * ``'unused'`` / ``'cat_move'``: restores ``t_dst`` to ``t_src`` and,
      when the dict has a ``'c_dst'`` key, ``c_dst`` to ``c_src``.

    A destination that no longer exists is skipped; other types do nothing.
    """
    kind = action['type']

    if kind == 'move':
        if os.path.exists(action['t_dst']):
            shutil.move(action['t_dst'], action['t_src'])
        return

    if kind not in ['unused', 'cat_move']:
        return

    key_pairs = [('t_dst', 't_src')]
    if 'c_dst' in action:
        key_pairs.append(('c_dst', 'c_src'))

    for moved_key, origin_key in key_pairs:
        if os.path.exists(action[moved_key]):
            shutil.move(action[moved_key], action[origin_key])
@staticmethod
def get_processed_log():
    """Return the commit history as ``{source_path: {"cat", "action"}}``."""
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        cur.execute("SELECT * FROM processed_log")
        history = {}
        for record in cur.fetchall():
            history[record[0]] = {"cat": record[1], "action": record[2]}
        return history
@staticmethod
def fix_permissions(path):
    """Set mode ``0o777`` on ``path``, ignoring any failure.

    NOTE(review): 0o777 makes the file readable, writable and executable for
    every user; confirm this breadth is really required by the deployment.
    """
    full_access = 0o777
    try:
        os.chmod(path, full_access)
    except Exception:
        # Best effort: a file whose mode cannot be changed is left as it is.
        return None
@staticmethod
def commit_batch(file_list, output_root, cleanup_mode, operation="Copy"):
    """Commit the staged files of one batch and clean up the rest of it.

    For every existing path in ``file_list``:

    * staged and marked: copied (``operation == "Copy"``) or moved to
      ``output_root`` under its staged name, with a numeric suffix on a name
      clash; it is then unstaged and recorded in ``processed_log``.
    * otherwise handled according to ``cleanup_mode``: ``"Keep"`` leaves it,
      ``"Move to Unused"`` moves it into an ``unused`` folder next to the
      file, ``"Delete"`` removes it.

    Moving to ``unused`` never overwrites a file already stored there; a
    numeric suffix is appended instead.
    """
    data = SorterEngine.get_staged_data()

    def unique_path(directory, filename):
        """Return a path for ``filename`` in ``directory`` that is not in use."""
        candidate = os.path.join(directory, filename)
        stem, ext = os.path.splitext(filename)
        counter = 1
        while os.path.exists(candidate):
            candidate = os.path.join(directory, f"{stem}_{counter}{ext}")
            counter += 1
        return candidate

    with SorterEngine.get_connection() as conn:
        cursor = conn.cursor()
        if not os.path.exists(output_root):
            os.makedirs(output_root, exist_ok=True)

        for file_path in file_list:
            if not os.path.exists(file_path):
                continue

            info = data.get(file_path)
            if info is not None and info['marked']:
                # --- Case A: tagged -> commit to the output folder ---
                final_dst = unique_path(output_root, info['name'])
                if operation == "Copy":
                    shutil.copy2(file_path, final_dst)
                else:
                    shutil.move(file_path, final_dst)
                SorterEngine.fix_permissions(final_dst)
                cursor.execute("DELETE FROM staging_area WHERE original_path = ?", (file_path,))
                cursor.execute("INSERT OR REPLACE INTO processed_log VALUES (?, ?, ?)",
                               (file_path, info['cat'], operation))
            elif cleanup_mode == "Move to Unused":
                # --- Case B: untagged -> park next to the source file ---
                unused_dir = os.path.join(os.path.dirname(file_path), "unused")
                os.makedirs(unused_dir, exist_ok=True)
                # An earlier run may have left a same-named file in unused/.
                dest_unused = unique_path(unused_dir, os.path.basename(file_path))
                shutil.move(file_path, dest_unused)
                SorterEngine.fix_permissions(dest_unused)
            elif cleanup_mode == "Delete":
                os.remove(file_path)

        conn.commit()
@staticmethod
def delete_category(name):
    """Remove a category together with every staged or cached tag that uses it."""
    purge_statements = (
        "DELETE FROM categories WHERE name = ?",
        "DELETE FROM staging_area WHERE target_category = ?",
        "DELETE FROM folder_tags WHERE target_category = ?",
    )
    with SorterEngine.get_connection() as conn:
        cur = conn.cursor()
        for statement in purge_statements:
            cur.execute(statement, (name,))
        conn.commit()
@staticmethod
def get_tagged_page_indices(all_images, page_size):
    """Return the set of page numbers that contain at least one staged image.

    An image at position ``i`` of ``all_images`` belongs to page
    ``i // page_size``. An empty set is returned when nothing is staged.
    """
    staged = SorterEngine.get_staged_data()
    if not staged:
        return set()

    return {
        position // page_size
        for position, image_path in enumerate(all_images)
        if image_path in staged
    }