Files
8-cut/tools/migrate_dataset_json.py
T
2026-04-07 14:09:08 +02:00

99 lines
3.0 KiB
Python

#!/usr/bin/env python3
"""One-shot migration: generate dataset.json files from ~/.8cut.db history.
For each export folder that has records in the database, writes (or merges
into) a dataset.json with path (relative to the folder) and label per clip.
Usage:
python tools/migrate_dataset_json.py [--dry-run]
"""
import argparse
import json
import os
import sqlite3
from collections import defaultdict
from pathlib import Path
# SQLite history database read by this migration (".8cut.db" in the user's home directory).
DB_PATH = Path.home() / ".8cut.db"
def load_db_records(db_path: Path) -> list[dict]:
    """Read every labelled record from the history database.

    Args:
        db_path: Path of the SQLite database file to open.

    Returns:
        One dict per row of the ``processed`` table whose ``label`` is not
        the empty string, each with the keys ``output_path`` and ``label``.
        Rows whose label is NULL are filtered out as well, because the SQL
        comparison ``NULL != ''`` is not true.

    Raises:
        sqlite3.Error: If the database cannot be opened or the query fails
            (for example when the ``processed`` table does not exist).
    """
    con = sqlite3.connect(db_path)
    try:
        con.row_factory = sqlite3.Row
        rows = con.execute(
            "SELECT output_path, label FROM processed WHERE label != ''"
        ).fetchall()
        # Convert to plain dicts so callers do not depend on sqlite3.Row.
        return [dict(r) for r in rows]
    finally:
        # Close on the error path too; otherwise a failing query would leak
        # the open connection.
        con.close()
def build_entries_for_folder(folder: str, records: list[dict]) -> list[dict]:
    """Turn database records into dataset entries relative to *folder*.

    Args:
        folder: Directory that each record's ``output_path`` is made
            relative to.
        records: Dicts providing at least ``output_path`` and ``label``.

    Returns:
        A new list, in the same order as *records*, of dicts with exactly
        two keys: ``path`` (``output_path`` relative to *folder*) and
        ``label`` (copied unchanged).
    """
    entries: list[dict] = []
    for record in records:
        relative = os.path.relpath(record["output_path"], folder)
        entries.append({"path": relative, "label": record["label"]})
    return entries
def merge_into_json(json_path: str, new_entries: list[dict], dry_run: bool) -> int:
    """Merge new_entries into existing json_path, returning count added/updated.

    Existing entries are keyed by their ``"path"`` value. A new entry
    replaces the existing entry with the same path when the two dicts
    differ, and is appended when no entry with that path exists yet.

    Existing content that cannot be used is ignored rather than raising:
    a file that is not valid JSON, a top-level JSON value that is not a
    list, and list items that are not dicts with a string ``"path"``.
    Such content is not carried over when the file is rewritten.

    Args:
        json_path: Path of the JSON file to read and (possibly) rewrite.
        new_entries: Dicts that each contain a ``"path"`` key.
        dry_run: When true, compute the count but never write the file.

    Returns:
        Number of entries in *new_entries* that were added or that differed
        from the stored entry with the same path.
    """
    loaded = []
    if os.path.exists(json_path):
        with open(json_path, "r", encoding="utf-8") as f:
            try:
                loaded = json.load(f)
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass, so this
                # single clause covers everything the original tuple did.
                loaded = []

    # Valid JSON can still have the wrong shape (an object, a scalar, or a
    # list with stray items); indexing those by "path" would raise, so they
    # are skipped just like unparseable content.
    existing = loaded if isinstance(loaded, list) else []
    by_path = {
        e["path"]: e
        for e in existing
        if isinstance(e, dict) and isinstance(e.get("path"), str)
    }

    changed = 0
    for entry in new_entries:
        if by_path.get(entry["path"]) != entry:
            by_path[entry["path"]] = entry
            changed += 1

    # Leave the file untouched when nothing changed or on a dry run.
    if not dry_run and changed:
        merged = list(by_path.values())
        with open(json_path, "w", encoding="utf-8") as f:
            json.dump(merged, f, indent=2, ensure_ascii=False)
            f.write("\n")
    return changed
def main():
    """Command-line entry point for the migration.

    Parses the ``--dry-run`` flag, loads the labelled records from
    ``DB_PATH``, groups them by the directory of their ``output_path``, and
    merges each group into a ``dataset.json`` inside that directory. One
    line is printed per folder, followed by a grand total. Returns early
    with a message when the database file is missing or holds no labelled
    records.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--dry-run", action="store_true",
                     help="Show what would be written without writing")
    dry_run = cli.parse_args().dry_run

    if not DB_PATH.exists():
        print(f"Database not found: {DB_PATH}")
        return

    records = load_db_records(DB_PATH)
    if not records:
        print("No labelled records in database.")
        return

    # Bucket the records by the directory that contains each exported clip.
    grouped: dict[str, list[dict]] = {}
    for record in records:
        parent = os.path.dirname(record["output_path"])
        grouped.setdefault(parent, []).append(record)

    prefix = "(dry run) " if dry_run else ""
    total = 0
    # Process folders in sorted order so the report is deterministic.
    for parent in sorted(grouped):
        entries = build_entries_for_folder(parent, grouped[parent])
        target = os.path.join(parent, "dataset.json")
        changed = merge_into_json(target, entries, dry_run)
        print(f"{prefix}{target}: {len(entries)} clips, {changed} added/updated")
        total += changed

    suffix = " (dry run)" if dry_run else ""
    print(f"\nTotal: {total} entries written{suffix}")
# Run the migration only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()