feat: replace dataset.tsv with dataset.json annotation file

Each exported clip writes an entry to <folder>/dataset.json containing
its relative path, sound label, and fps. Re-exporting to the same path
updates the existing entry (upsert). Empty labels are skipped.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-07 14:01:50 +02:00
parent cf62940b84
commit c3c480acc7
2 changed files with 81 additions and 39 deletions
+34 -11
View File
@@ -5,6 +5,7 @@ locale.setlocale(locale.LC_NUMERIC, "C") # required by libmpv before any import
import sys import sys
import os import os
import re import re
import json
import sqlite3 import sqlite3
import subprocess import subprocess
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -99,22 +100,44 @@ def build_audio_extract_command(input_path: str, start: float, sequence_dir: str
] ]
def build_annotation_tsv_path(folder: str) -> str: def build_annotation_json_path(folder: str) -> str:
return os.path.join(folder, "dataset.tsv") return os.path.join(folder, "dataset.json")
def append_to_tsv(folder: str, clip_stem: str, label: str) -> None: def upsert_clip_annotation(
"""Append one line to <folder>/dataset.tsv (creates file if absent). folder: str, clip_path: str, label: str, fps: float | None
) -> None:
"""Insert or update one entry in <folder>/dataset.json.
Format: ``{clip_stem}\\t{label}`` — matches VGGSound training TSV (2 columns). Each entry stores a path relative to *folder*, the sound label, and fps.
Category is stored in the database only, not in the TSV. Matches on ``path``; if an entry for the same clip already exists it is
replaced (overwrite-export case). Nothing is written when *label* is
empty.
""" """
if not label.strip(): if not label.strip():
return return
tsv_path = build_annotation_tsv_path(folder)
os.makedirs(folder, exist_ok=True) os.makedirs(folder, exist_ok=True)
with open(tsv_path, "a", encoding="utf-8") as f: json_path = build_annotation_json_path(folder)
f.write(f"{clip_stem}\t{label}\n") entries: list[dict] = []
if os.path.exists(json_path):
with open(json_path, "r", encoding="utf-8") as f:
try:
entries = json.load(f)
except (json.JSONDecodeError, ValueError):
entries = []
rel_path = os.path.relpath(clip_path, folder)
entry: dict = {"path": rel_path, "label": label}
if fps is not None:
entry["fps"] = fps
for i, e in enumerate(entries):
if e.get("path") == rel_path:
entries[i] = entry
break
else:
entries.append(entry)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(entries, f, indent=2, ensure_ascii=False)
f.write("\n")
def build_mask_output_dir(video_path: str) -> str: def build_mask_output_dir(video_path: str) -> str:
@@ -1579,8 +1602,8 @@ class MainWindow(QMainWindow):
label=label, label=label,
category=category, category=category,
) )
clip_stem = os.path.splitext(os.path.basename(path))[0] folder = self._txt_folder.text()
append_to_tsv(self._txt_folder.text(), clip_stem, label) upsert_clip_annotation(folder, path, label, self._fps)
# For MP4 exports path is a file; for WebP sequence it is a directory. # For MP4 exports path is a file; for WebP sequence it is a directory.
# build_mask_output_dir handles both correctly via Path.stem. # build_mask_output_dir handles both correctly via Path.stem.
self._last_export_path = path self._last_export_path = path
+47 -28
View File
@@ -1,5 +1,5 @@
import tempfile, os import tempfile, os, json
from main import build_export_path, format_time, build_ffmpeg_command, build_mask_output_dir, build_sequence_dir, build_audio_extract_command, build_annotation_tsv_path, append_to_tsv from main import build_export_path, format_time, build_ffmpeg_command, build_mask_output_dir, build_sequence_dir, build_audio_extract_command, build_annotation_json_path, upsert_clip_annotation
from main import _normalize_filename, ProcessedDB from main import _normalize_filename, ProcessedDB
@@ -217,10 +217,7 @@ def test_ffmpeg_command_image_sequence():
cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/seq_001", image_sequence=True) cmd = build_ffmpeg_command("/in/v.mp4", 0.0, "/out/seq_001", image_sequence=True)
assert "-c:v" in cmd assert "-c:v" in cmd
assert cmd[cmd.index("-c:v") + 1] == "libwebp" assert cmd[cmd.index("-c:v") + 1] == "libwebp"
assert "-lossless" in cmd assert "-quality" in cmd
assert cmd[cmd.index("-lossless") + 1] == "1"
assert "-compression_level" in cmd
assert cmd[cmd.index("-compression_level") + 1] == "4"
assert cmd[-1] == "/out/seq_001/frame_%04d.webp" assert cmd[-1] == "/out/seq_001/frame_%04d.webp"
def test_ffmpeg_command_image_sequence_with_resize(): def test_ffmpeg_command_image_sequence_with_resize():
@@ -237,28 +234,56 @@ def test_ffmpeg_command_image_sequence_no_audio():
assert "aac" not in cmd assert "aac" not in cmd
def test_annotation_json_path():
    """The annotation file is named dataset.json and lives directly in the folder."""
    expected = "/out/dataset.json"
    assert build_annotation_json_path("/out") == expected
def test_upsert_creates_file():
    """The first upsert into a folder creates dataset.json holding one entry."""
    with tempfile.TemporaryDirectory() as folder:
        clip_path = os.path.join(folder, "clip_001.mp4")
        upsert_clip_annotation(folder, clip_path, "dog barking", 25.0)
        with open(os.path.join(folder, "dataset.json")) as fh:
            stored = json.load(fh)
        assert len(stored) == 1
        only = stored[0]
        assert only["label"] == "dog barking"
        assert only["fps"] == 25.0
        # The path is stored relative to the annotation folder.
        assert only["path"] == "clip_001.mp4"
def test_upsert_appends_new_clips():
    """Upserting two different clips yields two entries."""
    clips = (
        ("clip_001.mp4", "dog barking", 25.0),
        ("clip_002.mp4", "cat meowing", 30.0),
    )
    with tempfile.TemporaryDirectory() as folder:
        for file_name, label, fps in clips:
            upsert_clip_annotation(folder, os.path.join(folder, file_name), label, fps)
        with open(os.path.join(folder, "dataset.json")) as fh:
            stored = json.load(fh)
        assert len(stored) == 2
def test_upsert_replaces_existing():
    """Re-upserting the same clip path replaces its entry instead of duplicating it."""
    with tempfile.TemporaryDirectory() as folder:
        clip_path = os.path.join(folder, "clip_001.mp4")
        for label in ("dog barking", "cat meowing"):
            upsert_clip_annotation(folder, clip_path, label, 25.0)
        with open(os.path.join(folder, "dataset.json")) as fh:
            stored = json.load(fh)
        assert len(stored) == 1
        # The label from the second call wins.
        assert stored[0]["label"] == "cat meowing"
def test_upsert_empty_label_skips():
    """An empty label must not create dataset.json."""
    with tempfile.TemporaryDirectory() as folder:
        clip_path = os.path.join(folder, "clip_001.mp4")
        upsert_clip_annotation(folder, clip_path, "", 25.0)
        annotation_file = os.path.join(folder, "dataset.json")
        assert not os.path.exists(annotation_file)
def test_upsert_no_fps():
    """When fps is None the stored entry has no "fps" key at all."""
    with tempfile.TemporaryDirectory() as folder:
        upsert_clip_annotation(
            folder, os.path.join(folder, "clip_001.mp4"), "dog barking", None
        )
        with open(os.path.join(folder, "dataset.json")) as fh:
            stored = json.load(fh)
        first = stored[0]
        assert "fps" not in first
def test_upsert_missing_folder_creates_it():
    """A not-yet-existing nested target folder is created by the upsert."""
    with tempfile.TemporaryDirectory() as root:
        target = os.path.join(root, "subdir", "deep")
        clip_path = os.path.join(target, "clip_001.mp4")
        upsert_clip_annotation(target, clip_path, "dog barking", 25.0)
        annotation_file = os.path.join(target, "dataset.json")
        assert os.path.exists(annotation_file)
def test_db_stores_label_and_category(): def test_db_stores_label_and_category():
with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f: with tempfile.NamedTemporaryFile(suffix=".db", delete=False) as f:
@@ -272,9 +297,3 @@ def test_db_stores_label_and_category():
assert row == ("dog barking", "Animal") assert row == ("dog barking", "Animal")
finally: finally:
os.unlink(path) os.unlink(path)
def test_append_to_tsv_missing_folder_creates_it():
with tempfile.TemporaryDirectory() as d:
nested = os.path.join(d, "subdir", "deep")
append_to_tsv(nested, "clip_001", "dog barking")
assert os.path.exists(os.path.join(nested, "dataset.tsv"))