feat: Extract audio area — exact-length audio slice from the playhead, save-as

A dedicated "♪ Extract audio" button on the transport row grabs an exact
length of audio (set via the adjacent length box, from the playhead) and opens
a Save As dialog. Output format follows the chosen extension — WAV (pcm_s16le),
MP3 (libmp3lame), FLAC, m4a/aac, ogg/opus — re-encoding as needed; unknown
extensions let ffmpeg pick from the container.

- core.ffmpeg.build_audio_clip_command(input_path, start, duration, out_path):
  fast-seek + exact -t duration + -vn, codec by extension. Verified end-to-end
  (wav/mp3/flac all land at exactly the requested duration).
- Timeline shows the audio area as a distinct teal dashed band spanning
  [cursor, cursor+length], updated live as the playhead or length changes, so
  you see exactly what will be extracted.
- Length + last save dir persist in QSettings; button enabled once a file loads.

Tests: 3 core (codec-by-extension, exact length, case-insensitive) + 2 GUI
(controls exist, band tracks cursor/length).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-07-01 23:48:24 +02:00
parent 7ae1720b9e
commit ed63d04abf
4 changed files with 189 additions and 1 deletions
+30
View File
@@ -173,6 +173,36 @@ def build_audio_extract_command(input_path: str, start: float, sequence_dir: str
]
# Audio codec chosen per output extension for the manual "Extract audio area"
# tool. Keys are lower-case file extensions including the leading dot; each
# value is the list of ffmpeg argument tokens used for that extension.
# Empty list -> let ffmpeg pick a default encoder from the extension.
_AUDIO_CODEC_BY_EXT: dict[str, list[str]] = {
    ".wav": ["-c:a", "pcm_s16le"],
    ".flac": ["-c:a", "flac"],
    ".mp3": ["-c:a", "libmp3lame", "-q:a", "2"],
    # .m4a and .aac share the same encoder settings.
    ".m4a": ["-c:a", "aac", "-b:a", "256k"],
    ".aac": ["-c:a", "aac", "-b:a", "256k"],
    ".ogg": ["-c:a", "libvorbis", "-q:a", "5"],
    ".opus": ["-c:a", "libopus", "-b:a", "192k"],
}
def build_audio_clip_command(input_path: str, start: float, duration: float,
                             out_path: str) -> list[str]:
    """Build the ffmpeg command that extracts an audio-only clip.

    The clip covers exactly *duration* seconds starting at *start* and is
    re-encoded according to *out_path*'s extension (wav/mp3/flac/…). The
    extension is matched case-insensitively against ``_AUDIO_CODEC_BY_EXT``;
    an extension missing from that table adds no codec arguments, leaving
    the encoder choice to ffmpeg.

    Args:
        input_path: Source media file.
        start: Clip start in seconds; emitted as ``-ss`` before ``-i``.
        duration: Clip length in seconds; emitted as ``-t``.
        out_path: Destination file; always the last argument.

    Returns:
        The argument list, starting with the ffmpeg binary, ready to be
        passed to ``subprocess`` without a shell.
    """
    def _seconds(value: float) -> str:
        # str() switches to exponent notation for very small/large floats
        # (e.g. "1e-05"), which ffmpeg's time-duration syntax does not
        # accept. Fall back to fixed-point only then, so ordinary values
        # keep their short form ("12.5", "3.2").
        text = str(value)
        return f"{value:.6f}" if "e" in text.lower() else text

    ext = os.path.splitext(out_path)[1].lower()
    codec = _AUDIO_CODEC_BY_EXT.get(ext, [])
    return [
        _bin("ffmpeg"), "-y",   # -y: overwrite the destination without asking
        "-ss", _seconds(start),
        "-i", input_path,
        "-t", _seconds(duration),
        "-vn",                  # drop any video stream
        *codec,
        out_path,
    ]
def detect_hw_encoders() -> list[str]:
"""Probe ffmpeg for available H.264 hardware encoders.
+112 -1
View File
@@ -35,7 +35,8 @@ import mpv
from core.paths import _bin, _log, build_export_path, build_sequence_dir, format_time
from core.ffmpeg import (
_RATIOS, resolve_keyframe, apply_keyframes_to_jobs,
build_ffmpeg_command, build_audio_extract_command, detect_hw_encoders,
build_ffmpeg_command, build_audio_extract_command, build_audio_clip_command,
detect_hw_encoders,
)
from core.db import ProcessedDB
from core.annotations import remove_clip_annotation, upsert_clip_annotation
@@ -1896,6 +1897,9 @@ class TimelineWidget(QWidget):
self._scan_regions: list[tuple[float, float, float, float, float]] = []
self._scan_neg_times: set[float] = set()
self._active_scan_region: tuple[float, float] | None = None
# Manual "Extract audio area" band (start, end) — drawn as a distinct
# teal dashed region so it reads apart from the blue clip selection.
self._audio_region: tuple[float, float] | None = None
# View window for zoom/pan. When _view_span <= 0 the full duration is shown.
self._view_start: float = 0.0
@@ -2058,6 +2062,17 @@ class TimelineWidget(QWidget):
self._active_scan_region = None
self.update()
def set_audio_region(self, start: float, end: float) -> None:
    """Store the audio-extract band as ``(start, end)``.

    A repaint is requested only when the band actually changes.
    """
    if self._audio_region == (start, end):
        return
    self._audio_region = (start, end)
    self.update()
def clear_audio_region(self) -> None:
    """Drop the audio-extract band; repaint only if one was set."""
    if self._audio_region is None:
        return
    self._audio_region = None
    self.update()
def set_play_position(self, t: float | None) -> None:
# In lock mode, ignore mpv position updates while the user is dragging
# — the async seek hasn't caught up yet, so mpv reports stale values.
@@ -2286,6 +2301,18 @@ class TimelineWidget(QWidget):
p.drawLine(x_start, rh, x_start, h)
p.drawLine(x_end, rh, x_end, h)
# ── audio-extract area (exact length from the playhead) ───────────
if (not self._scan_mode and self._audio_region is not None
and self._duration > 0):
a0, a1 = self._audio_region
ax1 = int(self._time_to_x(a0))
ax2 = int(self._time_to_x(min(a1, self._duration)))
aw = max(ax2 - ax1, 1)
p.fillRect(ax1, rh, aw, th, QColor(0, 200, 180, 45))
p.setBrush(Qt.BrushStyle.NoBrush)
p.setPen(QPen(QColor(0, 220, 190), 1, Qt.PenStyle.DashLine))
p.drawRect(ax1, rh + 1, aw, th - 2)
# ── ghost of the previous cursor position (undo-by-eye) ──────────
if (not self._scan_mode and self._ghost_cursor is not None
and abs(self._ghost_cursor - self._cursor) > 0.05):
@@ -4407,6 +4434,28 @@ class MainWindow(QMainWindow):
transport_row.addWidget(self._btn_export)
transport_row.addWidget(self._btn_cancel)
transport_row.addWidget(self._btn_delete)
# Extract audio area — an exact-length audio slice from the playhead,
# saved via a Save As dialog (format follows the chosen extension).
transport_row.addSpacing(12)
self._spn_audio_len = QDoubleSpinBox()
self._spn_audio_len.setRange(0.10, 120.0)
self._spn_audio_len.setDecimals(2)
self._spn_audio_len.setSingleStep(0.10)
self._spn_audio_len.setSuffix(" s")
self._spn_audio_len.setFixedWidth(78)
self._spn_audio_len.setToolTip("Audio area length, measured from the playhead")
self._spn_audio_len.setValue(
float(self._settings.value("audio_extract_len", 3.0)))
self._spn_audio_len.valueChanged.connect(self._on_audio_len_changed)
self._btn_extract_audio = QPushButton("♪ Extract audio")
self._btn_extract_audio.setFocusPolicy(Qt.FocusPolicy.NoFocus)
self._btn_extract_audio.setToolTip(
"Extract this exact length of audio from the playhead and save it")
self._btn_extract_audio.setEnabled(False)
self._btn_extract_audio.clicked.connect(self._on_extract_audio)
transport_row.addWidget(self._spn_audio_len)
transport_row.addWidget(self._btn_extract_audio)
self._transport_row = transport_row
# Row 1b — subcategory (subprofile) export buttons live on their own
@@ -5789,6 +5838,8 @@ class MainWindow(QMainWindow):
self._btn_play.setEnabled(True)
self._btn_pause.setEnabled(True)
self._btn_export.setEnabled(True)
self._btn_extract_audio.setEnabled(True)
self._update_audio_region()
self._set_subprofile_btns_enabled(True)
# Reset stale state from previous file
self._overwrite_path = ""
@@ -6321,6 +6372,7 @@ class MainWindow(QMainWindow):
self._cursor = t
dur = self._mpv.get_duration()
self._lbl_time.setText(f"{format_time(t)} / {format_time(dur)}")
self._update_audio_region()
self._preview_timer.start()
if self._timeline._scan_mode:
self._scan_panel.highlight_time(t)
@@ -6330,6 +6382,65 @@ class MainWindow(QMainWindow):
else:
self._mpv.seek(t)
def _on_audio_len_changed(self, value: float) -> None:
    """Persist the new audio-area length and refresh the timeline band.

    Slot for the length spin box's ``valueChanged`` signal; *value* is the
    new length in seconds.
    """
    # Saved under "audio_extract_len", the key the spin box is initialised
    # from when the window is built.
    self._settings.setValue("audio_extract_len", value)
    self._update_audio_region()
def _update_audio_region(self) -> None:
    """Sync the timeline's audio-area band with the playhead and length box.

    With a file loaded the band spans [cursor, cursor + length]; without
    one the band is cleared.
    """
    if self._file_path:
        origin = self._cursor
        length = self._spn_audio_len.value()
        self._timeline.set_audio_region(origin, origin + length)
    else:
        self._timeline.clear_audio_region()
def _on_extract_audio(self) -> None:
    """Extract an exact-length audio slice starting at the playhead and
    prompt for where to save it (format follows the chosen extension).

    The slice starts at the current cursor and lasts as long as the length
    spin box says, shortened when it would run past the end of the media.
    ffmpeg is run synchronously (up to 120 s), so the UI is blocked while
    it works; the button is disabled and a wait cursor shown meanwhile.
    On success the chosen directory is remembered for the next save; on
    failure a warning dialog shows the last line of ffmpeg's stderr or the
    exception text.
    """
    if not self._file_path:
        self._show_status("Load a video first", 3000)
        return

    start = self._cursor
    dur = self._spn_audio_len.value()
    total = self._timeline._duration
    # Clamp to the end of the media (50 ms tolerance). Skip the clamp while
    # the duration is unknown (<= 0): otherwise every request would be cut
    # down to the 0.05 s floor.
    if total > 0 and start + dur > total + 0.05:
        dur = max(0.05, total - start)

    stem = os.path.splitext(os.path.basename(self._file_path))[0]
    default_name = f"{stem}_{start:.2f}-{start + dur:.2f}s.wav"
    default_dir = (self._settings.value("audio_extract_dir", "")
                   or self._tab_export_folder()
                   or os.path.dirname(self._file_path))
    path, selected_filter = QFileDialog.getSaveFileName(
        self, "Save audio clip", os.path.join(default_dir, default_name),
        "WAV (*.wav);;MP3 (*.mp3);;FLAC (*.flac);;All files (*)")
    if not path:
        return  # dialog cancelled
    if not os.path.splitext(path)[1]:
        # No extension typed: honour the filter the user picked
        # ("MP3 (*.mp3)" -> ".mp3"). "All files (*)" carries no extension,
        # so fall back to WAV.
        suffix = (selected_filter or "").partition("(*")[2].partition(")")[0]
        path += suffix if suffix.startswith(".") else ".wav"

    cmd = build_audio_clip_command(self._file_path, start, dur, path)
    self._btn_extract_audio.setEnabled(False)
    QApplication.setOverrideCursor(Qt.CursorShape.WaitCursor)
    self._show_status(f"Extracting {dur:.2f}s of audio…")
    proc = None
    err = "ffmpeg failed"
    try:
        # Inside the try so an unwritable target directory is reported in
        # the failure dialog instead of escaping the slot.
        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
        # errors="replace": undecodable bytes in ffmpeg's output must not
        # turn a finished run into a reported failure.
        proc = subprocess.run(cmd, capture_output=True, text=True,
                              errors="replace", timeout=120)
    except Exception as e:
        # Deliberately broad: this is a UI slot, so any failure (missing
        # binary, timeout, OS error) becomes the dialog below.
        err = str(e)
    finally:
        QApplication.restoreOverrideCursor()
        self._btn_extract_audio.setEnabled(True)

    if proc is not None and proc.returncode == 0 and os.path.exists(path):
        self._settings.setValue("audio_extract_dir", os.path.dirname(path))
        self._show_status(f"Saved audio: {os.path.basename(path)}", 5000)
        _log(f"Audio extracted: {path} ({dur:.2f}s @ {start:.2f}s)")
        return

    if proc is not None:
        # Prefer the last stderr line; stderr may be empty or blank, in
        # which case the generic message stands.
        stderr_lines = (proc.stderr or "").strip().splitlines()
        if stderr_lines:
            err = stderr_lines[-1]
    self._show_status("Audio extract failed", 5000)
    QMessageBox.warning(self, "Audio extract failed",
                        f"Could not extract audio:\n\n{err}")
def _toggle_play(self):
if not self._file_path:
return
+25
View File
@@ -243,3 +243,28 @@ def test_subprofile_button_visibility_exact_match(win):
win._apply_subcat_visibility()
assert btns["blowjob"].isHidden()
assert not btns["clap"].isHidden()
def test_extract_audio_controls_exist(win):
    """The main window exposes the extract button and its length box."""
    from PyQt6.QtWidgets import QDoubleSpinBox, QPushButton

    extract_btn = win._btn_extract_audio
    assert isinstance(extract_btn, QPushButton)
    assert isinstance(win._spn_audio_len, QDoubleSpinBox)
    # The button stays disabled until a file is loaded.
    assert not extract_btn.isEnabled()
def test_audio_region_tracks_cursor_and_length(win):
    """The teal audio band spans [cursor, cursor + length] and follows both."""
    # Fake a loaded file so the guard in _update_audio_region passes.
    win._file_path = "/x/video.mp4"
    win._cursor = 10.0
    # setValue() only emits valueChanged on an actual change, and the handler
    # persists the length to settings. Prime the box with a different value
    # so the 4.0 below always fires the signal, even when 4.0 is already the
    # stored length (e.g. left by an earlier run of this test; depends on
    # whether the fixture isolates settings).
    win._spn_audio_len.setValue(1.0)
    win._spn_audio_len.setValue(4.0)  # fires _on_audio_len_changed
    assert win._timeline._audio_region == (10.0, 14.0)

    # Moving the cursor moves the band once the region is refreshed.
    win._cursor = 20.0
    win._update_audio_region()
    assert win._timeline._audio_region == (20.0, 24.0)

    # No file -> band cleared.
    win._file_path = ""
    win._update_audio_region()
    assert win._timeline._audio_region is None
+22
View File
@@ -1,5 +1,6 @@
import tempfile, os, json
from main import build_export_path, format_time, build_ffmpeg_command, build_sequence_dir, build_audio_extract_command, resolve_keyframe, apply_keyframes_to_jobs
from core.ffmpeg import build_audio_clip_command
from core.annotations import build_annotation_json_path, upsert_clip_annotation
from main import ProcessedDB
@@ -54,6 +55,27 @@ def test_ffmpeg_command_with_resize():
assert cmd[-1] == "/out/clip_001.mp4"
def test_audio_clip_command_exact_length():
    """Seek precedes the input, -t carries the length, video is dropped."""
    args = build_audio_clip_command("/in/video.mp4", 12.5, 3.2, "/out/clip.wav")
    seek_at = args.index("-ss")
    length_at = args.index("-t")

    assert args[0] == "ffmpeg"
    # Fast seek: -ss must come before -i.
    assert seek_at < args.index("-i")
    assert args[seek_at + 1] == "12.5"
    # Exact duration.
    assert args[length_at + 1] == "3.2"
    # Audio only, written to the requested path.
    assert "-vn" in args
    assert args[-1] == "/out/clip.wav"
def test_audio_clip_command_codec_by_extension():
    """Each known extension selects its encoder; unknown ones select none."""
    encoder_for = {
        "/o/a.wav": "pcm_s16le",
        "/o/a.mp3": "libmp3lame",
        "/o/a.flac": "flac",
    }
    for out_path, encoder in encoder_for.items():
        assert encoder in build_audio_clip_command("/in.mp4", 0, 1, out_path)
    # Unknown extension -> no explicit -c:a, let ffmpeg pick from the container.
    unknown_cmd = build_audio_clip_command("/in.mp4", 0, 1, "/o/a.xyz")
    assert "-c:a" not in unknown_cmd
def test_audio_clip_command_extension_case_insensitive():
    """An upper-case extension still selects the matching encoder."""
    upper_case_cmd = build_audio_clip_command("/in.mp4", 0, 1, "/o/A.FLAC")
    assert "flac" in upper_case_cmd
# --- ProcessedDB ---
def test_db_add_and_get_markers():