feat: Extract audio area — exact-length audio slice from the playhead, save-as

A dedicated "♪ Extract audio" button on the transport row grabs an exact length of audio (set via the adjacent length box, from the playhead) and opens a Save As dialog. Output format follows the chosen extension — WAV (pcm_s16le), MP3 (libmp3lame), FLAC, m4a/aac, ogg/opus — re-encoding as needed; unknown extensions let ffmpeg pick from the container.

- core.ffmpeg.build_audio_clip_command(input, start, duration, out_path): fast-seek + exact -t duration + -vn, codec by extension. Verified end-to-end (wav/mp3/flac all land at exactly the requested duration).
- Timeline shows the audio area as a distinct teal dashed band spanning [cursor, cursor+length], updated live as the playhead or length changes, so you see exactly what will be extracted.
- Length + last save dir persist in QSettings; button enabled once a file loads.

Tests: 3 core (codec-by-extension, exact length, case-insensitive) + 2 GUI (controls exist, band tracks cursor/length).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-01 23:48:24 +02:00
parent 7ae1720b9e
commit ed63d04abf
4 changed files with 189 additions and 1 deletions
@@ -173,6 +173,36 @@ def build_audio_extract_command(input_path: str, start: float, sequence_dir: str
    ]


+# ffmpeg audio-encoder arguments for the manual "Extract audio area" tool,
+# keyed by lower-case output extension (leading dot included). Each value is
+# spliced verbatim into the command line by build_audio_clip_command().
+# Extensions missing from this table get no explicit -c:a there, which leaves
+# the encoder choice to ffmpeg.
+_AUDIO_CODEC_BY_EXT: dict[str, list[str]] = {
+    ".wav":  ["-c:a", "pcm_s16le"],
+    ".flac": ["-c:a", "flac"],
+    ".mp3":  ["-c:a", "libmp3lame", "-q:a", "2"],
+    ".m4a":  ["-c:a", "aac", "-b:a", "256k"],
+    ".aac":  ["-c:a", "aac", "-b:a", "256k"],
+    ".ogg":  ["-c:a", "libvorbis", "-q:a", "5"],
+    ".opus": ["-c:a", "libopus", "-b:a", "192k"],
+}
+
+
+def _ffmpeg_seconds(value: float) -> str:
+    """Render *value* as plain decimal seconds for ffmpeg's -ss / -t options.
+
+    ``str()`` on a float below 1e-4 yields scientific notation (``"5e-05"``),
+    which ffmpeg's time-duration syntax does not accept. Fixed-point output
+    with microsecond precision, minus the padding zeros, avoids that while
+    keeping ordinary values readable (``12.5`` -> ``"12.5"``, ``3`` -> ``"3"``).
+    """
+    return f"{float(value):.6f}".rstrip("0").rstrip(".")
+
+
+def build_audio_clip_command(input_path: str, start: float, duration: float,
+                             out_path: str) -> list[str]:
+    """Build the ffmpeg argv that cuts *duration* seconds of audio from
+    *input_path*, beginning at *start*, and writes it to *out_path*.
+
+    -ss is emitted before -i so it applies to the input, -t limits the
+    output length, and -vn drops any video stream. The encoder arguments
+    come from _AUDIO_CODEC_BY_EXT, looked up by *out_path*'s lower-cased
+    extension; an extension not listed there adds no -c:a at all.
+
+    Returns the argument list (existing output is overwritten via -y); the
+    command is not executed here.
+    """
+    ext = os.path.splitext(out_path)[1].lower()
+    codec = _AUDIO_CODEC_BY_EXT.get(ext, [])
+    return [
+        _bin("ffmpeg"), "-y",
+        "-ss", _ffmpeg_seconds(start),
+        "-i", input_path,
+        "-t", _ffmpeg_seconds(duration),
+        "-vn",
+        *codec,
+        out_path,
+    ]
+
+
 def detect_hw_encoders() -> list[str]:
    """Probe ffmpeg for available H.264 hardware encoders.

@@ -35,7 +35,8 @@ import mpv
 from core.paths import _bin, _log, build_export_path, build_sequence_dir, format_time
 from core.ffmpeg import (
    _RATIOS, resolve_keyframe, apply_keyframes_to_jobs,
-    build_ffmpeg_command, build_audio_extract_command, detect_hw_encoders,
+    build_ffmpeg_command, build_audio_extract_command, build_audio_clip_command,
+    detect_hw_encoders,
 )
 from core.db import ProcessedDB
 from core.annotations import remove_clip_annotation, upsert_clip_annotation
@@ -1896,6 +1897,9 @@ class TimelineWidget(QWidget):
        self._scan_regions: list[tuple[float, float, float, float, float]] = []
        self._scan_neg_times: set[float] = set()
        self._active_scan_region: tuple[float, float] | None = None
+        # Manual "Extract audio area" band (start, end) — drawn as a distinct
+        # teal dashed region so it reads apart from the blue clip selection.
+        self._audio_region: tuple[float, float] | None = None

        # View window for zoom/pan. When _view_span <= 0 the full duration is shown.
        self._view_start: float = 0.0
@@ -2058,6 +2062,17 @@ class TimelineWidget(QWidget):
            self._active_scan_region = None
            self.update()

+    def set_audio_region(self, start: float, end: float) -> None:
+        """Store (start, end) as the audio-extract band and request a repaint,
+        unless that exact band is already set."""
+        if self._audio_region == (start, end):
+            return
+        self._audio_region = (start, end)
+        self.update()
+
+    def clear_audio_region(self) -> None:
+        """Drop the audio-extract band; repaint only if one was set."""
+        if self._audio_region is None:
+            return
+        self._audio_region = None
+        self.update()
+
    def set_play_position(self, t: float | None) -> None:
        # In lock mode, ignore mpv position updates while the user is dragging
        # — the async seek hasn't caught up yet, so mpv reports stale values.
@@ -2286,6 +2301,18 @@ class TimelineWidget(QWidget):
                p.drawLine(x_start, rh, x_start, h)
                p.drawLine(x_end,   rh, x_end,   h)

+            # ── audio-extract area (exact length from the playhead) ───────────
+            if (not self._scan_mode and self._audio_region is not None
+                    and self._duration > 0):
+                a0, a1 = self._audio_region
+                ax1 = int(self._time_to_x(a0))
+                ax2 = int(self._time_to_x(min(a1, self._duration)))
+                aw = max(ax2 - ax1, 1)
+                p.fillRect(ax1, rh, aw, th, QColor(0, 200, 180, 45))
+                p.setBrush(Qt.BrushStyle.NoBrush)
+                p.setPen(QPen(QColor(0, 220, 190), 1, Qt.PenStyle.DashLine))
+                p.drawRect(ax1, rh + 1, aw, th - 2)
+
            # ── ghost of the previous cursor position (undo-by-eye) ──────────
            if (not self._scan_mode and self._ghost_cursor is not None
                    and abs(self._ghost_cursor - self._cursor) > 0.05):
@@ -4407,6 +4434,28 @@ class MainWindow(QMainWindow):
        transport_row.addWidget(self._btn_export)
        transport_row.addWidget(self._btn_cancel)
        transport_row.addWidget(self._btn_delete)
+
+        # Extract audio area — an exact-length audio slice from the playhead,
+        # saved via a Save As dialog (format follows the chosen extension).
+        transport_row.addSpacing(12)
+        self._spn_audio_len = QDoubleSpinBox()
+        self._spn_audio_len.setRange(0.10, 120.0)
+        self._spn_audio_len.setDecimals(2)
+        self._spn_audio_len.setSingleStep(0.10)
+        self._spn_audio_len.setSuffix(" s")
+        self._spn_audio_len.setFixedWidth(78)
+        self._spn_audio_len.setToolTip("Audio area length, measured from the playhead")
+        self._spn_audio_len.setValue(
+            float(self._settings.value("audio_extract_len", 3.0)))
+        self._spn_audio_len.valueChanged.connect(self._on_audio_len_changed)
+        self._btn_extract_audio = QPushButton("♪ Extract audio")
+        self._btn_extract_audio.setFocusPolicy(Qt.FocusPolicy.NoFocus)
+        self._btn_extract_audio.setToolTip(
+            "Extract this exact length of audio from the playhead and save it")
+        self._btn_extract_audio.setEnabled(False)
+        self._btn_extract_audio.clicked.connect(self._on_extract_audio)
+        transport_row.addWidget(self._spn_audio_len)
+        transport_row.addWidget(self._btn_extract_audio)
        self._transport_row = transport_row

        # Row 1b — subcategory (subprofile) export buttons live on their own
@@ -5789,6 +5838,8 @@ class MainWindow(QMainWindow):
        self._btn_play.setEnabled(True)
        self._btn_pause.setEnabled(True)
        self._btn_export.setEnabled(True)
+        self._btn_extract_audio.setEnabled(True)
+        self._update_audio_region()
        self._set_subprofile_btns_enabled(True)
        # Reset stale state from previous file
        self._overwrite_path = ""
@@ -6321,6 +6372,7 @@ class MainWindow(QMainWindow):
        self._cursor = t
        dur = self._mpv.get_duration()
        self._lbl_time.setText(f"{format_time(t)} / {format_time(dur)}")
+        self._update_audio_region()
        self._preview_timer.start()
        if self._timeline._scan_mode:
            self._scan_panel.highlight_time(t)
@@ -6330,6 +6382,65 @@ class MainWindow(QMainWindow):
        else:
            self._mpv.seek(t)

+    def _on_audio_len_changed(self, value: float) -> None:
+        """Slot for the audio-length spin box's valueChanged signal: store the
+        new length under "audio_extract_len" and refresh the timeline band."""
+        self._settings.setValue("audio_extract_len", value)
+        self._update_audio_region()
+
+    def _update_audio_region(self) -> None:
+        """Push the current audio band to the timeline: it runs from the
+        playhead for the length shown in the spin box, or is cleared when no
+        file is loaded."""
+        if self._file_path:
+            begin = self._cursor
+            length = self._spn_audio_len.value()
+            self._timeline.set_audio_region(begin, begin + length)
+        else:
+            self._timeline.clear_audio_region()
+
+    @staticmethod
+    def _audio_ext_from_filter(selected_filter: str) -> str:
+        """Return the extension named by a Save-As filter like "MP3 (*.mp3)".
+
+        Falls back to ".wav" when the filter carries no "*.ext" pattern
+        (e.g. "All files (*)") or is empty.
+        """
+        _label, marker, tail = (selected_filter or "").partition("(*.")
+        if marker:
+            patterns = tail.partition(")")[0].split()
+            if patterns:
+                return "." + patterns[0]
+        return ".wav"
+
+    def _on_extract_audio(self) -> None:
+        """Extract an exact-length audio slice starting at the playhead and
+        prompt for where to save it (format follows the chosen extension).
+
+        The slice length comes from the audio-length spin box and is clamped
+        to the end of the media when the timeline knows its duration. ffmpeg
+        runs synchronously (up to 120 s) with the button disabled and a wait
+        cursor shown; the outcome is reported in the status bar, and failures
+        additionally in a warning dialog.
+        """
+        if not self._file_path:
+            self._show_status("Load a video first", 3000)
+            return
+        start = self._cursor
+        dur = self._spn_audio_len.value()
+        total = self._timeline._duration
+        # Clamp only once a duration is known: with a zero duration the clamp
+        # would shrink every request to the 0.05 s floor.
+        if total > 0 and start + dur > total + 0.05:
+            dur = max(0.05, total - start)
+        stem = os.path.splitext(os.path.basename(self._file_path))[0]
+        default_name = f"{stem}_{start:.2f}-{start + dur:.2f}s.wav"
+        default_dir = (self._settings.value("audio_extract_dir", "")
+                       or self._tab_export_folder()
+                       or os.path.dirname(self._file_path))
+        path, selected_filter = QFileDialog.getSaveFileName(
+            self, "Save audio clip", os.path.join(default_dir, default_name),
+            "WAV (*.wav);;MP3 (*.mp3);;FLAC (*.flac);;All files (*)")
+        if not path:
+            return
+        if not os.path.splitext(path)[1]:
+            # Honour the format picked in the dialog rather than always
+            # falling back to WAV when the typed name has no extension.
+            path += self._audio_ext_from_filter(selected_filter)
+        cmd = build_audio_clip_command(self._file_path, start, dur, path)
+        proc = None
+        err = ""
+        self._btn_extract_audio.setEnabled(False)
+        QApplication.setOverrideCursor(Qt.CursorShape.WaitCursor)
+        self._show_status(f"Extracting {dur:.2f}s of audio…")
+        try:
+            # Directory creation is inside the try so a permissions error is
+            # reported like any other failure instead of escaping the slot.
+            os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+        except (OSError, ValueError, subprocess.SubprocessError) as e:
+            # Missing ffmpeg binary, timeout, undecodable output, bad path…
+            err = str(e) or type(e).__name__
+        finally:
+            QApplication.restoreOverrideCursor()
+            self._btn_extract_audio.setEnabled(True)
+        if proc is not None and proc.returncode == 0 and os.path.exists(path):
+            self._settings.setValue("audio_extract_dir", os.path.dirname(path))
+            self._show_status(f"Saved audio: {os.path.basename(path)}", 5000)
+            _log(f"Audio extracted: {path} ({dur:.2f}s @ {start:.2f}s)")
+            return
+        if proc is not None:
+            # Show ffmpeg's last stderr line; stderr may be empty or blank.
+            stderr_lines = (proc.stderr or "").strip().splitlines()
+            err = stderr_lines[-1] if stderr_lines else "ffmpeg failed"
+        self._show_status("Audio extract failed", 5000)
+        QMessageBox.warning(self, "Audio extract failed",
+                            f"Could not extract audio:\n\n{err}")
+
    def _toggle_play(self):
        if not self._file_path:
            return
@@ -243,3 +243,28 @@ def test_subprofile_button_visibility_exact_match(win):
    win._apply_subcat_visibility()
    assert btns["blowjob"].isHidden()
    assert not btns["clap"].isHidden()
+
+
+def test_extract_audio_controls_exist(win):
+    """The window exposes the extract button and the length spin box, and
+    the button starts out disabled."""
+    from PyQt6.QtWidgets import QPushButton, QDoubleSpinBox
+    assert isinstance(win._btn_extract_audio, QPushButton)
+    assert isinstance(win._spn_audio_len, QDoubleSpinBox)
+    # Disabled until a file is loaded.
+    assert not win._btn_extract_audio.isEnabled()
+
+
+def test_audio_region_tracks_cursor_and_length(win):
+    """The teal audio band spans [cursor, cursor + length] and follows both
+    the length control and the cursor; with no file loaded it is cleared."""
+    # Fake a loaded file so the guard in _update_audio_region passes.
+    win._file_path = "/x/video.mp4"
+    win._cursor = 10.0
+    # valueChanged is only emitted on an actual change, and the spin box may
+    # already hold 4.0 (the length is persisted in settings). Move it to a
+    # different value first so the next setValue is guaranteed to fire
+    # _on_audio_len_changed.
+    win._spn_audio_len.setValue(1.0)
+    win._spn_audio_len.setValue(4.0)
+    assert win._timeline._audio_region == (10.0, 14.0)
+    win._cursor = 20.0
+    win._update_audio_region()
+    assert win._timeline._audio_region == (20.0, 24.0)
+    # No file -> band cleared.
+    win._file_path = ""
+    win._update_audio_region()
+    assert win._timeline._audio_region is None
@@ -1,5 +1,6 @@
 import tempfile, os, json
 from main import build_export_path, format_time, build_ffmpeg_command, build_sequence_dir, build_audio_extract_command, resolve_keyframe, apply_keyframes_to_jobs
+from core.ffmpeg import build_audio_clip_command
 from core.annotations import build_annotation_json_path, upsert_clip_annotation
 from main import ProcessedDB

@@ -54,6 +55,27 @@ def test_ffmpeg_command_with_resize():
    assert cmd[-1] == "/out/clip_001.mp4"


+def test_audio_clip_command_exact_length():
+    """-ss precedes -i, -t carries the requested length, video is dropped,
+    and the output path comes last."""
+    argv = build_audio_clip_command("/in/video.mp4", 12.5, 3.2, "/out/clip.wav")
+    seek_at, input_at, length_at = (
+        argv.index(flag) for flag in ("-ss", "-i", "-t"))
+    assert argv[0] == "ffmpeg"
+    # seek before input, exact duration, no video
+    assert argv[seek_at + 1] == "12.5"
+    assert argv[length_at + 1] == "3.2"
+    assert seek_at < input_at
+    assert "-vn" in argv
+    assert argv[-1] == "/out/clip.wav"
+
+def test_audio_clip_command_codec_by_extension():
+    """Known extensions select their encoder; unknown ones add no -c:a."""
+    encoder_for = {
+        "/o/a.wav": "pcm_s16le",
+        "/o/a.mp3": "libmp3lame",
+        "/o/a.flac": "flac",
+    }
+    for out_path, encoder in encoder_for.items():
+        assert encoder in build_audio_clip_command("/in.mp4", 0, 1, out_path)
+    # Unknown extension -> no explicit -c:a, let ffmpeg pick from the container.
+    unknown = build_audio_clip_command("/in.mp4", 0, 1, "/o/a.xyz")
+    assert "-c:a" not in unknown
+
+def test_audio_clip_command_extension_case_insensitive():
+    """An upper-case extension selects the same encoder as its lower-case form."""
+    assert "flac" in build_audio_clip_command("/in.mp4", 0, 1, "/o/A.FLAC")
+
+
 # --- ProcessedDB ---

 def test_db_add_and_get_markers():