feat: auto-install pip venv for feature extraction on first use

PrismAudioFeatureExtractor now creates and populates a managed venv (_extract_env/) automatically when python_env is left as the default 'python'. Also adds scripts/install_extract_env.sh for manual/Docker setup without conda.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-27 19:27:27 +01:00
parent 9b1cb71b2a
commit 35d0615253
3 changed files with 90 additions and 1 deletions
@@ -0,0 +1,9 @@
+# Python bytecode caches
+__pycache__/
+*.pyc
+*.pyo
+# Packaging / build artifacts
+*.egg-info/
+dist/
+build/
+.eggs/
+*.so
+# Local .env file or directory
+.env
+# Managed feature-extraction venv created inside the plugin directory on first use
+_extract_env/
@@ -1,4 +1,5 @@
 import os
+import sys
 import hashlib
 import subprocess
 import tempfile
@@ -7,6 +8,37 @@ import torch
 from .utils import PRISMAUDIO_CATEGORY
 from .feature_loader import PrismAudioFeatureLoader

+# Managed venv created automatically when python_env is left as default.
+# _PLUGIN_DIR is the parent of the directory that holds this module, and the
+# venv is placed directly inside it.
+_PLUGIN_DIR = os.path.dirname(os.path.dirname(__file__))
+_MANAGED_VENV = os.path.join(_PLUGIN_DIR, "_extract_env")
+# venv places the interpreter in Scripts\python.exe on Windows and in
+# bin/python elsewhere, so pick the layout that matches the host OS.
+if os.name == "nt":
+    _MANAGED_PYTHON = os.path.join(_MANAGED_VENV, "Scripts", "python.exe")
+else:
+    _MANAGED_PYTHON = os.path.join(_MANAGED_VENV, "bin", "python")
+
+# Packages pip-installed into the managed venv for feature extraction.
+_EXTRACT_PACKAGES = [
+    "torch", "torchaudio", "torchvision",
+    "tensorflow-cpu==2.15.0",
+    "jax[cpu]", "jaxlib",
+    "transformers", "decord", "einops", "numpy", "mediapy",
+    "git+https://github.com/google-deepmind/videoprism.git",
+]
+
+
+def _ensure_extract_env():
+    """Create and populate the managed venv on first use.
+
+    The environment counts as ready only once a marker file exists inside
+    the venv directory; the marker is written after every install step has
+    succeeded. A venv left behind by a failed or interrupted install is
+    therefore rebuilt instead of being returned half-populated.
+
+    Returns:
+        Path to the managed venv's python binary (``_MANAGED_PYTHON``).
+
+    Raises:
+        subprocess.CalledProcessError: if creating the venv or any pip
+            install step exits with a non-zero status.
+        OSError: if the readiness marker cannot be written.
+    """
+    ready_marker = os.path.join(_MANAGED_VENV, ".prismaudio_ready")
+    if os.path.exists(_MANAGED_PYTHON) and os.path.exists(ready_marker):
+        return _MANAGED_PYTHON
+
+    print("[PrismAudio] Feature-extraction env not found — creating venv at:", _MANAGED_VENV)
+    # --clear wipes leftovers from an earlier attempt that never finished.
+    subprocess.run([sys.executable, "-m", "venv", "--clear", _MANAGED_VENV], check=True)
+
+    # Run pip through the venv's interpreter instead of the pip launcher:
+    # the launcher's location differs between platforms, and "python -m pip"
+    # is the documented way to let pip upgrade itself.
+    pip_cmd = [_MANAGED_PYTHON, "-m", "pip"]
+    subprocess.run(pip_cmd + ["install", "--upgrade", "pip"], check=True)
+
+    print("[PrismAudio] Installing feature-extraction dependencies (this takes a few minutes)...")
+    subprocess.run(pip_cmd + ["install"] + _EXTRACT_PACKAGES, check=True)
+
+    # Written last so its presence proves every step above succeeded.
+    with open(ready_marker, "w", encoding="utf-8") as marker_file:
+        marker_file.write("ok\n")
+
+    print("[PrismAudio] Feature-extraction env ready.")
+    return _MANAGED_PYTHON
+

 def _hash_inputs(video_tensor, cot_text):
    """Create a hash of the inputs for caching."""
@@ -34,7 +66,7 @@ class PrismAudioFeatureExtractor:
                "caption_cot": ("STRING", {"default": "", "multiline": True, "tooltip": "Chain-of-thought description"}),
            },
            "optional": {
-                "python_env": ("STRING", {"default": "python", "tooltip": "Path to python binary with JAX/TF (e.g., /path/to/conda/envs/prismaudio-extract/bin/python)"}),
+                "python_env": ("STRING", {"default": "python", "tooltip": "Path to python binary with JAX/TF. Leave as 'python' to auto-install a managed venv on first use."}),
                "cache_dir": ("STRING", {"default": "", "tooltip": "Directory to cache extracted features. Empty = temp dir"}),
                "synchformer_ckpt": ("STRING", {"default": "", "tooltip": "Path to synchformer checkpoint (auto-resolved if empty)"}),
            },
@@ -46,6 +78,10 @@ class PrismAudioFeatureExtractor:
    CATEGORY = PRISMAUDIO_CATEGORY

    def extract_features(self, video, caption_cot, python_env="python", cache_dir="", synchformer_ckpt=""):
+        # Resolve python binary — auto-install managed venv if using default
+        if python_env == "python":
+            python_env = _ensure_extract_env()
+
        # Determine cache directory
        if not cache_dir:
            cache_dir = os.path.join(tempfile.gettempdir(), "prismaudio_features")
@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+# Install the PrismAudio feature-extraction environment using pip venv.
+# Use this instead of environment.yml when conda is unavailable (e.g. NVIDIA Docker).
+#
+# Usage:
+#   bash scripts/install_extract_env.sh [/path/to/venv]
+#
+# Default venv path: /opt/prismaudio-extract
+# After installation, point the PrismAudioFeatureExtractor node's python_env to:
+#   <venv>/bin/python   (Linux/Mac)
+#   <venv>\Scripts\python.exe  (Windows)
+# The script prints the exact path for the layout it found when it finishes.
+
+set -euo pipefail
+
+VENV_DIR="${1:-/opt/prismaudio-extract}"
+
+# Prefer python3; fall back to python on hosts that only provide that name.
+if command -v python3 >/dev/null 2>&1; then
+    HOST_PYTHON="python3"
+elif command -v python >/dev/null 2>&1; then
+    HOST_PYTHON="python"
+else
+    echo "[PrismAudio] Error: neither python3 nor python was found on PATH." >&2
+    exit 1
+fi
+
+echo "[PrismAudio] Creating venv at: ${VENV_DIR}"
+"${HOST_PYTHON}" -m venv "${VENV_DIR}"
+
+# venv creates bin/ on Linux/Mac and Scripts/ on Windows; use whichever exists
+# so the pip calls and the final hint match the environment that was built.
+if [[ -x "${VENV_DIR}/bin/python" ]]; then
+    VENV_PYTHON="${VENV_DIR}/bin/python"
+elif [[ -f "${VENV_DIR}/Scripts/python.exe" ]]; then
+    VENV_PYTHON="${VENV_DIR}/Scripts/python.exe"
+else
+    echo "[PrismAudio] Error: no python interpreter found inside ${VENV_DIR}." >&2
+    exit 1
+fi
+
+# Run pip as a module of the venv interpreter: this works on both layouts and
+# is the documented way to let pip upgrade itself.
+PIP=("${VENV_PYTHON}" -m pip)
+
+echo "[PrismAudio] Upgrading pip..."
+"${PIP[@]}" install --upgrade pip
+
+echo "[PrismAudio] Installing PyTorch stack..."
+"${PIP[@]}" install torch torchaudio torchvision
+
+echo "[PrismAudio] Installing feature-extraction dependencies..."
+"${PIP[@]}" install \
+    "tensorflow-cpu==2.15.0" \
+    "jax[cpu]" \
+    "jaxlib" \
+    "transformers" \
+    "decord" \
+    "einops" \
+    "numpy" \
+    "mediapy"
+
+echo "[PrismAudio] Installing VideoPrism..."
+"${PIP[@]}" install "git+https://github.com/google-deepmind/videoprism.git"
+
+echo ""
+echo "[PrismAudio] Done. Set python_env in PrismAudioFeatureExtractor to:"
+echo "  ${VENV_PYTHON}"