def _ensure_extract_env():
    """Create and populate the managed feature-extraction venv on first use.

    The environment is considered ready only when both the venv interpreter
    and a completion marker file exist. The marker is written after every
    install step has succeeded, so a run that failed or was interrupted
    midway is retried on the next call instead of handing back a
    half-installed environment.

    Returns:
        str: Path to the Python interpreter inside the managed venv.

    Raises:
        subprocess.CalledProcessError: If venv creation or a pip step exits
            with a non-zero status.
        OSError: If the completion marker cannot be written.
    """
    # Windows venvs place the interpreter under Scripts\ instead of bin/.
    if os.name == "nt":
        python_bin = os.path.join(_MANAGED_VENV, "Scripts", "python.exe")
    else:
        python_bin = _MANAGED_PYTHON

    ready_marker = os.path.join(_MANAGED_VENV, ".prismaudio_env_ready")
    if os.path.exists(python_bin) and os.path.exists(ready_marker):
        return python_bin

    print("[PrismAudio] Feature-extraction env not found — creating venv at:", _MANAGED_VENV)
    # Re-running venv over an existing directory is allowed, which lets an
    # earlier partial install be completed in place.
    subprocess.run([sys.executable, "-m", "venv", _MANAGED_VENV], check=True)

    # Invoke pip as a module: works on every platform layout and is the
    # reliable way to let pip upgrade itself.
    pip_cmd = [python_bin, "-m", "pip"]
    subprocess.run(pip_cmd + ["install", "--upgrade", "pip"], check=True)

    print("[PrismAudio] Installing feature-extraction dependencies (this takes a few minutes)...")
    subprocess.run(pip_cmd + ["install"] + _EXTRACT_PACKAGES, check=True)

    # Only reached when every step above succeeded.
    with open(ready_marker, "w", encoding="utf-8") as marker_file:
        marker_file.write("ok\n")

    print("[PrismAudio] Feature-extraction env ready.")
    return python_bin
#!/usr/bin/env bash
# Install the PrismAudio feature-extraction environment using pip venv.
# Use this instead of environment.yml when conda is unavailable (e.g. NVIDIA Docker).
#
# Usage:
#   bash scripts/install_extract_env.sh [/path/to/venv]
#
# Default venv path: /opt/prismaudio-extract
# After installation, point the PrismAudioFeatureExtractor node's python_env to:
#   <venv>/bin/python           (Linux/Mac)
#   <venv>\Scripts\python.exe   (Windows)

# Abort on the first failing command, on unset variables, and on failures
# inside pipelines, so a broken step never reports "Done".
set -euo pipefail

VENV_DIR="${1:-/opt/prismaudio-extract}"

echo "[PrismAudio] Creating venv at: ${VENV_DIR}"
python3 -m venv "${VENV_DIR}"

# venv puts the interpreter in bin/ on Linux/Mac and Scripts/ on Windows.
if [[ -x "${VENV_DIR}/bin/python" ]]; then
    VENV_PYTHON="${VENV_DIR}/bin/python"
elif [[ -f "${VENV_DIR}/Scripts/python.exe" ]]; then
    VENV_PYTHON="${VENV_DIR}/Scripts/python.exe"
else
    echo "[PrismAudio] ERROR: no python interpreter found inside ${VENV_DIR}" >&2
    exit 1
fi

# Run pip as a module of the venv interpreter; this is the reliable way to
# upgrade pip itself and does not depend on the wrapper script's location.
echo "[PrismAudio] Upgrading pip..."
"${VENV_PYTHON}" -m pip install --upgrade pip

echo "[PrismAudio] Installing PyTorch stack..."
"${VENV_PYTHON}" -m pip install torch torchaudio torchvision

echo "[PrismAudio] Installing feature-extraction dependencies..."
"${VENV_PYTHON}" -m pip install \
    "tensorflow-cpu==2.15.0" \
    "jax[cpu]" \
    "jaxlib" \
    "transformers" \
    "decord" \
    "einops" \
    "numpy" \
    "mediapy"

echo "[PrismAudio] Installing VideoPrism..."
"${VENV_PYTHON}" -m pip install "git+https://github.com/google-deepmind/videoprism.git"

echo ""
echo "[PrismAudio] Done. Set python_env in PrismAudioFeatureExtractor to:"
echo "  ${VENV_PYTHON}"