remove: mask generation, venv setup, and settings dialog

Dead code — masking is handled externally via ComfyUI. Removes SetupWorker, MaskWorker, SettingsDialog, build_mask_output_dir, the mask UI row, Settings button, and associated test cases. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-12 15:53:31 +02:00
parent bb6e3c623a
commit e2b4f9bf8d
4 changed files with 2 additions and 408 deletions
@@ -1,75 +0,0 @@
-"""Depth Anything V2 mask generation script.
-
-Usage:
-    python tools/depth_masks.py --input video.mp4 --output masks_dir/
-
-Outputs one binary PNG per frame: frame_0000.png, frame_0001.png, …
-Foreground = white (255), background = black (0), via Otsu threshold on depth map.
-Requires: torch, transformers, opencv-python, numpy, Pillow
-"""
-import argparse
-import os
-import sys
-
-import cv2
-import numpy as np
-from PIL import Image
-from transformers import pipeline
-
-
-def main():
-    """Write one binary foreground-mask PNG per frame of the input video.
-
-    Command-line arguments:
-        --input   path of the video to read (required).
-        --output  directory for the masks; created if missing (required).
-
-    Each frame is passed through the Depth Anything V2 depth-estimation
-    pipeline, the depth map is normalised to 0–255 and split with an Otsu
-    threshold.  Masks are saved as ``frame_0000.png``, ``frame_0001.png``, …
-
-    Progress is printed to stdout as ``frame <n>/<total>`` lines followed by
-    ``done``.  The process exits with status 1 (message on stderr) when the
-    video cannot be opened or a mask cannot be written.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--input", required=True)
-    parser.add_argument("--output", required=True)
-    args = parser.parse_args()
-
-    os.makedirs(args.output, exist_ok=True)
-
-    cap = cv2.VideoCapture(args.input)
-    try:
-        # Check the input first so a bad path is reported without having to
-        # load the Depth-Anything model beforehand.
-        if not cap.isOpened():
-            print(f"ERROR: cannot open {args.input}", file=sys.stderr)
-            sys.exit(1)
-
-        import torch
-        device = "cuda" if torch.cuda.is_available() else "cpu"
-        print(f"Using device: {device}", flush=True)
-
-        pipe = pipeline(
-            "depth-estimation",
-            model="depth-anything/Depth-Anything-V2-Large-hf",
-            device=device,
-        )
-
-        # Frame count as reported by OpenCV; used only for the progress line.
-        total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-        idx = 0
-        while True:
-            ret, frame = cap.read()
-            if not ret:
-                break
-
-            # OpenCV decodes to BGR; the pipeline is fed an RGB PIL image.
-            pil_img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            result = pipe(pil_img)
-            depth = np.array(result["depth"])
-
-            # Normalise to 0–255
-            d_min, d_max = depth.min(), depth.max()
-            if d_max > d_min:
-                depth_u8 = ((depth - d_min) / (d_max - d_min) * 255).astype(np.uint8)
-            else:
-                # Constant depth map: avoid dividing by zero, emit all black.
-                depth_u8 = np.zeros_like(depth, dtype=np.uint8)
-
-            # Otsu threshold: closer objects (higher depth value) = foreground
-            _, mask = cv2.threshold(depth_u8, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
-
-            out_path = os.path.join(args.output, f"frame_{idx:04d}.png")
-            # imwrite signals failure through its return value; do not report
-            # success for a mask that never reached the disk.
-            if not cv2.imwrite(out_path, mask):
-                print(f"ERROR: cannot write {out_path}", file=sys.stderr)
-                sys.exit(1)
-
-            idx += 1
-            print(f"frame {idx}/{total}", flush=True)
-    finally:
-        # Release the capture on every exit path, including errors.
-        cap.release()
-
-    print("done", flush=True)
-
-
-if __name__ == "__main__":
-    main()
@@ -1,83 +0,0 @@
-"""SAM2 mask generation script.
-
-Usage:
-    python tools/sam_masks.py --input video.mp4 --output masks_dir/
-
-Outputs one binary PNG per frame: frame_0000.png, frame_0001.png, …
-Uses center of first frame as positive point prompt, propagates across all frames.
-Requires: torch, segment-anything-2, opencv-python, numpy
-"""
-import argparse
-import os
-import sys
-import tempfile
-
-import cv2
-import numpy as np
-
-
-def main():
-    """Write one binary SAM2 mask PNG per frame of the input video.
-
-    Command-line arguments:
-        --input   path of the video to read (required).
-        --output  directory for the masks; created if missing (required).
-
-    The video is first dumped to JPEG frames in a temporary directory, the
-    centre pixel of frame 0 is given to SAM2 as a single positive point
-    prompt, and the resulting object is propagated through the video.  Masks
-    are saved as ``frame_0000.png``, ``frame_0001.png``, …
-
-    Progress is printed to stdout as ``frame <n>/<total>`` lines followed by
-    ``done``.  The process exits with status 1 (message on stderr) when the
-    video cannot be opened, yields no frames, or a file cannot be written.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument("--input", required=True)
-    parser.add_argument("--output", required=True)
-    args = parser.parse_args()
-
-    os.makedirs(args.output, exist_ok=True)
-
-    import torch
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Using device: {device}", flush=True)
-
-    # Extract frames to temp directory (SAM2 video predictor needs image files)
-    with tempfile.TemporaryDirectory() as frame_dir:
-        cap = cv2.VideoCapture(args.input)
-        try:
-            if not cap.isOpened():
-                print(f"ERROR: cannot open {args.input}", file=sys.stderr)
-                sys.exit(1)
-
-            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
-            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-            idx = 0
-            while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
-                frame_path = os.path.join(frame_dir, f"{idx:04d}.jpg")
-                # imwrite signals failure through its return value.
-                if not cv2.imwrite(frame_path, frame):
-                    print(f"ERROR: cannot write {frame_path}", file=sys.stderr)
-                    sys.exit(1)
-                idx += 1
-        finally:
-            # Release the capture on every exit path, including errors.
-            cap.release()
-
-        print(f"Extracted {idx} frames", flush=True)
-        if idx == 0:
-            # Nothing to segment; stop before handing an empty directory to SAM2.
-            print(f"ERROR: no frames decoded from {args.input}", file=sys.stderr)
-            sys.exit(1)
-
-        # Use the number of frames actually extracted as the progress total.
-        total = idx
-
-        # SAM2: use from_pretrained (SAM2.1+ / HuggingFace integration)
-        from sam2.sam2_video_predictor import SAM2VideoPredictor
-
-        predictor = SAM2VideoPredictor.from_pretrained(
-            "facebook/sam2-hiera-large"
-        ).to(device)
-
-        with torch.inference_mode():
-            state = predictor.init_state(video_path=frame_dir)
-
-            # Center of first frame as positive point prompt
-            cx, cy = width // 2, height // 2
-            predictor.add_new_points_or_box(
-                inference_state=state,
-                frame_idx=0,
-                obj_id=1,
-                points=np.array([[cx, cy]], dtype=np.float32),
-                labels=np.array([1], dtype=np.int32),
-            )
-
-            for frame_idx, obj_ids, out_mask_logits in predictor.propagate_in_video(state):
-                # out_mask_logits: (N_objects, 1, H, W) — threshold logits at 0
-                mask = (out_mask_logits[0].squeeze().cpu().numpy() > 0.0).astype(np.uint8) * 255
-                out_path = os.path.join(args.output, f"frame_{frame_idx:04d}.png")
-                if not cv2.imwrite(out_path, mask):
-                    print(f"ERROR: cannot write {out_path}", file=sys.stderr)
-                    sys.exit(1)
-                print(f"frame {frame_idx + 1}/{total}", flush=True)
-
-    print("done", flush=True)
-
-
-if __name__ == "__main__":
-    main()