fix: bug sweep and improvements
- nodes/__init__.py: fix leftover [PrismAudio] label in the error print
- selva_feature_extractor: hash the beginning, middle, and end of the video tensor instead of just the first 1 MB, avoiding collisions on videos with the same opening frames
- selva_sampler: derive SequenceConfig from the model template via dataclasses.replace instead of hardcoding sampling_rate/spectrogram_frame_rate per mode

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -14,4 +14,4 @@ for key, (module_path, class_name, display_name) in _NODES.items():
|
|||||||
NODE_CLASS_MAPPINGS[key] = getattr(mod, class_name)
|
NODE_CLASS_MAPPINGS[key] = getattr(mod, class_name)
|
||||||
NODE_DISPLAY_NAME_MAPPINGS[key] = display_name
|
NODE_DISPLAY_NAME_MAPPINGS[key] = display_name
|
||||||
except (ImportError, AttributeError) as e:
|
except (ImportError, AttributeError) as e:
|
||||||
print(f"[PrismAudio] Skipping {key}: {e}")
|
print(f"[SelVA] Skipping {key}: {e}")
|
||||||
|
|||||||
@@ -36,7 +36,12 @@ def _resize_frames(frames, size):
|
|||||||
|
|
||||||
def _hash_inputs(video_tensor, prompt, fps, duration, variant):
|
def _hash_inputs(video_tensor, prompt, fps, duration, variant):
|
||||||
h = hashlib.sha256()
|
h = hashlib.sha256()
|
||||||
h.update(video_tensor.cpu().numpy().tobytes()[:1024 * 1024])
|
raw = video_tensor.cpu().numpy().tobytes()
|
||||||
|
n = len(raw)
|
||||||
|
chunk = 512 * 1024 # 512 KB per sample
|
||||||
|
h.update(raw[:chunk])
|
||||||
|
h.update(raw[n // 2: n // 2 + chunk])
|
||||||
|
h.update(raw[max(0, n - chunk):])
|
||||||
h.update(prompt.encode())
|
h.update(prompt.encode())
|
||||||
h.update(str(fps).encode())
|
h.update(str(fps).encode())
|
||||||
h.update(str(round(duration, 3)).encode()) # resolved duration affects frame count
|
h.update(str(round(duration, 3)).encode()) # resolved duration affects frame count
|
||||||
|
|||||||
@@ -38,8 +38,8 @@ class SelvaSampler:
|
|||||||
CATEGORY = SELVA_CATEGORY
|
CATEGORY = SELVA_CATEGORY
|
||||||
|
|
||||||
def generate(self, model, features, prompt, negative_prompt, duration, steps, cfg_strength, seed):
|
def generate(self, model, features, prompt, negative_prompt, duration, steps, cfg_strength, seed):
|
||||||
|
import dataclasses
|
||||||
from selva_core.model.flow_matching import FlowMatching
|
from selva_core.model.flow_matching import FlowMatching
|
||||||
from selva_core.model.sequence_config import SequenceConfig
|
|
||||||
|
|
||||||
device = get_device()
|
device = get_device()
|
||||||
dtype = model["dtype"]
|
dtype = model["dtype"]
|
||||||
@@ -63,11 +63,8 @@ class SelvaSampler:
|
|||||||
duration = features["duration"]
|
duration = features["duration"]
|
||||||
print(f"[SelVA] Using video duration from features: {duration:.2f}s", flush=True)
|
print(f"[SelVA] Using video duration from features: {duration:.2f}s", flush=True)
|
||||||
|
|
||||||
# Compute sequence config for this duration
|
# Derive sequence config for this duration from the model's mode template
|
||||||
if mode == "16k":
|
seq_cfg = dataclasses.replace(model["seq_cfg"], duration=duration)
|
||||||
seq_cfg = SequenceConfig(duration=duration, sampling_rate=16000, spectrogram_frame_rate=256)
|
|
||||||
else:
|
|
||||||
seq_cfg = SequenceConfig(duration=duration, sampling_rate=44100, spectrogram_frame_rate=512)
|
|
||||||
sample_rate = seq_cfg.sampling_rate
|
sample_rate = seq_cfg.sampling_rate
|
||||||
|
|
||||||
if strategy == "offload_to_cpu":
|
if strategy == "offload_to_cpu":
|
||||||
|
|||||||
Reference in New Issue
Block a user