chore: vendor selva_core from jnwnlee/selva@d7d40a9
Pure PyTorch SelVA source for SelvaModelLoader/FeatureExtractor/Sampler nodes. Imports rewritten from selva.* to selva_core.*. mel_converter.py: replaced librosa.filters.mel with pure-numpy implementation to avoid librosa→numba→NumPy version incompatibility in some ComfyUI environments. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
import dataclasses
|
||||
import math
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class SequenceConfig:
|
||||
# general
|
||||
duration: float
|
||||
|
||||
# audio
|
||||
sampling_rate: int
|
||||
spectrogram_frame_rate: int
|
||||
latent_downsample_rate: int = 2
|
||||
|
||||
# visual
|
||||
clip_frame_rate: int = 8
|
||||
sync_frame_rate: int = 25
|
||||
sync_num_frames_per_segment: int = 16
|
||||
sync_step_size: int = 8
|
||||
sync_downsample_rate: int = 2
|
||||
|
||||
@property
|
||||
def num_audio_frames(self) -> int:
|
||||
# we need an integer number of latents
|
||||
return self.latent_seq_len * self.spectrogram_frame_rate * self.latent_downsample_rate
|
||||
|
||||
@property
|
||||
def latent_seq_len(self) -> int:
|
||||
return int(
|
||||
math.ceil(self.duration * self.sampling_rate / self.spectrogram_frame_rate /
|
||||
self.latent_downsample_rate))
|
||||
|
||||
@property
|
||||
def clip_seq_len(self) -> int:
|
||||
return int(self.duration * self.clip_frame_rate)
|
||||
|
||||
@property
|
||||
def sync_seg_len(self) -> int:
|
||||
num_frames = self.duration * self.sync_frame_rate
|
||||
num_segments = (num_frames - self.sync_num_frames_per_segment) // self.sync_step_size + 1
|
||||
return int(num_segments)
|
||||
|
||||
@property
|
||||
def sync_seq_len(self) -> int:
|
||||
return int(self.sync_seg_len * self.sync_num_frames_per_segment / self.sync_downsample_rate)
|
||||
|
||||
|
||||
CONFIG_16K = SequenceConfig(duration=8.0, sampling_rate=16000, spectrogram_frame_rate=256)
|
||||
CONFIG_44K = SequenceConfig(duration=8.0, sampling_rate=44100, spectrogram_frame_rate=512)
|
||||
|
||||
if __name__ == '__main__':
|
||||
assert CONFIG_16K.latent_seq_len == 250
|
||||
assert CONFIG_16K.clip_seq_len == 64
|
||||
assert CONFIG_16K.sync_seq_len == 192
|
||||
assert CONFIG_16K.num_audio_frames == 128000
|
||||
|
||||
assert CONFIG_44K.latent_seq_len == 345
|
||||
assert CONFIG_44K.clip_seq_len == 64
|
||||
assert CONFIG_44K.sync_seq_len == 192
|
||||
assert CONFIG_44K.num_audio_frames == 353280
|
||||
|
||||
print('Passed')
|
||||
Reference in New Issue
Block a user