Initial release: ComfyUI-UniverSR

ComfyUI nodes for UniverSR (ICASSP 2026) — vocoder-free audio
super-resolution (8/12/16/24 kHz → 48 kHz) via flow matching.

- UniverSR Model Loader: presets auto-download to models/universr,
  plus local dir / raw .pth (from_local) loading, with caching.
- UniverSR Super-Resolution: chunked overlap-add for long audio,
  per-channel stereo, seed control with global-RNG isolation,
  wet/dry blend, and an optional before/after spectrogram.
- Vendors the universr inference package under vendor/ (prefers an
  installed copy); only extra dep beyond ComfyUI's stack is torchdiffeq.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-01 12:59:42 +02:00
commit 5f29b225b7
20 changed files with 2129 additions and 0 deletions
+87
View File
@@ -0,0 +1,87 @@
# Global random seed (presumably applied by the training entry point
# for reproducibility — confirm against the consumer of this config).
seed: 42
# Weights & Biases run metadata. Children are nested under `wandb` so
# they load as one mapping instead of as unrelated top-level keys.
wandb:
  project_name: "UniverSR"
  entity: null  # set to your wandb username or team
  run_name: "audio"
  notes: ""
# Data-loading settings. The key names match keyword arguments of
# torch.utils.data.DataLoader — presumably forwarded to it unchanged;
# confirm against the training script.
dataloader:
  batch_size: 4
  num_workers: 4
  prefetch_factor: 2
  # Booleans are written in canonical lowercase form.
  persistent_workers: true
  pin_memory: true
# Per-rate sampling probabilities for the collator. The integer keys
# 8/12/16/24 are the same keys used by `sr_to_lr_bins` (presumably the
# input sampling rate in kHz — confirm against the collator code).
collator:
  # Training mix; the four weights sum to 1.0.
  sampling_rates_probs:
    8: 0.7
    12: 0.1
    16: 0.1
    24: 0.1
  # Validation mix; only the `8` entry is listed, with weight 1.0.
  validation_probs:
    8: 1.0
# Dataset definition. `common` groups the settings that carry no split
# name (presumably shared by train and val — confirm); `train` / `val`
# each point at a file list.
dataset:
  common:
    num_samples: 32767
    sr: 48000
  train:
    file_list: "./data/train.txt"
  val:
    file_list: "./data/val.txt"
# Flow path, selected by dotted class path. `init_args` is nested here
# (presumably passed to that class's constructor — confirm).
path:
  class_path: universr.flow.path.OriginalCFMPath
  init_args:
    sigma_min: 1.0e-4
# Spectral transform settings (STFT-style parameters: window, FFT size,
# hop). `sampling_rate` has the same value as the dataset `sr`.
transform:
  window_fn: 'hann'
  n_fft: 1024
  sampling_rate: 48000
  hop_length: 512  # equals n_fft / 2
  # NOTE(review): alpha / beta / comp_eps look like spectrogram
  # compression parameters — confirm against the transform code.
  alpha: 0.2
  beta: 1
  comp_eps: 1.0e-4
# Network hyper-parameters.
model:
  in_channels: 2
  out_channels: 2
  # `dims` and `depths` both have four entries (presumably one per
  # network stage — confirm against the model definition).
  dims: [96, 192, 384, 768]
  depths: [2, 2, 4, 2]
  drop_path: 0
  time_dim: 256
  cond_dim: 384
  total_freq_bins: 512
  # 432 = total_freq_bins (512) - the `8` entry of sr_to_lr_bins (80).
  hr_freq_bins: 432
  feature_enc_layers: 4
  cond_dropout_prob: 0.1
  # Integer key -> bin count; the keys match the collator's rate keys.
  sr_to_lr_bins: {8: 80, 12: 128, 16: 170, 24: 256}
# Learning-rate scheduler, selected by `type`, with its arguments nested
# under `init_args`.
scheduler:
  type: CosineLR
  init_args:
    num_warmup_steps: 10000
    # Same value as the trainer's `max_steps` (5000000); presumably the
    # two should be changed together — confirm.
    num_training_steps: 5000000
# Optimizer hyper-parameters.
optimizer:
  lr: 2.0e-4
  betas: [0.9, 0.99]
# Training-loop settings: run length, checkpointing, logging and
# in-training validation.
train:
  num_epochs: 200
  max_steps: 5000000
  ckpt_save_dir: ./ckpts/audio/
  # NOTE(review): null presumably means "start from scratch" — confirm.
  ckpt_load_path: null
  log_step_interval: 1000
  val_step_interval: 50000
  num_val_log_samples: 5
  val_ode_steps: 4
  val_max_sec: 5
# Evaluation settings.
eval:
  ode_steps: 4
  guidance_scale: 1.5
  # NOTE(review): null presumably means "no cap on batches" — confirm.
  max_batches: null
  num_log_samples: 6