# f7a6f7790d
# Modernized MisoTTS integration for ComfyUI with no torchtune/moshi:
# - vendored plain-torch Llama backbone (csm_llama), parity-verified Δ=0 vs torchtune
# - transformers.MimiModel codec (bit-identical codes to moshi), drops moshi/bnb/sphn
# - low-memory loader: streams 32GB fp32 checkpoint to GPU in bf16 (~18GB VRAM)
# - nodes: Model Loader, Generate (audiobook chunking + voice anchoring), EPUB Loader
# - pin-free requirements; runs on modern torch / Blackwell GPUs
# Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
# 15 lines, 403 B, TOML
# Core package metadata (PEP 621 `[project]` table).
[project]
name = "comfyui-misotts"
description = "ComfyUI nodes for MisoTTS (Sesame CSM 8B) — modernized off torchtune/moshi, with EPUB/audiobook chunking and voice cloning."
version = "0.1.0"
license = { text = "Apache-2.0" }
# No dependencies are declared in this table.
# NOTE(review): presumably they are installed from a separate requirements file — confirm.
dependencies = []

[project.urls]
Repository = "https://github.com/ethanfel/ComfyUI-MisoTTS"

# Comfy Registry publishing metadata. TOML keys are case-sensitive, so the
# PascalCase spelling of these keys must be kept exactly as written.
[tool.comfy]
PublisherId = "ethanfel"
DisplayName = "ComfyUI-MisoTTS"
# NOTE(review): empty string — presumably no icon has been set yet; confirm.
Icon = ""