Replace torchaudio.save with soundfile.write; add EPUB loader node

- nodes/generator.py: swap torchaudio.save for soundfile.write to avoid
  torchcodec/FFmpeg dependency crash in environments without FFmpeg shared libs
- nodes/epub_loader.py: new OmniVoiceEpubLoader node for loading EPUB chapters
- tests/test_epub_loader.py: 9 tests for the EPUB loader
- install.py: add beautifulsoup4 to runtime deps
- __init__.py, nodes/__init__.py: register OmniVoiceEpubLoader

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-05 17:24:18 +02:00
parent 5366f6992e
commit 5dfaa0b300
8 changed files with 254 additions and 6 deletions
+3 -1
View File
@@ -1,13 +1,15 @@
from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader
NODE_CLASS_MAPPINGS = { NODE_CLASS_MAPPINGS = {
"OmniVoiceModelLoader": OmniVoiceModelLoader, "OmniVoiceModelLoader": OmniVoiceModelLoader,
"OmniVoiceGenerate": OmniVoiceGenerate, "OmniVoiceGenerate": OmniVoiceGenerate,
"OmniVoiceEpubLoader": OmniVoiceEpubLoader,
} }
NODE_DISPLAY_NAME_MAPPINGS = { NODE_DISPLAY_NAME_MAPPINGS = {
"OmniVoiceModelLoader": "OmniVoice Model Loader", "OmniVoiceModelLoader": "OmniVoice Model Loader",
"OmniVoiceGenerate": "OmniVoice Generate", "OmniVoiceGenerate": "OmniVoice Generate",
"OmniVoiceEpubLoader": "OmniVoice EPUB Loader",
} }
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"]
+1
View File
@@ -25,6 +25,7 @@ pip(
"pydub", "pydub",
"soundfile", "soundfile",
"numpy", "numpy",
"beautifulsoup4",
) )
print("\n[ComfyUI-Omnivoice] Installation complete.") print("\n[ComfyUI-Omnivoice] Installation complete.")
+2 -1
View File
@@ -1,4 +1,5 @@
from .loader import OmniVoiceModelLoader from .loader import OmniVoiceModelLoader
from .generator import OmniVoiceGenerate from .generator import OmniVoiceGenerate
from .epub_loader import OmniVoiceEpubLoader
__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate"] __all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader"]
+109
View File
@@ -0,0 +1,109 @@
import zipfile
import io
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup
def _local(tag):
"""Strip XML namespace prefix, return local tag name."""
return tag.split('}')[-1]
def _extract_chapters(epub_path):
    """Parse an EPUB and return its spine documents as chapters.

    The archive's ``META-INF/container.xml`` is read to locate the OPF package
    file; the OPF manifest and spine then determine which XHTML documents are
    read, and in which order.

    Args:
        epub_path: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        A list of ``{"title": str | None, "text": str}`` dicts, one per spine
        entry that refers to an XHTML manifest item. ``title`` comes from the
        document's ``<title>``, else its first ``<h1>``/``<h2>``/``<h3>``,
        else ``None``. ``text`` is the document text with ``<script>`` and
        ``<style>`` elements removed.

    Raises:
        ValueError: If ``container.xml`` has no ``rootfile`` element.

    Errors raised by ``zipfile`` and ``ElementTree`` (missing archive members,
    malformed XML) are not caught here.
    """
    # Local imports keep this function self-contained.
    import posixpath
    from urllib.parse import unquote

    chapters = []
    with zipfile.ZipFile(epub_path, 'r') as zf:
        member_names = set(zf.namelist())

        # 1. Find OPF path from container.xml
        container = ET.fromstring(zf.read('META-INF/container.xml'))
        rootfile = next(
            (el for el in container.iter() if _local(el.tag) == 'rootfile'),
            None,
        )
        if rootfile is None:
            # A bare StopIteration from next() would be an opaque failure.
            raise ValueError(
                "Invalid EPUB: META-INF/container.xml has no <rootfile> element"
            )
        opf_path = rootfile.attrib['full-path']
        opf_dir = opf_path.rsplit('/', 1)[0] + '/' if '/' in opf_path else ''

        # 2. Parse OPF: build manifest and spine
        opf = ET.fromstring(zf.read(opf_path))
        manifest = {
            el.attrib['id']: el.attrib['href']
            for el in opf.iter()
            if _local(el.tag) == 'item'
            and 'xhtml' in el.attrib.get('media-type', '')
        }
        spine = [
            el.attrib['idref']
            for el in opf.iter()
            if _local(el.tag) == 'itemref'
        ]

        # 3. Extract text from each chapter XHTML
        for idref in spine:
            href = manifest.get(idref)
            if href is None:
                # Spine entry is not an XHTML manifest item; skip it.
                continue

            # Manifest hrefs are URL references relative to the OPF file, so
            # they may be percent-encoded or contain "../" segments. Prefer the
            # literal name when the archive has it (previous behavior);
            # otherwise decode and normalize before looking it up.
            member = opf_dir + href
            if member not in member_names:
                member = posixpath.normpath(opf_dir + unquote(href))

            xhtml = zf.read(member).decode('utf-8', errors='replace')
            soup = BeautifulSoup(xhtml, 'html.parser')
            for tag in soup(['script', 'style']):
                tag.decompose()

            # Title: <title> → <h1/h2/h3> → None
            title = None
            if soup.title and soup.title.string:
                title = soup.title.string.strip()
            if not title:
                for hn in ['h1', 'h2', 'h3']:
                    tag = soup.find(hn)
                    if tag:
                        title = tag.get_text(strip=True)
                        break

            text = soup.get_text(separator=' ', strip=True)
            chapters.append({"title": title, "text": text})
    return chapters
class OmniVoiceEpubLoader:
    """Node that loads a range of chapters from an EPUB file as plain text.

    Outputs:
        text: text of the selected chapters, separated by a ``---`` line.
        chapter_list: one numbered line per chapter of the whole book,
            regardless of the selected range.
    """

    RETURN_TYPES = ("STRING", "STRING")
    RETURN_NAMES = ("text", "chapter_list")
    FUNCTION = "load_epub"
    CATEGORY = "OmniVoice"

    @classmethod
    def INPUT_TYPES(cls):
        """Describe the node's three required inputs."""

        def chapter_number(tooltip):
            # Both chapter inputs share the same 1-based integer bounds.
            return ("INT", {
                "default": 1, "min": 1, "max": 9999, "step": 1,
                "tooltip": tooltip,
            })

        required = {
            "epub_path": ("STRING", {
                "default": "",
                "tooltip": "Absolute path to the .epub file to load.",
            }),
            "chapter_start": chapter_number(
                "First chapter to include (1-indexed). Clamped to valid range automatically."
            ),
            "chapter_end": chapter_number(
                "Last chapter to include (1-indexed, inclusive). Clamped automatically. If less than chapter_start, set to chapter_start."
            ),
        }
        return {"required": required}

    def load_epub(self, epub_path, chapter_start, chapter_end):
        """Return ``(text, chapter_list)`` for the requested chapter range.

        ``chapter_start`` and ``chapter_end`` are 1-indexed and inclusive.
        Both are clamped to the number of chapters found, and an end below
        the start is raised to the start. An EPUB with no chapters yields
        two empty strings.
        """
        chapters = _extract_chapters(epub_path)
        if not chapters:
            return ("", "")

        total = len(chapters)
        first = max(1, min(chapter_start, total))
        last = max(first, min(chapter_end, total))

        # Listing covers every chapter, not only the selected ones; untitled
        # chapters fall back to "Chapter <n>".
        labels = []
        for number, chapter in enumerate(chapters, 1):
            heading = chapter['title'] or f'Chapter {number}'
            labels.append(f"{number}. {heading}")
        chapter_list = "\n".join(labels)

        # Selected chapters are joined by a delimiter line.
        bodies = [chapter["text"] for chapter in chapters[first - 1 : last]]
        text = "\n\n---\n\n".join(bodies)
        return (text, chapter_list)
+4 -2
View File
@@ -1,7 +1,7 @@
import tempfile import tempfile
import os import os
import torch import torch
import torchaudio import soundfile as sf
class OmniVoiceGenerate: class OmniVoiceGenerate:
@@ -109,7 +109,9 @@ class OmniVoiceGenerate:
tmp.close() tmp.close()
try: try:
ref_waveform = ref_audio["waveform"].squeeze(0).cpu() # (channels, samples) ref_waveform = ref_audio["waveform"].squeeze(0).cpu() # (channels, samples)
torchaudio.save(tmp_path, ref_waveform, int(ref_audio["sample_rate"])) audio_np = ref_waveform.numpy()
# soundfile expects (samples,) for mono or (samples, channels) for multi-channel
sf.write(tmp_path, audio_np[0] if audio_np.shape[0] == 1 else audio_np.T, int(ref_audio["sample_rate"]))
kwargs["ref_audio"] = tmp_path kwargs["ref_audio"] = tmp_path
if ref_text: if ref_text:
kwargs["ref_text"] = ref_text kwargs["ref_text"] = ref_text
+1
View File
@@ -1,3 +1,4 @@
# Dependencies are managed by install.py to avoid overwriting ComfyUI's torch. # Dependencies are managed by install.py to avoid overwriting ComfyUI's torch.
# omnivoice pins torch==2.8.* (CUDA 12.8) which would break ComfyUI's torch build. # omnivoice pins torch==2.8.* (CUDA 12.8) which would break ComfyUI's torch build.
# Do not add omnivoice here — install.py handles it with --no-deps. # Do not add omnivoice here — install.py handles it with --no-deps.
# EPUB parsing (OmniVoiceEpubLoader) requires beautifulsoup4 (installed by install.py).
+132
View File
@@ -0,0 +1,132 @@
import io
import zipfile
from unittest.mock import patch
import pytest
from nodes.epub_loader import OmniVoiceEpubLoader
# Capture the real ZipFile class BEFORE any patching so epub_opener can use it
# without hitting the patch and causing infinite recursion.
_RealZipFile = zipfile.ZipFile
# Minimal META-INF/container.xml fixture whose rootfile points at
# OEBPS/content.opf; written into the fake EPUBs by make_fake_epub and
# test_missing_title_fallback.
_CONTAINER_XML = """<?xml version="1.0"?>
<container xmlns="urn:oasis:schemas:container" version="1.0">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>"""
def make_fake_epub(chapters):
    """Build a minimal in-memory EPUB archive and return it as bytes.

    chapters: list of (title, body_html) pairs. Each pair becomes
    ``OEBPS/ch<i>.xhtml`` with the title in both ``<title>`` and ``<h1>``,
    listed in the manifest and spine in the given order.
    """
    indices = range(len(chapters))
    manifest_items = "\n".join(
        f'<item id="ch{i}" href="ch{i}.xhtml" media-type="application/xhtml+xml"/>'
        for i in indices
    )
    spine_refs = "\n".join(f'<itemref idref="ch{i}"/>' for i in indices)
    package = f"""<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf">
<manifest>{manifest_items}</manifest>
<spine>{spine_refs}</spine>
</package>"""

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, 'w') as z:
        z.writestr('mimetype', 'application/epub+zip')
        z.writestr('META-INF/container.xml', _CONTAINER_XML)
        z.writestr('OEBPS/content.opf', package)
        for i, (title, body) in enumerate(chapters):
            page = f'<html><head><title>{title}</title></head><body><h1>{title}</h1>{body}</body></html>'
            z.writestr(f'OEBPS/ch{i}.xhtml', page)
    # The archive is finalized once the with-block exits.
    return archive.getvalue()
def epub_opener(epub_bytes):
    """Return a stand-in for ``zipfile.ZipFile`` that always opens *epub_bytes*.

    The returned callable ignores the path it is given and opens the supplied
    bytes with the real (unpatched) ZipFile class.
    """
    def _open(path, mode='r'):
        in_memory = io.BytesIO(epub_bytes)
        return _RealZipFile(in_memory, mode)

    return _open
def test_input_types_structure():
    """INPUT_TYPES declares the three required inputs with the expected types."""
    required = OmniVoiceEpubLoader.INPUT_TYPES()["required"]
    expected_types = (
        ("epub_path", "STRING"),
        ("chapter_start", "INT"),
        ("chapter_end", "INT"),
    )
    for input_name, type_name in expected_types:
        assert input_name in required
        assert required[input_name][0] == type_name
def test_return_types():
    """The node returns two strings named ``text`` and ``chapter_list``."""
    node_cls = OmniVoiceEpubLoader
    assert node_cls.RETURN_TYPES == ("STRING", "STRING")
    assert node_cls.RETURN_NAMES == ("text", "chapter_list")
def test_chapter_extraction_basic():
    """Selecting both chapters yields both bodies, a delimiter and a two-line list."""
    book = make_fake_epub([
        ("Intro", "<p>Hello world</p>"),
        ("Chapter One", "<p>Body here</p>"),
    ])
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(book))
    with fake_zip:
        text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 2)
    for fragment in ("Hello world", "Body here", "---"):
        assert fragment in text
    assert len(chapter_list.strip().splitlines()) == 2
def test_chapter_range_single():
    """A range of 2..2 returns only the second chapter's text."""
    book = make_fake_epub([
        ("One", "<p>First</p>"),
        ("Two", "<p>Second</p>"),
        ("Three", "<p>Third</p>"),
    ])
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(book))
    with fake_zip:
        text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
    assert "Second" in text
    for excluded in ("First", "Third"):
        assert excluded not in text
def test_chapter_list_contains_all():
    """chapter_list enumerates every chapter even when only one is selected."""
    book = make_fake_epub([(name, "") for name in ("A", "B", "C")])
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(book))
    with fake_zip:
        _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
    entries = chapter_list.strip().splitlines()
    assert len(entries) == 3
    assert entries[0].startswith("1.")
    assert entries[2].startswith("3.")
def test_range_clamping_high():
    """A chapter_end beyond the last chapter is clamped to the last chapter."""
    book = make_fake_epub([("A", "<p>aaa</p>"), ("B", "<p>bbb</p>")])
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(book))
    with fake_zip:
        text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 99)
    assert "aaa" in text and "bbb" in text
def test_range_clamping_end_below_start():
    """A chapter_end below chapter_start is raised to chapter_start."""
    book = make_fake_epub([("A", "<p>aaa</p>"), ("B", "<p>bbb</p>")])
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(book))
    with fake_zip:
        text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 1)
    assert "bbb" in text
    assert "aaa" not in text
def test_missing_title_fallback():
    """A chapter with no <title> or heading is listed as ``Chapter <n>``."""
    package = """<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf">
<manifest><item id="ch0" href="ch0.xhtml" media-type="application/xhtml+xml"/></manifest>
<spine><itemref idref="ch0"/></spine>
</package>"""
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, 'w') as z:
        z.writestr('mimetype', 'application/epub+zip')
        z.writestr('META-INF/container.xml', _CONTAINER_XML)
        z.writestr('OEBPS/content.opf', package)
        z.writestr('OEBPS/ch0.xhtml', '<html><body><p>No title here</p></body></html>')
    # Take the bytes before ZipFile is patched below.
    epub_bytes = archive.getvalue()
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub_bytes))
    with fake_zip:
        _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
    assert "1. Chapter 1" in chapter_list
def test_script_style_stripped():
    """Contents of <script> and <style> elements are absent from the text."""
    body = '<script>alert("xss")</script><style>color:red</style><p>clean</p>'
    book = make_fake_epub([("Test", body)])
    fake_zip = patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(book))
    with fake_zip:
        text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
    for removed in ("alert", "color"):
        assert removed not in text
    assert "clean" in text
+2 -2
View File
@@ -75,7 +75,7 @@ def test_generate_voice_cloning():
ref_waveform = torch.zeros(1, 1, 24000) ref_waveform = torch.zeros(1, 1, 24000)
ref_audio_input = {"waveform": ref_waveform, "sample_rate": 24000} ref_audio_input = {"waveform": ref_waveform, "sample_rate": 24000}
with patch("nodes.generator.torchaudio.save") as mock_save: with patch("nodes.generator.sf.write") as mock_write:
result = node.generate( result = node.generate(
model=mock_model, model=mock_model,
text="Hello world", text="Hello world",
@@ -86,7 +86,7 @@ def test_generate_voice_cloning():
num_step=32, num_step=32,
) )
assert mock_save.called assert mock_write.called
call_kwargs = mock_model.generate.call_args[1] call_kwargs = mock_model.generate.call_args[1]
assert call_kwargs["ref_text"] == "reference text" assert call_kwargs["ref_text"] == "reference text"
assert "ref_audio" in call_kwargs assert "ref_audio" in call_kwargs