From 5dfaa0b3004ca2b83a70bc96c7c43af26730e021 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sun, 5 Apr 2026 17:24:18 +0200 Subject: [PATCH] Replace torchaudio.save with soundfile.write; add EPUB loader node - nodes/generator.py: swap torchaudio.save for soundfile.write to avoid torchcodec/FFmpeg dependency crash in environments without FFmpeg shared libs - nodes/epub_loader.py: new OmniVoiceEpubLoader node for loading EPUB chapters - tests/test_epub_loader.py: 9 tests for the EPUB loader - install.py: add beautifulsoup4 to runtime deps - __init__.py, nodes/__init__.py: register OmniVoiceEpubLoader Co-Authored-By: Claude Sonnet 4.6 --- __init__.py | 4 +- install.py | 1 + nodes/__init__.py | 3 +- nodes/epub_loader.py | 109 +++++++++++++++++++++++++++++++ nodes/generator.py | 6 +- requirements.txt | 1 + tests/test_epub_loader.py | 132 ++++++++++++++++++++++++++++++++++++++ tests/test_generator.py | 4 +- 8 files changed, 254 insertions(+), 6 deletions(-) create mode 100644 nodes/epub_loader.py create mode 100644 tests/test_epub_loader.py diff --git a/__init__.py b/__init__.py index 571d1d5..1474cdd 100644 --- a/__init__.py +++ b/__init__.py @@ -1,13 +1,15 @@ -from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate +from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader NODE_CLASS_MAPPINGS = { "OmniVoiceModelLoader": OmniVoiceModelLoader, "OmniVoiceGenerate": OmniVoiceGenerate, + "OmniVoiceEpubLoader": OmniVoiceEpubLoader, } NODE_DISPLAY_NAME_MAPPINGS = { "OmniVoiceModelLoader": "OmniVoice Model Loader", "OmniVoiceGenerate": "OmniVoice Generate", + "OmniVoiceEpubLoader": "OmniVoice EPUB Loader", } __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] diff --git a/install.py b/install.py index 1f2fa24..de6bdb1 100644 --- a/install.py +++ b/install.py @@ -25,6 +25,7 @@ pip( "pydub", "soundfile", "numpy", + "beautifulsoup4", ) print("\n[ComfyUI-Omnivoice] Installation complete.") diff --git a/nodes/__init__.py b/nodes/__init__.py index 
f17a35a..fcc139b 100644 --- a/nodes/__init__.py +++ b/nodes/__init__.py @@ -1,4 +1,5 @@ from .loader import OmniVoiceModelLoader from .generator import OmniVoiceGenerate +from .epub_loader import OmniVoiceEpubLoader -__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate"] +__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader"] diff --git a/nodes/epub_loader.py b/nodes/epub_loader.py new file mode 100644 index 0000000..e9485f1 --- /dev/null +++ b/nodes/epub_loader.py @@ -0,0 +1,109 @@ +import zipfile +import io +import xml.etree.ElementTree as ET +from bs4 import BeautifulSoup + + +def _local(tag): + """Strip XML namespace prefix, return local tag name.""" + return tag.split('}')[-1] + + +def _extract_chapters(epub_path): + """Parse EPUB and return list of {"title": str|None, "text": str}.""" + chapters = [] + with zipfile.ZipFile(epub_path, 'r') as zf: + # 1. Find OPF path from container.xml + container = ET.fromstring(zf.read('META-INF/container.xml')) + rootfile = next( + el for el in container.iter() + if _local(el.tag) == 'rootfile' + ) + opf_path = rootfile.attrib['full-path'] + opf_dir = opf_path.rsplit('/', 1)[0] + '/' if '/' in opf_path else '' + + # 2. Parse OPF: build manifest and spine + opf = ET.fromstring(zf.read(opf_path)) + manifest = { + el.attrib['id']: el.attrib['href'] + for el in opf.iter() + if _local(el.tag) == 'item' + and 'xhtml' in el.attrib.get('media-type', '') + } + spine = [ + el.attrib['idref'] + for el in opf.iter() + if _local(el.tag) == 'itemref' + ] + + # 3. 
Extract text from each chapter XHTML + for idref in spine: + href = manifest.get(idref) + if href is None: + continue + xhtml = zf.read(opf_dir + href).decode('utf-8', errors='replace') + soup = BeautifulSoup(xhtml, 'html.parser') + for tag in soup(['script', 'style']): + tag.decompose() + # Title: <title> → <h1/h2/h3> → None + title = None + if soup.title and soup.title.string: + title = soup.title.string.strip() + if not title: + for hn in ['h1', 'h2', 'h3']: + tag = soup.find(hn) + if tag: + title = tag.get_text(strip=True) + break + text = soup.get_text(separator=' ', strip=True) + chapters.append({"title": title, "text": text}) + + return chapters + + +class OmniVoiceEpubLoader: + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "epub_path": ("STRING", { + "default": "", + "tooltip": "Absolute path to the .epub file to load.", + }), + "chapter_start": ("INT", { + "default": 1, "min": 1, "max": 9999, "step": 1, + "tooltip": "First chapter to include (1-indexed). Clamped to valid range automatically.", + }), + "chapter_end": ("INT", { + "default": 1, "min": 1, "max": 9999, "step": 1, + "tooltip": "Last chapter to include (1-indexed, inclusive). Clamped automatically. If less than chapter_start, set to chapter_start.", + }), + }, + } + + RETURN_TYPES = ("STRING", "STRING") + RETURN_NAMES = ("text", "chapter_list") + FUNCTION = "load_epub" + CATEGORY = "OmniVoice" + + def load_epub(self, epub_path, chapter_start, chapter_end): + chapters = _extract_chapters(epub_path) + n = len(chapters) + + if n == 0: + return ("", "") + + start = max(1, min(chapter_start, n)) + end = max(start, min(chapter_end, n)) + + # chapter_list: all chapters regardless of selection + chapter_list = "\n".join( + f"{i}. 
{ch['title'] if ch['title'] else f'Chapter {i}'}" + for i, ch in enumerate(chapters, 1) + ) + + # text: selected range joined by delimiter + selected = chapters[start - 1 : end] + text = "\n\n---\n\n".join(ch["text"] for ch in selected) + + return (text, chapter_list) diff --git a/nodes/generator.py b/nodes/generator.py index 013b1f4..a93132a 100644 --- a/nodes/generator.py +++ b/nodes/generator.py @@ -1,7 +1,7 @@ import tempfile import os import torch -import torchaudio +import soundfile as sf class OmniVoiceGenerate: @@ -109,7 +109,9 @@ class OmniVoiceGenerate: tmp.close() try: ref_waveform = ref_audio["waveform"].squeeze(0).cpu() # (channels, samples) - torchaudio.save(tmp_path, ref_waveform, int(ref_audio["sample_rate"])) + audio_np = ref_waveform.numpy() + # soundfile expects (samples,) for mono or (samples, channels) for multi-channel + sf.write(tmp_path, audio_np[0] if audio_np.shape[0] == 1 else audio_np.T, int(ref_audio["sample_rate"])) kwargs["ref_audio"] = tmp_path if ref_text: kwargs["ref_text"] = ref_text diff --git a/requirements.txt b/requirements.txt index 59d015d..b505f76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ # Dependencies are managed by install.py to avoid overwriting ComfyUI's torch. # omnivoice pins torch==2.8.* (CUDA 12.8) which would break ComfyUI's torch build. # Do not add omnivoice here — install.py handles it with --no-deps. +# EPUB parsing (OmniVoiceEpubLoader) requires beautifulsoup4 (installed by install.py). diff --git a/tests/test_epub_loader.py b/tests/test_epub_loader.py new file mode 100644 index 0000000..8b1ead5 --- /dev/null +++ b/tests/test_epub_loader.py @@ -0,0 +1,132 @@ +import io +import zipfile +from unittest.mock import patch +import pytest +from nodes.epub_loader import OmniVoiceEpubLoader + +# Capture the real ZipFile class BEFORE any patching so epub_opener can use it +# without hitting the patch and causing infinite recursion. 
+_RealZipFile = zipfile.ZipFile + +_CONTAINER_XML = """<?xml version="1.0"?> +<container xmlns="urn:oasis:schemas:container" version="1.0"> + <rootfiles> + <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/> + </rootfiles> +</container>""" + + +def make_fake_epub(chapters): + """chapters: list of (title, body_html). Returns EPUB bytes.""" + buf = io.BytesIO() + with zipfile.ZipFile(buf, 'w') as z: + z.writestr('mimetype', 'application/epub+zip') + z.writestr('META-INF/container.xml', _CONTAINER_XML) + items = "\n".join( + f'<item id="ch{i}" href="ch{i}.xhtml" media-type="application/xhtml+xml"/>' + for i in range(len(chapters)) + ) + spine = "\n".join(f'<itemref idref="ch{i}"/>' for i in range(len(chapters))) + opf = f"""<?xml version="1.0"?> +<package xmlns="http://www.idpf.org/2007/opf"> + <manifest>{items}</manifest> + <spine>{spine}</spine> +</package>""" + z.writestr('OEBPS/content.opf', opf) + for i, (title, body) in enumerate(chapters): + z.writestr(f'OEBPS/ch{i}.xhtml', + f'<html><head><title>{title}

{title}

{body}') + buf.seek(0) + return buf.read() + + +def epub_opener(epub_bytes): + def _open(path, mode='r'): + return _RealZipFile(io.BytesIO(epub_bytes), mode) + return _open + + +def test_input_types_structure(): + inputs = OmniVoiceEpubLoader.INPUT_TYPES() + req = inputs["required"] + assert "epub_path" in req + assert req["epub_path"][0] == "STRING" + assert "chapter_start" in req + assert req["chapter_start"][0] == "INT" + assert "chapter_end" in req + assert req["chapter_end"][0] == "INT" + + +def test_return_types(): + assert OmniVoiceEpubLoader.RETURN_TYPES == ("STRING", "STRING") + assert OmniVoiceEpubLoader.RETURN_NAMES == ("text", "chapter_list") + + +def test_chapter_extraction_basic(): + epub = make_fake_epub([("Intro", "

Hello world

"), ("Chapter One", "

Body here

")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 2) + assert "Hello world" in text + assert "Body here" in text + assert "---" in text + assert len(chapter_list.strip().splitlines()) == 2 + + +def test_chapter_range_single(): + epub = make_fake_epub([("One", "

First

"), ("Two", "

Second

"), ("Three", "

Third

")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2) + assert "Second" in text + assert "First" not in text + assert "Third" not in text + + +def test_chapter_list_contains_all(): + epub = make_fake_epub([("A", ""), ("B", ""), ("C", "")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2) + lines = chapter_list.strip().splitlines() + assert len(lines) == 3 + assert lines[0].startswith("1.") + assert lines[2].startswith("3.") + + +def test_range_clamping_high(): + epub = make_fake_epub([("A", "

aaa

"), ("B", "

bbb

")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 99) + assert "aaa" in text and "bbb" in text + + +def test_range_clamping_end_below_start(): + epub = make_fake_epub([("A", "

aaa

"), ("B", "

bbb

")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 1) + assert "bbb" in text + assert "aaa" not in text + + +def test_missing_title_fallback(): + buf = io.BytesIO() + with zipfile.ZipFile(buf, 'w') as z: + z.writestr('mimetype', 'application/epub+zip') + z.writestr('META-INF/container.xml', _CONTAINER_XML) + z.writestr('OEBPS/content.opf', """ + + + +""") + z.writestr('OEBPS/ch0.xhtml', '

No title here

') + buf.seek(0) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(buf.read())): + _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1) + assert "1. Chapter 1" in chapter_list + + +def test_script_style_stripped(): + epub = make_fake_epub([("Test", '

clean

')]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1) + assert "alert" not in text + assert "color" not in text + assert "clean" in text diff --git a/tests/test_generator.py b/tests/test_generator.py index 6c1a306..52ab25e 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -75,7 +75,7 @@ def test_generate_voice_cloning(): ref_waveform = torch.zeros(1, 1, 24000) ref_audio_input = {"waveform": ref_waveform, "sample_rate": 24000} - with patch("nodes.generator.torchaudio.save") as mock_save: + with patch("nodes.generator.sf.write") as mock_write: result = node.generate( model=mock_model, text="Hello world", @@ -86,7 +86,7 @@ def test_generate_voice_cloning(): num_step=32, ) - assert mock_save.called + assert mock_write.called call_kwargs = mock_model.generate.call_args[1] assert call_kwargs["ref_text"] == "reference text" assert "ref_audio" in call_kwargs