diff --git a/__init__.py b/__init__.py index 571d1d5..1474cdd 100644 --- a/__init__.py +++ b/__init__.py @@ -1,13 +1,15 @@ -from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate +from .nodes import OmniVoiceModelLoader, OmniVoiceGenerate, OmniVoiceEpubLoader NODE_CLASS_MAPPINGS = { "OmniVoiceModelLoader": OmniVoiceModelLoader, "OmniVoiceGenerate": OmniVoiceGenerate, + "OmniVoiceEpubLoader": OmniVoiceEpubLoader, } NODE_DISPLAY_NAME_MAPPINGS = { "OmniVoiceModelLoader": "OmniVoice Model Loader", "OmniVoiceGenerate": "OmniVoice Generate", + "OmniVoiceEpubLoader": "OmniVoice EPUB Loader", } __all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS"] diff --git a/install.py b/install.py index 1f2fa24..de6bdb1 100644 --- a/install.py +++ b/install.py @@ -25,6 +25,7 @@ pip( "pydub", "soundfile", "numpy", + "beautifulsoup4", ) print("\n[ComfyUI-Omnivoice] Installation complete.") diff --git a/nodes/__init__.py b/nodes/__init__.py index f17a35a..fcc139b 100644 --- a/nodes/__init__.py +++ b/nodes/__init__.py @@ -1,4 +1,5 @@ from .loader import OmniVoiceModelLoader from .generator import OmniVoiceGenerate +from .epub_loader import OmniVoiceEpubLoader -__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate"] +__all__ = ["OmniVoiceModelLoader", "OmniVoiceGenerate", "OmniVoiceEpubLoader"] diff --git a/nodes/epub_loader.py b/nodes/epub_loader.py new file mode 100644 index 0000000..e9485f1 --- /dev/null +++ b/nodes/epub_loader.py @@ -0,0 +1,109 @@ +import zipfile +import io +import xml.etree.ElementTree as ET +from bs4 import BeautifulSoup + + +def _local(tag): + """Strip XML namespace prefix, return local tag name.""" + return tag.split('}')[-1] + + +def _extract_chapters(epub_path): + """Parse EPUB and return list of {"title": str|None, "text": str}.""" + chapters = [] + with zipfile.ZipFile(epub_path, 'r') as zf: + # 1. Find OPF path from container.xml + container = ET.fromstring(zf.read('META-INF/container.xml')) + rootfile = next( + el for el in container.iter() + if _local(el.tag) == 'rootfile' + ) + opf_path = rootfile.attrib['full-path'] + opf_dir = opf_path.rsplit('/', 1)[0] + '/' if '/' in opf_path else '' + + # 2. Parse OPF: build manifest and spine + opf = ET.fromstring(zf.read(opf_path)) + manifest = { + el.attrib['id']: el.attrib['href'] + for el in opf.iter() + if _local(el.tag) == 'item' + and 'xhtml' in el.attrib.get('media-type', '') + } + spine = [ + el.attrib['idref'] + for el in opf.iter() + if _local(el.tag) == 'itemref' + ] + + # 3. Extract text from each chapter XHTML + for idref in spine: + href = manifest.get(idref) + if href is None: + continue + xhtml = zf.read(opf_dir + href).decode('utf-8', errors='replace') + soup = BeautifulSoup(xhtml, 'html.parser') + for tag in soup(['script', 'style']): + tag.decompose() + # Title:
Hello world
"), ("Chapter One", "Body here
")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 2) + assert "Hello world" in text + assert "Body here" in text + assert "---" in text + assert len(chapter_list.strip().splitlines()) == 2 + + +def test_chapter_range_single(): + epub = make_fake_epub([("One", "First
"), ("Two", "Second
"), ("Three", "Third
")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2) + assert "Second" in text + assert "First" not in text + assert "Third" not in text + + +def test_chapter_list_contains_all(): + epub = make_fake_epub([("A", ""), ("B", ""), ("C", "")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2) + lines = chapter_list.strip().splitlines() + assert len(lines) == 3 + assert lines[0].startswith("1.") + assert lines[2].startswith("3.") + + +def test_range_clamping_high(): + epub = make_fake_epub([("A", "aaa
"), ("B", "bbb
")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 99) + assert "aaa" in text and "bbb" in text + + +def test_range_clamping_end_below_start(): + epub = make_fake_epub([("A", "aaa
"), ("B", "bbb
")]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 1) + assert "bbb" in text + assert "aaa" not in text + + +def test_missing_title_fallback(): + buf = io.BytesIO() + with zipfile.ZipFile(buf, 'w') as z: + z.writestr('mimetype', 'application/epub+zip') + z.writestr('META-INF/container.xml', _CONTAINER_XML) + z.writestr('OEBPS/content.opf', """ +No title here
') + buf.seek(0) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(buf.read())): + _, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1) + assert "1. Chapter 1" in chapter_list + + +def test_script_style_stripped(): + epub = make_fake_epub([("Test", 'clean
')]) + with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=epub_opener(epub)): + text, _ = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1) + assert "alert" not in text + assert "color" not in text + assert "clean" in text diff --git a/tests/test_generator.py b/tests/test_generator.py index 6c1a306..52ab25e 100644 --- a/tests/test_generator.py +++ b/tests/test_generator.py @@ -75,7 +75,7 @@ def test_generate_voice_cloning(): ref_waveform = torch.zeros(1, 1, 24000) ref_audio_input = {"waveform": ref_waveform, "sample_rate": 24000} - with patch("nodes.generator.torchaudio.save") as mock_save: + with patch("nodes.generator.sf.write") as mock_write: result = node.generate( model=mock_model, text="Hello world", @@ -86,7 +86,7 @@ def test_generate_voice_cloning(): num_step=32, ) - assert mock_save.called + assert mock_write.called call_kwargs = mock_model.generate.call_args[1] assert call_kwargs["ref_text"] == "reference text" assert "ref_audio" in call_kwargs