Replace torchaudio.save with soundfile.write; add EPUB loader node

- nodes/generator.py: swap torchaudio.save for soundfile.write to avoid the torchcodec/FFmpeg dependency crash in environments without FFmpeg shared libs
- nodes/epub_loader.py: new OmniVoiceEpubLoader node for loading EPUB chapters
- tests/test_epub_loader.py: 8 tests for the EPUB loader
- install.py: add beautifulsoup4 to runtime deps
- __init__.py, nodes/__init__.py: register OmniVoiceEpubLoader

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-05 17:24:18 +02:00
parent 5366f6992e
commit 5dfaa0b300
8 changed files with 254 additions and 6 deletions
@@ -0,0 +1,132 @@
+import io
+import zipfile
+from unittest.mock import patch
+import pytest
+from nodes.epub_loader import OmniVoiceEpubLoader
+
+# Capture the real ZipFile class BEFORE any patching so epub_opener can use it
+# without hitting the patch and causing infinite recursion.
+_RealZipFile = zipfile.ZipFile
+
+# Container descriptor stored as META-INF/container.xml in the fake archives
+# built by these tests; it names OEBPS/content.opf as the package document.
+_CONTAINER_XML = """<?xml version="1.0"?>
+<container xmlns="urn:oasis:schemas:container" version="1.0">
+  <rootfiles>
+    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
+  </rootfiles>
+</container>"""
+
+
+def make_fake_epub(chapters):
+    """Build a minimal in-memory EPUB archive and return its raw bytes.
+
+    chapters: list of (title, body_html) pairs. The pair at position N is
+    stored as OEBPS/chN.xhtml and referenced from the manifest and the spine
+    under the id chN (N counted from zero).
+    """
+    indices = range(len(chapters))
+    manifest_items = "\n".join(
+        f'<item id="ch{idx}" href="ch{idx}.xhtml" media-type="application/xhtml+xml"/>'
+        for idx in indices
+    )
+    spine_refs = "\n".join(f'<itemref idref="ch{idx}"/>' for idx in indices)
+    package_doc = f"""<?xml version="1.0"?>
+<package xmlns="http://www.idpf.org/2007/opf">
+  <manifest>{manifest_items}</manifest>
+  <spine>{spine_refs}</spine>
+</package>"""
+
+    archive_buf = io.BytesIO()
+    with zipfile.ZipFile(archive_buf, 'w') as archive:
+        archive.writestr('mimetype', 'application/epub+zip')
+        archive.writestr('META-INF/container.xml', _CONTAINER_XML)
+        archive.writestr('OEBPS/content.opf', package_doc)
+        for idx, (title, body) in enumerate(chapters):
+            page = f'<html><head><title>{title}</title></head><body><h1>{title}</h1>{body}</body></html>'
+            archive.writestr(f'OEBPS/ch{idx}.xhtml', page)
+    # The archive is finalised when the with-block exits; hand back everything written.
+    return archive_buf.getvalue()
+
+
+def epub_opener(epub_bytes):
+    """Return a callable that opens `epub_bytes` as a zip archive, ignoring the path.
+
+    The tests pass the result as the `side_effect` of a patched
+    `zipfile.ZipFile`; every call wraps the bytes in a fresh BytesIO and opens
+    it with the unpatched `_RealZipFile`.
+    """
+    def _open_in_memory(path, mode='r'):
+        stream = io.BytesIO(epub_bytes)
+        return _RealZipFile(stream, mode)
+    return _open_in_memory
+
+
+def test_input_types_structure():
+    """INPUT_TYPES lists epub_path (STRING) and both chapter bounds (INT) as required."""
+    required = OmniVoiceEpubLoader.INPUT_TYPES()["required"]
+    expected_types = {
+        "epub_path": "STRING",
+        "chapter_start": "INT",
+        "chapter_end": "INT",
+    }
+    for field, type_name in expected_types.items():
+        assert field in required
+        assert required[field][0] == type_name
+
+
+def test_return_types():
+    """The node declares two STRING outputs named text and chapter_list."""
+    node_cls = OmniVoiceEpubLoader
+    assert node_cls.RETURN_TYPES == ("STRING", "STRING")
+    assert node_cls.RETURN_NAMES == ("text", "chapter_list")
+
+
+def test_chapter_extraction_basic():
+    """Loading chapters 1-2 yields both bodies, a '---' marker and two list entries."""
+    chapters = [("Intro", "<p>Hello world</p>"), ("Chapter One", "<p>Body here</p>")]
+    # Build the archive before patching: make_fake_epub needs the real ZipFile.
+    fake_zip = epub_opener(make_fake_epub(chapters))
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 2)
+    for fragment in ("Hello world", "Body here", "---"):
+        assert fragment in text
+    listed = chapter_list.strip().splitlines()
+    assert len(listed) == 2
+
+
+def test_chapter_range_single():
+    """Requesting chapters 2 through 2 returns only the second chapter's text."""
+    chapters = [("One", "<p>First</p>"), ("Two", "<p>Second</p>"), ("Three", "<p>Third</p>")]
+    # Build the archive before patching: make_fake_epub needs the real ZipFile.
+    fake_zip = epub_opener(make_fake_epub(chapters))
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        text, _chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
+    assert "Second" in text
+    for excluded in ("First", "Third"):
+        assert excluded not in text
+
+
+def test_chapter_list_contains_all():
+    """chapter_list covers every chapter in the book, not just the requested range."""
+    chapters = [("A", ""), ("B", ""), ("C", "")]
+    # Build the archive before patching: make_fake_epub needs the real ZipFile.
+    fake_zip = epub_opener(make_fake_epub(chapters))
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        _text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 2)
+    entries = chapter_list.strip().splitlines()
+    assert len(entries) == 3
+    first_entry, last_entry = entries[0], entries[2]
+    assert first_entry.startswith("1.")
+    assert last_entry.startswith("3.")
+
+
+def test_range_clamping_high():
+    """A chapter_end of 99 on a two-chapter book still returns both chapters."""
+    chapters = [("A", "<p>aaa</p>"), ("B", "<p>bbb</p>")]
+    # Build the archive before patching: make_fake_epub needs the real ZipFile.
+    fake_zip = epub_opener(make_fake_epub(chapters))
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        text, _chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 99)
+    assert "aaa" in text
+    assert "bbb" in text
+
+
+def test_range_clamping_end_below_start():
+    """With start=2 and end=1, only chapter 2's text is returned."""
+    chapters = [("A", "<p>aaa</p>"), ("B", "<p>bbb</p>")]
+    # Build the archive before patching: make_fake_epub needs the real ZipFile.
+    fake_zip = epub_opener(make_fake_epub(chapters))
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        loaded = OmniVoiceEpubLoader().load_epub('/fake.epub', 2, 1)
+    text, _chapter_list = loaded
+    assert "bbb" in text
+    assert "aaa" not in text
+
+
+def test_missing_title_fallback():
+    """A chapter file with no <title> or <h1> is listed as '1. Chapter 1'."""
+    package_doc = """<?xml version="1.0"?>
+<package xmlns="http://www.idpf.org/2007/opf">
+  <manifest><item id="ch0" href="ch0.xhtml" media-type="application/xhtml+xml"/></manifest>
+  <spine><itemref idref="ch0"/></spine>
+</package>"""
+    untitled_page = '<html><body><p>No title here</p></body></html>'
+    members = (
+        ('mimetype', 'application/epub+zip'),
+        ('META-INF/container.xml', _CONTAINER_XML),
+        ('OEBPS/content.opf', package_doc),
+        ('OEBPS/ch0.xhtml', untitled_page),
+    )
+    archive_buf = io.BytesIO()
+    # Write the archive before patching: this needs the real ZipFile.
+    with zipfile.ZipFile(archive_buf, 'w') as archive:
+        for member_name, payload in members:
+            archive.writestr(member_name, payload)
+    fake_zip = epub_opener(archive_buf.getvalue())
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        _text, chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
+    assert "1. Chapter 1" in chapter_list
+
+
+def test_script_style_stripped():
+    """Contents of <script> and <style> elements are absent from the extracted text."""
+    noisy_body = '<script>alert("xss")</script><style>color:red</style><p>clean</p>'
+    # Build the archive before patching: make_fake_epub needs the real ZipFile.
+    fake_zip = epub_opener(make_fake_epub([("Test", noisy_body)]))
+    with patch('nodes.epub_loader.zipfile.ZipFile', side_effect=fake_zip):
+        text, _chapter_list = OmniVoiceEpubLoader().load_epub('/fake.epub', 1, 1)
+    for leaked in ("alert", "color"):
+        assert leaked not in text
+    assert "clean" in text
@@ -75,7 +75,7 @@ def test_generate_voice_cloning():
    ref_waveform = torch.zeros(1, 1, 24000)
    ref_audio_input = {"waveform": ref_waveform, "sample_rate": 24000}

-    with patch("nodes.generator.torchaudio.save") as mock_save:
+    with patch("nodes.generator.sf.write") as mock_write:
        result = node.generate(
            model=mock_model,
            text="Hello world",
@@ -86,7 +86,7 @@ def test_generate_voice_cloning():
            num_step=32,
        )

-    assert mock_save.called
+    assert mock_write.called
    call_kwargs = mock_model.generate.call_args[1]
    assert call_kwargs["ref_text"] == "reference text"
    assert "ref_audio" in call_kwargs