import posixpath
import re
import zipfile
import xml.etree.ElementTree as ET
from urllib.parse import unquote

from bs4 import BeautifulSoup

# Tags after which a paragraph break is inserted when flattening XHTML to text.
_BLOCK_TAGS = {"p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "div", "br", "tr"}

# Heading levels consulted for a chapter title and removed from the body text.
_HEADING_TAGS = ("h1", "h2", "h3")

# Whitespace normalisation patterns, compiled once instead of per chapter.
_INLINE_WS_RE = re.compile(r"[^\S\n]+")
_NEWLINE_PAD_RE = re.compile(r" *\n *")
_EXTRA_NEWLINES_RE = re.compile(r"\n{3,}")


def _local(tag: str) -> str:
    """Return *tag* with any leading ``{namespace}`` qualifier removed."""
    return tag.split("}")[-1]


def _find_opf_path(zf) -> str:
    """Return the package-document path declared in ``META-INF/container.xml``.

    Raises:
        KeyError: if the archive has no ``META-INF/container.xml`` or the
            rootfile element lacks a ``full-path`` attribute.
        ValueError: if the container declares no ``rootfile`` element.
    """
    container = ET.fromstring(zf.read("META-INF/container.xml"))
    rootfile = next(
        (el for el in container.iter() if _local(el.tag) == "rootfile"),
        None,
    )
    if rootfile is None:
        # A bare next() would leak StopIteration here, which says nothing
        # about what is wrong with the file.
        raise ValueError("META-INF/container.xml declares no <rootfile> element")
    return rootfile.attrib["full-path"]


def _resolve_member(names, opf_dir: str, href: str) -> str:
    """Map a manifest *href* to the name of a member of the archive.

    The literal concatenation ``opf_dir + href`` is tried first so archives
    that already worked keep resolving exactly as before.  Otherwise the href
    is treated as a relative URL: the ``#fragment`` is dropped, percent-escapes
    are decoded and ``.``/``..`` segments are collapsed.

    Args:
        names: Collection of member names present in the archive.
        opf_dir: Directory of the package document, ending in ``/`` or empty.
        href: The ``href`` attribute of a manifest item.

    Returns:
        The matching member name, or the literal concatenation when nothing
        matches (so the subsequent read raises ``KeyError`` naming that path).
    """
    literal = opf_dir + href
    if literal in names:
        return literal
    decoded = unquote(href.split("#", 1)[0])
    normalized = posixpath.normpath(posixpath.join(opf_dir, decoded))
    return normalized if normalized in names else literal


def _parse_chapter(xhtml: str) -> dict:
    """Convert one XHTML document into ``{"title": ..., "text": ...}``.

    The title comes from ``<title>`` when it has non-blank text, otherwise
    from the first ``h1``, ``h2`` or ``h3`` found (checked in that order); it
    is ``None`` when neither exists.  ``<script>``, ``<style>``, ``<title>``
    and every ``h1``-``h3`` are removed before the text is extracted.
    """
    soup = BeautifulSoup(xhtml, "html.parser")
    for tag in soup(["script", "style"]):
        tag.decompose()

    title = None
    if soup.title and soup.title.string:
        title = soup.title.string.strip()
    if not title:
        for name in _HEADING_TAGS:
            heading = soup.find(name)
            if heading:
                title = heading.get_text(strip=True)
                break

    # The title is returned separately, so keep it out of the body text.
    if soup.title:
        soup.title.decompose()
    for name in _HEADING_TAGS:
        for heading in soup.find_all(name):
            heading.decompose()

    # get_text() is called with an empty separator, so block boundaries have
    # to be marked explicitly or adjacent paragraphs would run together.
    for tag in soup.find_all(_BLOCK_TAGS):
        tag.append(soup.new_string("\n\n"))

    text = soup.get_text(separator="")
    text = _INLINE_WS_RE.sub(" ", text)          # runs of non-newline whitespace -> one space
    text = _NEWLINE_PAD_RE.sub("\n", text)       # drop spaces hugging a newline
    text = _EXTRA_NEWLINES_RE.sub("\n\n", text)  # at most one blank line in a row
    return {"title": title, "text": text.strip()}


def _extract_chapters(epub_path):
    """Read an EPUB and return its spine documents as a list of chapters.

    Args:
        epub_path: Path to the ``.epub`` file (opened as a ZIP archive).

    Returns:
        A list of ``{"title": str | None, "text": str}`` dicts in spine order.
        Spine entries whose ``idref`` has no manifest item with ``xhtml`` in
        its media type are skipped.

    Raises:
        KeyError: if ``container.xml``, the package document or a chapter
            file referenced by the spine is missing from the archive.
        ValueError: if ``container.xml`` declares no ``rootfile``.
    """
    chapters = []
    with zipfile.ZipFile(epub_path, "r") as zf:
        opf_path = _find_opf_path(zf)
        # Manifest hrefs are relative to the package document's directory.
        head, sep, _ = opf_path.rpartition("/")
        opf_dir = head + sep

        opf = ET.fromstring(zf.read(opf_path))
        manifest = {
            el.attrib["id"]: el.attrib["href"]
            for el in opf.iter()
            if _local(el.tag) == "item"
            and "xhtml" in el.attrib.get("media-type", "")
        }
        spine = [
            el.attrib["idref"]
            for el in opf.iter()
            if _local(el.tag) == "itemref"
        ]

        names = set(zf.namelist())
        for idref in spine:
            href = manifest.get(idref)
            if href is None:
                continue
            member = _resolve_member(names, opf_dir, href)
            # utf-8-sig drops a leading BOM, which would otherwise survive as
            # U+FEFF at the start of the chapter text (str.strip keeps it).
            xhtml = zf.read(member).decode("utf-8-sig", errors="replace")
            chapters.append(_parse_chapter(xhtml))
    return chapters


def _display_title(title, index: int) -> str:
    """Return *title*, or ``"Chapter <index>"`` when it is empty or ``None``."""
    return title if title else f"Chapter {index}"


class MisoTTSEpubLoader:
    """Load an EPUB and emit a chapter range as text, ready for the MisoTTS Generate node.

    ``load_epub`` returns three strings: the text of the selected chapters,
    the title of the first selected chapter, and a numbered list of every
    chapter in the book.
    """

    @classmethod
    def INPUT_TYPES(cls):
        """Return the declaration of the node's three required inputs."""
        return {
            "required": {
                "epub_path": ("STRING", {
                    "default": "",
                    "tooltip": "Absolute path to the .epub file.",
                }),
                "chapter_start": ("INT", {
                    "default": 1, "min": 1, "max": 9999, "step": 1,
                    "tooltip": "First chapter (1-indexed). Clamped to valid range.",
                }),
                "chapter_end": ("INT", {
                    "default": 1, "min": 1, "max": 9999, "step": 1,
                    "tooltip": "Last chapter (1-indexed, inclusive). Clamped automatically.",
                }),
            },
        }

    # NOTE(review): these attributes look like ComfyUI custom-node metadata
    # (output types/names, entry-point method name, menu category) - confirm.
    RETURN_TYPES = ("STRING", "STRING", "STRING")
    RETURN_NAMES = ("text", "chapter_title", "chapter_list")
    FUNCTION = "load_epub"
    CATEGORY = "MisoTTS"

    def load_epub(self, epub_path, chapter_start, chapter_end):
        """Return the text, first title and chapter index for a chapter range.

        Args:
            epub_path: Path to the ``.epub`` file.
            chapter_start: First chapter, 1-indexed; clamped to ``[1, n]``.
            chapter_end: Last chapter, 1-indexed and inclusive; clamped to
                ``[start, n]``, so an end before the start selects just the
                start chapter.

        Returns:
            ``(text, chapter_title, chapter_list)``.  ``text`` joins the
            selected chapters with ``"\\n\\n---\\n\\n"``; ``chapter_title``
            is the title of the first selected chapter; ``chapter_list`` has
            one ``"<i>. <title>"`` line per chapter in the book.  All three
            are empty strings when the book yields no chapters.
        """
        chapters = _extract_chapters(epub_path)
        if not chapters:
            return ("", "", "")

        n = len(chapters)
        start = max(1, min(chapter_start, n))
        end = max(start, min(chapter_end, n))

        chapter_list = "\n".join(
            f"{i}. {_display_title(ch['title'], i)}"
            for i, ch in enumerate(chapters, 1)
        )
        chapter_title = _display_title(chapters[start - 1]["title"], start)
        text = "\n\n---\n\n".join(ch["text"] for ch in chapters[start - 1:end])
        return (text, chapter_title, chapter_list)