From 04f6271867c3ce9fe494bf12607b559157bdbd7f Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Fri, 5 Jun 2026 11:51:45 +0200 Subject: [PATCH] fix: build object_info off the event loop so a slow mount can't hang ComfyUI Root cause of 'refresh hangs ComfyUI': object_info is built by walking the model folders synchronously on the single aiohttp event loop. When the model tree is on a slow/stalling network mount (CIFS), that walk blocks in 'wait_for_response' and freezes the entire UI until the NAS answers. Fix: - Run the object_info build in a worker thread (folder-walk syscalls release the GIL, so the loop stays responsive). Uses ComfyUI's real node_info, resolved from the /object_info route closure, with a safe fallback to the in-loop build. - Offload the Quick scan and Register work to a thread too (POST no longer freezes). - Guard the incremental scanner against symlink cycles (visited realpaths). Unit-tested: threaded build bypasses the in-loop handler; node_info resolves; cycle guard terminates. Co-Authored-By: Claude Opus 4.8 --- __init__.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 89 insertions(+), 4 deletions(-) diff --git a/__init__.py b/__init__.py index d7a9caf..f131a4a 100644 --- a/__init__.py +++ b/__init__.py @@ -6,8 +6,9 @@ model/LoRA collection (especially on a network mount). It injects an aiohttp middleware that caches the huge /api/object_info response in memory and on disk (survives restarts) and serves it gzipped, so the slow -build (which freezes ComfyUI's event loop) runs only on the first load or an -explicit refresh — not on every page load. +build runs only on the first load or an explicit refresh — not on every page +load. That build (and the refresh folder-walk) runs in a worker thread, so a +slow/stalling network model mount no longer freezes ComfyUI's event loop. 
Three refresh modes are exposed (menu buttons, a graph node, and HTTP): * full - clear ComfyUI's folder cache -> full re-walk of every model @@ -26,6 +27,7 @@ import os import gzip import json import time +import asyncio import hashlib import logging import threading @@ -173,9 +175,17 @@ def _scan_root_incremental(root, old): """Walk a root, scandir-ing only dirs whose mtime changed; reuse the rest.""" new = {} scanned = reused = 0 + visited = set() # real paths, to defend against symlink cycles on network mounts stack = [root] while stack: d = stack.pop() + try: + rp = os.path.realpath(d) + except OSError: + continue + if rp in visited: + continue + visited.add(rp) try: m = os.path.getmtime(d) except OSError: @@ -344,6 +354,72 @@ def _check_node_signature(): log.warning("Tenaciousload: node signature check failed: %s", e) +# --------------------------------------------------------------------------- # +# Off-loop object_info builder +# --------------------------------------------------------------------------- # +# Building object_info walks the model folders synchronously. On a slow/stalling +# network mount that walk blocks ComfyUI's single event loop = the whole UI +# hangs. We instead run the build in a worker thread: the folder-walk syscalls +# release the GIL while they wait on the NAS, so the event loop stays responsive. +_node_info_fn = None +_node_info_resolved = False + + +def _resolve_node_info_fn(): + """Pull ComfyUI's own `node_info` closure off the /object_info route, so the + threaded build is byte-for-byte the same logic (no drift). 
def _resolve_node_info_fn():
    """Locate ComfyUI's ``node_info`` closure on the GET /object_info route.

    Walks the server's router looking for the GET handler registered at
    ``/object_info`` or ``/api/object_info``. If that handler (unwrapped one
    level via ``__wrapped__``) closes over a callable free variable named
    ``node_info``, it is stored in the module-level ``_node_info_fn`` so the
    threaded build can call it.

    The lookup is attempted only once: ``_node_info_resolved`` is set before
    the search starts, so a failed lookup is not retried. Every failure path
    is logged and leaves ``_node_info_fn`` unchanged, which makes
    ``_build_object_info_off_loop`` return ``None`` (in-loop fallback).
    """
    global _node_info_fn, _node_info_resolved
    _node_info_resolved = True
    try:
        for route in PromptServer.instance.app.router.routes():
            if route.method != "GET":
                continue
            path = getattr(route.resource, "canonical", None)
            if path not in ("/object_info", "/api/object_info"):
                continue
            fn = getattr(route.handler, "__wrapped__", route.handler)
            code = getattr(fn, "__code__", None)
            closure = getattr(fn, "__closure__", None)
            if not code or not closure:
                continue
            # co_freevars and __closure__ are parallel: name i <-> cell i.
            for name, cell in zip(code.co_freevars, closure):
                if name != "node_info":
                    continue
                try:
                    candidate = cell.cell_contents
                except ValueError:
                    # Reading an empty cell raises ValueError; keep scanning
                    # instead of abandoning the whole lookup.
                    continue
                if callable(candidate):
                    _node_info_fn = candidate
                    log.info("Tenaciousload: threaded object_info build enabled")
                    return
    except Exception as e:  # pragma: no cover
        log.warning("Tenaciousload: could not resolve node_info (%s); builds stay on the loop", e)
        return
    # Route missing or handler shape changed: say so, rather than silently
    # leaving every build on the event loop.
    log.warning("Tenaciousload: node_info closure not found on /object_info; builds stay on the loop")


def _build_object_info_bytes():
    """Build the object_info payload and return it as UTF-8 JSON bytes.

    Meant to be run in a worker thread by ``_build_object_info_off_loop``.
    Calls ``_node_info_fn(name)`` for every key of
    ``nodes.NODE_CLASS_MAPPINGS`` inside ``folder_paths.cache_helper``. A node
    whose ``node_info`` call raises is logged and left out of the result; the
    remaining nodes are still serialized.

    Returns:
        bytes: ``json.dumps`` of ``{node_name: node_info}``, UTF-8 encoded.
    """
    import nodes

    out = {}
    with folder_paths.cache_helper:
        # Iterate over a snapshot of the keys, not the live mapping.
        for x in list(nodes.NODE_CLASS_MAPPINGS.keys()):
            try:
                out[x] = _node_info_fn(x)
            except Exception:  # pragma: no cover
                log.error("Tenaciousload: node_info failed for '%s'", x, exc_info=True)
    return json.dumps(out).encode("utf-8")


async def _build_object_info_off_loop():
    """Build object_info in the default executor and return the raw bytes.

    Resolves ``node_info`` on first use, then runs
    ``_build_object_info_bytes`` via ``loop.run_in_executor`` so the coroutine
    yields to the event loop while the build runs.

    Returns:
        bytes | None: the JSON payload, or ``None`` when the caller should
        fall back to the normal handler -- ``node_info`` could not be
        resolved, the build raised, or the result failed the size sanity
        check (not bytes, or 1000 bytes or fewer).
    """
    if _node_info_fn is None and not _node_info_resolved:
        _resolve_node_info_fn()
    if _node_info_fn is None:
        return None
    try:
        # Inside a coroutine, get_running_loop() is the documented way to
        # obtain the loop; get_event_loop() is policy-dependent.
        loop = asyncio.get_running_loop()
        raw = await loop.run_in_executor(None, _build_object_info_bytes)
        if isinstance(raw, (bytes, bytearray)) and len(raw) > 1000:  # sanity: real one is huge
            return bytes(raw)
        log.warning("Tenaciousload: threaded build looked wrong (%d bytes); falling back", len(raw or b""))
    except Exception as e:  # pragma: no cover
        log.warning("Tenaciousload: threaded build failed (%s); falling back", e)
    return None
--------------------------------------------------------------------------- # # object_info caching middleware # --------------------------------------------------------------------------- # @@ -377,6 +453,13 @@ async def _object_info_cache_mw(request, handler): if "nocache" not in request.query and _mem["raw"] is not None: return _serve_cached(request) + # MISS / refresh: build in a worker thread so a slow folder-walk does not + # freeze the event loop. Falls back to the normal in-loop handler. + raw = await _build_object_info_off_loop() + if raw is not None: + _store(raw) + return _serve_cached(request) + resp = await handler(request) try: body = getattr(resp, "body", None) @@ -409,9 +492,11 @@ async def _refresh(request): except Exception: data = {} mode = (data.get("mode") or "full").lower() + loop = asyncio.get_event_loop() if mode == "quick": - summary = quick_rescan_all() + # run the folder walk off the loop so the UI stays responsive + summary = await loop.run_in_executor(None, quick_rescan_all) invalidate_object_info_cache() rescanned = sum(s["scanned"] for s in summary) log.info("Tenaciousload: quick refresh — %d folders touched, %d dirs rescanned", len(summary), rescanned) @@ -420,7 +505,7 @@ async def _refresh(request): if mode == "register": folder = data.get("folder") or "loras" files = data.get("files") or [] - result = register_files(folder, files) + result = await loop.run_in_executor(None, register_files, folder, files) invalidate_object_info_cache() log.info("Tenaciousload: register — %s", result) return web.json_response({"status": "ok", "mode": "register", **result})