From 85552d8b25bbbc89f431f694e0b3efbc87b8f0a4 Mon Sep 17 00:00:00 2001 From: Ethanfel Date: Sun, 21 Jun 2026 09:27:10 +0200 Subject: [PATCH] fix: single-flight object_info build (prevents concurrent-build hang) The off-loop (threaded) build introduced a concurrency bug: ComfyUI's cache_helper is a global, so a manual refresh (R) fired while a rebuild was still running started a SECOND build; when the first finished it cleared the shared cache_helper, making the second re-walk the CIFS mount per-node = hang. Now an asyncio lock serialises builds: concurrent object_info requests wait for the in-flight build and serve its result instead of starting another. Verified: 3 concurrent requests -> exactly one build. Docs: note that Quick refresh detects changes by directory mtime, which network mounts (cache=loose CIFS) can report stale/coarse, so it may miss a brand-new file -- use Full refresh for just-added models. Co-Authored-By: Claude Opus 4.8 --- README.md | 8 +++++++- __init__.py | 40 ++++++++++++++++++++++++++-------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 4611191..48d9c84 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ from the **`Extensions`** menu (and the command palette): | Mode | What it does | Speed | |------|--------------|-------| | ⚡ **Quick refresh** | Re-walks only the folders whose timestamp **changed** since the last scan; reuses the cache for the rest. Catches new / removed / renamed files. | Fast on local disks; **~2× faster** on a slow network mount (it still has to stat every folder to find which changed). | -| 🔄 **Full refresh** | Clears ComfyUI's folder cache and re-walks **everything**. Catches moves/deletes anywhere. | Slowest (the original behaviour). | +| 🔄 **Full refresh** | Clears ComfyUI's folder cache and re-walks **everything**, ignoring timestamps. Catches moves/deletes anywhere. **Use this for files you *just* added.** | Slowest (the original behaviour). | | ➕ **Register new file…** | You give it the path(s) of the file(s) you just added; it appends them to the cache with **no folder walk**. | Instant disk-wise — only the `object_info` rebuild remains. | Also available: @@ -55,6 +55,12 @@ Also available: > so it's as slow as a Full refresh that one time; every Quick refresh after that > is incremental. The index is saved to `./cache/scan_snapshot.json`. +> **Network mounts (CIFS/SMB/NFS):** Quick refresh detects changes by directory +> timestamp, which network filesystems can report with a delay or coarse +> resolution (e.g. a `cache=loose` CIFS mount), so it may *occasionally miss a +> brand-new file*. If a just-added model doesn't show up after a Quick refresh, +> use **Full refresh** — it re-walks everything and doesn't rely on timestamps. + Whichever mode you pick, the button shows a "refreshing…" toast and normal loads stay instant. diff --git a/__init__.py b/__init__.py index f909226..1a880bb 100644 --- a/__init__.py +++ b/__init__.py @@ -415,6 +415,12 @@ _node_info_resolved = False # Live build progress, surfaced at /tenaciousload/status for the loading overlay. _build_state = {"building": False, "started": 0.0, "done": 0, "total": 0, "last_ms": 0, "last_bytes": 0} +# Single-flight: only ONE object_info build may run at a time. ComfyUI's +# cache_helper is a global, so two concurrent builds (e.g. a manual refresh +# fired during a rebuild) corrupt it and the second build re-walks the network +# mount per-node = a hang. Concurrent requests wait on this and serve the result. +_build_lock = asyncio.Lock() + def _resolve_node_info_fn(): """Pull ComfyUI's own `node_info` closure off the /object_info route, so the @@ -514,21 +520,27 @@ async def _object_info_cache_mw(request, handler): return _serve_cached(request) # MISS / refresh: build in a worker thread so a slow folder-walk does not - # freeze the event loop. Falls back to the normal in-loop handler. - raw = await _build_object_info_off_loop() - if raw is not None: - _store(raw) - return _serve_cached(request) - - resp = await handler(request) - try: - body = getattr(resp, "body", None) - if resp.status == 200 and isinstance(body, (bytes, bytearray)) and len(body) > 0: - _store(bytes(body)) + # freeze the event loop. Single-flight via _build_lock — a concurrent + # request (e.g. a manual refresh during a rebuild) waits here and then serves + # the fresh result instead of starting a second, conflicting build. + async with _build_lock: + # another request may have finished the build while we waited for the lock + if "nocache" not in request.query and _mem["raw"] is not None: return _serve_cached(request) - except Exception as e: # pragma: no cover - log.warning("Tenaciousload: caching skipped: %s", e) - return resp + raw = await _build_object_info_off_loop() + if raw is not None: + _store(raw) + return _serve_cached(request) + # off-loop build unavailable -> in-loop handler (still under the lock) + resp = await handler(request) + try: + body = getattr(resp, "body", None) + if resp.status == 200 and isinstance(body, (bytes, bytearray)) and len(body) > 0: + _store(bytes(body)) + return _serve_cached(request) + except Exception as e: # pragma: no cover + log.warning("Tenaciousload: caching skipped: %s", e) + return resp def _install_middleware():