diff --git a/tests/test_generate_popular_node_signatures.py b/tests/test_generate_popular_node_signatures.py index a5461ce..4c3ef4c 100644 --- a/tests/test_generate_popular_node_signatures.py +++ b/tests/test_generate_popular_node_signatures.py @@ -1,4 +1,5 @@ import json +import subprocess import tempfile import textwrap import unittest @@ -7,8 +8,15 @@ from pathlib import Path from unittest import mock from tools.generate_popular_node_signatures import ( + build_artifact, + clone_or_update_repo, extract_repo_signatures, + fetch_json, + main, normalise_input_spec, + normalise_manager_entries, + rank_packs, + repo_cache_path, write_artifact, ) @@ -5499,5 +5507,267 @@ NODE_CLASS_MAPPINGS = { self.assertEqual("2026-07-02T00:00:00Z", parsed["generated_at"]) +class ManagerIngestionTests(unittest.TestCase): + def test_fetch_json_reads_and_decodes_json_with_clear_url_errors(self): + response = mock.Mock() + response.read.return_value = b'{"custom_nodes": []}' + response.__enter__ = mock.Mock(return_value=response) + response.__exit__ = mock.Mock(return_value=False) + + with mock.patch("tools.generate_popular_node_signatures.urllib.request.urlopen", return_value=response): + self.assertEqual({"custom_nodes": []}, fetch_json("https://example.invalid/list.json")) + + with mock.patch( + "tools.generate_popular_node_signatures.urllib.request.urlopen", + side_effect=OSError("network down"), + ): + with self.assertRaisesRegex(RuntimeError, "https://example.invalid/list.json"): + fetch_json("https://example.invalid/list.json") + + def test_normalise_manager_entries_accepts_git_clone_repos_and_skips_raw_file_installs(self): + manager_data = { + "custom_nodes": [ + { + "author": "Alice", + "id": "alpha-id", + "title": "Alpha Nodes", + "files": ["https://github.com/example/alpha-nodes"], + "install_type": "git-clone", + "description": "Alpha description", + "downloads": "42", + }, + { + "author": "Raw", + "id": "raw-id", + "title": "Raw File Node", + "files": 
["https://raw.githubusercontent.com/example/raw-node.py"], + "install_type": "copy", + }, + { + "author": "Bob", + "id": "reference-id", + "title": "Reference Nodes", + "reference": "https://github.com/example/reference-nodes.git", + "install_type": "git-clone", + "stars": 7, + }, + ] + } + + entries = normalise_manager_entries(manager_data) + + self.assertEqual(["alpha-id", "reference-id"], [entry["id"] for entry in entries]) + self.assertEqual("https://github.com/example/alpha-nodes", entries[0]["repository"]) + self.assertEqual("Alpha Nodes", entries[0]["title"]) + self.assertEqual("Alice", entries[0]["author"]) + self.assertEqual(42, entries[0]["metrics"]["downloads"]) + self.assertEqual("https://github.com/example/reference-nodes.git", entries[1]["repository"]) + + def test_rank_packs_uses_popularity_metrics_then_stable_fallbacks(self): + packs = [ + { + "id": "tie-b", + "title": "Tie B", + "repository": "https://github.com/example/tie-b", + "metrics": {"downloads": 5}, + }, + { + "id": "most", + "title": "Most", + "repository": "https://github.com/example/most", + "metrics": {"stars": 10}, + }, + { + "id": "tie-a", + "title": "Tie A", + "repository": "https://github.com/example/tie-a", + "metrics": {"favorites": 5}, + }, + { + "id": "none", + "title": "None", + "repository": "https://github.com/example/none", + "metrics": {}, + }, + ] + + ranked = rank_packs(packs) + + self.assertEqual(["most", "tie-a", "tie-b", "none"], [pack["id"] for pack in ranked]) + self.assertEqual([1, 2, 3, 4], [pack["rank"] for pack in ranked]) + + +class RepoCacheTests(unittest.TestCase): + def test_repo_cache_path_is_safe_stable_and_collision_resistant(self): + with tempfile.TemporaryDirectory() as tmp: + cache_dir = Path(tmp) + + first = repo_cache_path("https://github.com/Owner/Repo.git", cache_dir) + same = repo_cache_path("https://github.com/Owner/Repo.git", cache_dir) + collision = repo_cache_path("https://github.com/Other/Repo.git", cache_dir) + + self.assertEqual(first, 
same) + self.assertNotEqual(first, collision) + self.assertEqual("repos", first.parent.name) + self.assertNotIn("..", first.name) + self.assertRegex(first.name, r"^github-com-owner-repo-[0-9a-f]{12}$") + + def test_clone_or_update_repo_clones_missing_repo_and_pulls_existing_repo(self): + with tempfile.TemporaryDirectory() as tmp: + cache_dir = Path(tmp) + url = "https://github.com/example/pack.git" + expected_path = repo_cache_path(url, cache_dir) + + with mock.patch("tools.generate_popular_node_signatures.subprocess.run") as run: + self.assertEqual(expected_path, clone_or_update_repo(url, cache_dir)) + + run.assert_called_once_with( + ["git", "clone", "--depth", "1", url, str(expected_path)], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + + expected_path.mkdir(parents=True) + with mock.patch("tools.generate_popular_node_signatures.subprocess.run") as run: + self.assertEqual(expected_path, clone_or_update_repo(url, cache_dir)) + + run.assert_not_called() + + with mock.patch("tools.generate_popular_node_signatures.subprocess.run") as run: + self.assertEqual(expected_path, clone_or_update_repo(url, cache_dir, refresh=True)) + + run.assert_called_once_with( + ["git", "-C", str(expected_path), "pull", "--ff-only"], + check=True, + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + text=True, + ) + + +class BuildArtifactTests(unittest.TestCase): + def _write_fixture_repo(self, path): + Path(path, "__init__.py").write_text( + textwrap.dedent( + ''' + class GoodNode: + RETURN_TYPES = ("IMAGE",) + + @classmethod + def INPUT_TYPES(cls): + return { + "required": { + "image": ("IMAGE",), + }, + } + + + NODE_CLASS_MAPPINGS = { + "GoodNode": GoodNode, + } + ''' + ), + encoding="utf-8", + ) + + def test_build_artifact_continues_after_failed_repo_and_records_pack_error(self): + manager_data = { + "custom_nodes": [ + { + "id": "broken-pack", + "title": "Broken Pack", + "files": ["https://github.com/example/broken-pack"], + 
"install_type": "git-clone", + "downloads": 20, + }, + { + "id": "good-pack", + "title": "Good Pack", + "files": ["https://github.com/example/good-pack"], + "install_type": "git-clone", + "downloads": 10, + }, + ] + } + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + repo_dir = tmp_path / "good-repo" + repo_dir.mkdir() + self._write_fixture_repo(repo_dir) + output = tmp_path / "popular_node_signatures.json" + + with ( + mock.patch("tools.generate_popular_node_signatures.fetch_json", return_value=manager_data), + mock.patch( + "tools.generate_popular_node_signatures.clone_or_update_repo", + side_effect=[RuntimeError("clone failed"), repo_dir], + ), + ): + summary = build_artifact( + manager_url="https://example.invalid/manager.json", + cache_dir=tmp_path / "cache", + output=output, + limit=2, + generated_at="2026-07-02T00:00:00Z", + ) + + payload = json.loads(output.read_text(encoding="utf-8")) + + self.assertEqual(2, summary["processed"]) + self.assertEqual(1, summary["errors"]) + self.assertEqual(1, summary["node_count"]) + self.assertEqual("error", payload["packs"]["broken-pack"]["status"]) + self.assertIn("clone failed", payload["packs"]["broken-pack"]["error"]) + self.assertEqual("ok", payload["packs"]["good-pack"]["status"]) + self.assertIn("GoodNode", payload["nodes"]) + + def test_cli_invokes_build_artifact_and_prints_summary(self): + with tempfile.TemporaryDirectory() as tmp: + output = Path(tmp, "artifact.json") + cache = Path(tmp, "cache") + fake_summary = { + "processed": 3, + "pack_count": 2, + "node_count": 7, + "errors": 1, + "output": output, + } + with ( + mock.patch("tools.generate_popular_node_signatures.build_artifact", return_value=fake_summary) as build, + mock.patch("builtins.print") as print_mock, + ): + exit_code = main( + [ + "--manager-url", + "https://example.invalid/manager.json", + "--cache-dir", + str(cache), + "--output", + str(output), + "--limit", + "3", + "--refresh", + ] + ) + + self.assertEqual(0, exit_code) + 
build.assert_called_once_with( + manager_url="https://example.invalid/manager.json", + cache_dir=cache, + output=output, + limit=3, + refresh=True, + generated_at=mock.ANY, + ) + printed = print_mock.call_args.args[0] + self.assertIn("processed=3", printed) + self.assertIn("packs=2", printed) + self.assertIn("nodes=7", printed) + self.assertIn("errors=1", printed) + self.assertIn(str(output), printed) + + if __name__ == "__main__": unittest.main() diff --git a/tools/generate_popular_node_signatures.py b/tools/generate_popular_node_signatures.py index b713798..08da44e 100644 --- a/tools/generate_popular_node_signatures.py +++ b/tools/generate_popular_node_signatures.py @@ -2,15 +2,24 @@ """Generate UTFCN's popular_node_signatures.json artifact.""" import ast +import argparse +import hashlib import json import os +import re +import subprocess +import urllib.request from datetime import datetime, timezone from pathlib import Path +from urllib.parse import urlparse SCHEMA_VERSION = 1 MANAGER_LIST_URL = "https://raw.githubusercontent.com/ltdrdata/ComfyUI-Manager/main/custom-node-list.json" REGISTRY_NODES_URL = "https://api.comfy.org/nodes" DEFAULT_GENERATED_AT = "1970-01-01T00:00:00Z" +DEFAULT_CACHE_DIR = Path(".cache/utfcn-popular-node-repos") +DEFAULT_OUTPUT = Path("popular_node_signatures.json") +USER_AGENT = "ComfyUI-UTFCN popular node signature generator" class UnsupportedStaticExpression(Exception): @@ -41,6 +50,293 @@ _CLASS_SIGNATURE_ATTRS = {"INPUT_TYPES", "RETURN_NAMES", "RETURN_TYPES"} _DYNAMIC_NAMESPACE_MUTATION = object() _NAMESPACE_FUNCTIONS = {"globals", "locals", "vars"} _NAMESPACE_DUNDER_MUTATORS = {"__delitem__", "__setitem__"} +_METRIC_FIELDS = ( + "downloads", + "download_count", + "stars", + "github_stars", + "stargazers_count", + "favorites", + "favourites", + "installed", + "installs", + "install_count", + "count", +) + + +def fetch_json(url): + request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT}) + try: + with 
def _coerce_int(value):
    """Convert a metric value to ``int`` on a best-effort basis, never raising.

    Args:
        value: Any object; numbers and numeric strings are the useful cases.

    Returns:
        * ``value`` unchanged when it is an ``int`` (but not a ``bool``);
        * the truncated value when it is a finite ``float``;
        * the parsed integer when it is a string made of an optional leading
          ``-`` followed only by decimal digits, after surrounding whitespace
          and ``,`` separators have been removed;
        * ``0`` for everything else (booleans, NaN, infinities, ``None``,
          non-numeric strings, other types).
    """
    # bool is a subclass of int, so it has to be rejected before the int check.
    if isinstance(value, bool):
        return 0
    if isinstance(value, int):
        return value
    if isinstance(value, float):
        # int() raises ValueError for NaN and OverflowError for +/-infinity.
        try:
            return int(value)
        except (ValueError, OverflowError):
            return 0
    if isinstance(value, str):
        text = value.strip().replace(",", "")
        digits = text[1:] if text.startswith("-") else text
        # isdecimal() is used instead of isdigit(): isdigit() also accepts
        # characters such as superscript digits, which int() cannot parse.
        if digits.isdecimal():
            return int(text)
    return 0
def _repository_candidates(item):
    """Yield every value of *item* that may name the pack's repository.

    The keys ``repository``, ``repo``, ``git``, ``git_url``, ``url``,
    ``reference`` and finally ``files`` are visited in that order. A string
    value is yielded as-is; a list value is flattened one level and its
    elements are yielded without any type check. Values of any other type
    (including a missing key) are skipped.

    Args:
        item: A mapping describing one manager entry.

    Yields:
        Candidate repository values in priority order.
    """
    keys = ("repository", "repo", "git", "git_url", "url", "reference", "files")
    for key in keys:
        value = item.get(key)
        if isinstance(value, str):
            yield value
        elif isinstance(value, list):
            yield from value


def _manager_entry_repository(item):
    """Return the first candidate of *item* that normalises to a repository URL.

    Candidates come from ``_repository_candidates`` and are passed, in order,
    to ``_normalise_repository_url``; the first truthy result wins. The
    entry's ``install_type`` / ``installType`` is not consulted: acceptance
    or rejection of a candidate is decided by the normaliser alone.

    Args:
        item: A mapping describing one manager entry.

    Returns:
        The normalised repository URL, or ``None`` when no candidate
        normalises to one.
    """
    for candidate in _repository_candidates(item):
        repository = _normalise_repository_url(candidate)
        if repository:
            return repository
    return None
def _popularity_score(pack):
    """Return the sum of the integer-coerced values in ``pack["metrics"]``.

    A missing or ``None`` ``metrics`` entry counts as no metrics (score 0).
    """
    metrics = pack.get("metrics") or {}
    return sum(_coerce_int(value) for value in metrics.values())


def rank_packs(packs, limit=None):
    """De-duplicate packs by repository and rank them by popularity.

    Packs without a truthy ``repository`` are dropped. When several packs
    share a repository, the one with the highest popularity score is kept;
    ties go to the lower ``manager_order`` and then to the greater ``id``
    string. The survivors are sorted by descending score, then
    case-insensitive title, then ``id``, then ``repository``.

    Args:
        packs: Iterable of pack mappings. The inputs are not mutated.
        limit: Maximum number of packs to return, or ``None`` for all.

    Returns:
        A list of shallow copies of the selected packs, each with a 1-based
        ``rank`` key reflecting its position in the result.

    Raises:
        ValueError: If ``limit`` is negative. A negative slice bound would
            silently drop packs from the end of the ranking instead of
            limiting it.
    """
    if limit is not None and limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}")

    def preference(pack):
        # Greater tuple wins when two packs point at the same repository.
        return (
            _popularity_score(pack),
            -int(pack.get("manager_order", 0)),
            str(pack.get("id", "")),
        )

    best_by_repository = {}
    for pack in packs:
        repository = pack.get("repository")
        if not repository:
            continue
        previous = best_by_repository.get(repository)
        if previous is None or preference(pack) > preference(previous):
            best_by_repository[repository] = dict(pack)

    ranked = sorted(
        best_by_repository.values(),
        key=lambda pack: (
            -_popularity_score(pack),
            str(pack.get("title", "")).lower(),
            str(pack.get("id", "")),
            str(pack.get("repository", "")),
        ),
    )
    if limit is not None:
        ranked = ranked[:limit]
    return [{**pack, "rank": index} for index, pack in enumerate(ranked, start=1)]
def _run_git(command):
    """Run *command* and turn any failure into a ``RuntimeError``.

    Standard output is discarded; standard error is captured as text so it
    can be included in the error message.

    Args:
        command: The full argument list to execute, e.g.
            ``["git", "clone", ...]``.

    Raises:
        RuntimeError: If the process exits with a non-zero status (the
            stripped stderr is appended when non-empty), or if the process
            cannot be started at all, for example because the executable is
            not installed.
    """
    try:
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        stderr = (exc.stderr or "").strip()
        detail = f": {stderr}" if stderr else ""
        raise RuntimeError(f"git command failed ({' '.join(command)}){detail}") from exc
    except OSError as exc:
        # Raised when the executable is missing or not runnable; report it the
        # same way as a failing command so callers see one error type.
        raise RuntimeError(f"git command failed ({' '.join(command)}): {exc}") from exc
pack.get("description"): + record["description"] = pack["description"] + if pack.get("metrics"): + record["metrics"] = dict(pack["metrics"]) + if error is not None: + record["error"] = str(error) + return record + + +def _merge_pack_metadata(extracted_pack, pack): + merged = dict(extracted_pack) + if pack.get("author"): + merged["author"] = pack["author"] + if pack.get("description"): + merged["description"] = pack["description"] + if pack.get("metrics"): + merged["metrics"] = dict(pack["metrics"]) + return merged + + +def build_artifact( + *, + manager_url=MANAGER_LIST_URL, + cache_dir=DEFAULT_CACHE_DIR, + output=DEFAULT_OUTPUT, + limit=1000, + refresh=False, + generated_at=None, +): + manager_raw = fetch_json(manager_url) + normalised = normalise_manager_entries(manager_raw) + ranked = rank_packs(normalised, limit) + packs = {} + nodes = {} + errors = 0 + + for pack in ranked: + try: + repo_dir = clone_or_update_repo(pack["repository"], cache_dir, refresh=refresh) + extracted = extract_repo_signatures(repo_dir, pack) + except Exception as exc: + errors += 1 + packs[pack["id"]] = _pack_record_from_meta(pack, "error", error=exc) + continue + + packs[pack["id"]] = _merge_pack_metadata(extracted["pack"], pack) + for node_type, node in sorted(extracted["nodes"].items()): + nodes.setdefault(node_type, node) + + generated_at = generated_at if generated_at is not None else datetime.now(timezone.utc) + write_artifact( + Path(output), + sources={ + "manager_url": manager_url, + "limit": limit, + "normalised_packs": len(normalised), + "processed_packs": len(ranked), + }, + packs=packs, + nodes=nodes, + generated_at=generated_at, + ) + return { + "processed": len(ranked), + "pack_count": len(packs), + "node_count": len(nodes), + "errors": errors, + "output": Path(output), + } + + +def main(argv=None): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--manager-url", default=MANAGER_LIST_URL) + parser.add_argument("--cache-dir", type=Path, 
default=DEFAULT_CACHE_DIR) + parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT) + parser.add_argument("--limit", type=int, default=1000) + parser.add_argument("--refresh", action="store_true") + parser.add_argument("--quiet", action="store_true") + args = parser.parse_args(argv) + + summary = build_artifact( + manager_url=args.manager_url, + cache_dir=args.cache_dir, + output=args.output, + limit=args.limit, + refresh=args.refresh, + generated_at=datetime.now(timezone.utc), + ) + if not args.quiet: + print( + "wrote {output} processed={processed} packs={packs} nodes={nodes} errors={errors}".format( + output=summary["output"], + processed=summary["processed"], + packs=summary["pack_count"], + nodes=summary["node_count"], + errors=summary["errors"], + ) + ) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())