# qwen_plus_node.py — standalone Qwen image-edit loader/encoder nodes for ComfyUI.
|
||||
import os

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image

import comfy.sd
import folder_paths
from comfy.model_patcher import ModelPatcher
||||
# --- Loader Logic ---
class QwenImageEditLoaderStandalone:
    """Loads a Qwen image-edit checkpoint and exposes its MODEL/CLIP/VAE triple."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                # Any file registered under ComfyUI's "checkpoints" folder.
                "model_name": (folder_paths.get_filename_list("checkpoints"),),
            }
        }

    RETURN_TYPES = ("MODEL", "CLIP", "VAE")
    FUNCTION = "load_qwen"
    CATEGORY = "QwenEdit/Standalone"

    def load_qwen(self, model_name):
        """Resolve *model_name* under "checkpoints" and load it.

        Uses ComfyUI's generic checkpoint loader, which infers the model
        architecture from the state-dict keys — the same mechanism as the
        builtin CheckpointLoaderSimple node. (The previous implementation
        called an undefined ``comfylib_load_checkpoint`` placeholder and
        raised NameError at runtime.)
        """
        model_path = folder_paths.get_full_path("checkpoints", model_name)
        # load_checkpoint_guess_config returns (model, clip, vae, clip_vision);
        # this node only exposes the first three outputs.
        out = comfy.sd.load_checkpoint_guess_config(
            model_path,
            output_vae=True,
            output_clip=True,
            embedding_directory=folder_paths.get_folder_paths("embeddings"),
        )
        return out[:3]
||||
# --- The Requested Encoder Node ---
class TextEncodeQwenImageEditPlusStandalone:
    """Encodes an edit prompt, with up to three optional reference images,
    into a single Qwen-Edit conditioning.

    The target training resolution and the raw reference images are attached
    to the conditioning dict so a downstream sampler can consume them.
    (This version resolves the merge-conflict residue of the previous file:
    duplicate ``image1`` input, duplicate ``RETURN_TYPES``/``CATEGORY``
    assignments, and a shadowed second ``encode`` definition.)
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "clip": ("CLIP",),
                "prompt": ("STRING", {"multiline": True}),
                # Resolution (in pixels) the Qwen edit model was trained for.
                "latent_target_pixel": ("INT", {"default": 1024, "min": 64, "max": 4096, "step": 64}),
            },
            "optional": {
                "vae": ("VAE",),
                "image1": ("IMAGE",),
                "image2": ("IMAGE",),
                "image3": ("IMAGE",),
            },
        }

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "encode"
    CATEGORY = "QwenEdit/Standalone"

    def common_preprocessing(self, image):
        """Convert a ComfyUI image tensor (B, H, W, C floats in [0, 1]) to PIL.

        Only the first image of a batch is used.
        """
        if len(image.shape) == 4:
            image = image[0]
        img = 255. * image.cpu().numpy()
        img = Image.fromarray(np.clip(img, 0, 255).astype(np.uint8))
        return img

    def encode(self, clip, prompt, latent_target_pixel, vae=None, image1=None, image2=None, image3=None):
        """Tokenize and encode *prompt*, attaching visual context to the result.

        ``latent_target_pixel`` is stored in the conditioning dict so the
        sampler knows the target resolution. The optional images are passed
        through untouched as ``visual_context``; entries may be ``None``.
        Returns a one-element tuple holding a standard ComfyUI conditioning
        list: ``[[cond, extras_dict]]``.
        """
        tokens = clip.tokenize(prompt)
        cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)
        return ([[cond, {
            "pooled_output": pooled,
            "latent_target_pixel": latent_target_pixel,
            # Raw image tensors handed to the sampler as editing context.
            "visual_context": [image1, image2, image3],
        }]],)
||||
# Registration: map internal node identifiers to their implementing classes.
# (The stray module-level `return` left over from the old encode body — a
# SyntaxError at import time — has been removed.)
NODE_CLASS_MAPPINGS = {
    "QwenImageEditLoaderStandalone": QwenImageEditLoaderStandalone,
    "TextEncodeQwenImageEditPlusStandalone": TextEncodeQwenImageEditPlusStandalone,
}
|
||||
# Human-readable titles shown in the ComfyUI node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "QwenImageEditLoaderStandalone": "Qwen Image Edit Loader (Standalone)",
    "TextEncodeQwenImageEditPlusStandalone": "Text Encode Qwen Image Edit Plus (Mamad8 Standalone)",
}
# End of module.