import torch
import numpy as np
from PIL import Image
import torch.nn.functional as F


class Mamad8_QwenEditPlus_Standalone:
    """Standalone Qwen2-VL image-edit prompt encoder for ComfyUI.

    Takes a CLIP model (Qwen2-VL variant), an edit instruction, and up to
    three reference images, and produces a positive and a negative
    CONDITIONING suitable for a Qwen image-edit sampler.
    """

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "clip": ("CLIP",),
                "image1": ("IMAGE",),
                "text": ("STRING", {"multiline": True, "default": "Describe the change..."}),
            },
            "optional": {
                "image2": ("IMAGE",),
                "image3": ("IMAGE",),
                "negative_prompt": ("STRING", {"multiline": True, "default": "low quality, blurry"}),
                "strength": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 10.0, "step": 0.01}),
            },
        }

    RETURN_TYPES = ("CONDITIONING", "CONDITIONING")
    RETURN_NAMES = ("conditioning", "negative_conditioning")
    FUNCTION = "encode"
    CATEGORY = "Qwen/Edit_Standalone"

    def common_preprocessing(self, image):
        """Convert a ComfyUI image tensor to a PIL image for Qwen.

        Args:
            image: torch tensor in ComfyUI layout (B, H, W, C) or (H, W, C),
                float values assumed in [0, 1] — TODO confirm against callers.

        Returns:
            PIL.Image built from the first image of the batch; any extra
            batch entries are silently dropped.
        """
        if len(image.shape) == 4:
            # Batched input: keep only the first image.
            image = image[0]
        arr = 255.0 * image.cpu().numpy()
        return Image.fromarray(np.clip(arr, 0, 255).astype(np.uint8))

    def encode(self, clip, image1, text, image2=None, image3=None, negative_prompt="", strength=1.0):
        """Encode the edit prompt plus reference images into conditioning.

        Args:
            clip: ComfyUI CLIP object (Qwen2-VL variant) providing
                ``tokenize`` and ``encode_from_tokens``.
            image1: required reference image tensor.
            text: positive edit instruction.
            image2, image3: optional extra reference images.
            negative_prompt: text for the negative conditioning.
            strength: weight stored alongside the positive conditioning.

        Returns:
            (conditioning, negative_conditioning) in the standard ComfyUI
            ``[[cond, extras_dict]]`` format.
        """
        # 1. Prepare the reference images (PIL) for visual conditioning;
        # image1 is required, image2/image3 are appended only if supplied.
        images_input = [
            self.common_preprocessing(img)
            for img in (image1, image2, image3)
            if img is not None
        ]

        # 2. Encode the positive prompt.
        # NOTE(review): only the text is tokenized here; the PIL images are
        # attached under the "images" key of the conditioning dict. Confirm the
        # downstream Qwen model actually consumes that key — ComfyUI's
        # reference Qwen edit encoder instead passes the images into
        # clip.tokenize(text, images=...).
        tokens = clip.tokenize(text)
        cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)
        conditioning = [[cond, {"pooled_output": pooled, "images": images_input, "strength": strength}]]

        # 3. Encode the negative prompt (no image context, no strength key).
        n_tokens = clip.tokenize(negative_prompt)
        n_cond, n_pooled = clip.encode_from_tokens(n_tokens, return_pooled=True)
        negative_conditioning = [[n_cond, {"pooled_output": n_pooled}]]

        return (conditioning, negative_conditioning)


# ComfyUI discovers custom nodes through these module-level mappings; the
# original file omitted them, so the node would never be registered.
NODE_CLASS_MAPPINGS = {
    "Mamad8_QwenEditPlus_Standalone": Mamad8_QwenEditPlus_Standalone,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "Mamad8_QwenEditPlus_Standalone": "Qwen Edit Plus (Standalone)",
}