# qwen_plus_node.py — standalone Qwen image-edit loader/encoder nodes for ComfyUI.
|
||||
import os

import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image

import comfy.sd
import folder_paths
from comfy.model_patcher import ModelPatcher
||||
# --- Loader Logic ---
class QwenImageEditLoaderStandalone:
    """Loads a Qwen image-edit checkpoint and exposes its MODEL/CLIP/VAE triple."""

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                # Any file registered under ComfyUI's "checkpoints" folder.
                "model_name": (folder_paths.get_filename_list("checkpoints"),),
            }
        }

    RETURN_TYPES = ("MODEL", "CLIP", "VAE")
    FUNCTION = "load_qwen"
    CATEGORY = "QwenEdit/Standalone"

    def load_qwen(self, model_name):
        """Resolve *model_name* under "checkpoints" and load it.

        Uses ComfyUI's generic checkpoint loader, which infers the model
        architecture from the state-dict keys — the same mechanism as the
        builtin CheckpointLoaderSimple node. (The previous implementation
        called an undefined ``comfylib_load_checkpoint`` placeholder and
        raised NameError at runtime.)
        """
        model_path = folder_paths.get_full_path("checkpoints", model_name)
        # load_checkpoint_guess_config returns (model, clip, vae, clip_vision);
        # this node only exposes the first three outputs.
        out = comfy.sd.load_checkpoint_guess_config(
            model_path,
            output_vae=True,
            output_clip=True,
            embedding_directory=folder_paths.get_folder_paths("embeddings"),
        )
        return out[:3]
||||
# --- The Requested Encoder Node ---
class TextEncodeQwenImageEditPlusStandalone:
    """Encodes an edit prompt, with up to three optional reference images,
    into a single Qwen-Edit conditioning.

    The target training resolution and the raw reference images are attached
    to the conditioning dict so a downstream sampler can consume them.
    (This version resolves the merge-conflict residue of the previous file:
    duplicate ``image1`` input, duplicate ``RETURN_TYPES``/``CATEGORY``
    assignments, and a shadowed second ``encode`` definition.)
    """

    @classmethod
    def INPUT_TYPES(s):
        return {
            "required": {
                "clip": ("CLIP",),
                "prompt": ("STRING", {"multiline": True}),
                # Resolution (in pixels) the Qwen edit model was trained for.
                "latent_target_pixel": ("INT", {"default": 1024, "min": 64, "max": 4096, "step": 64}),
            },
            "optional": {
                "vae": ("VAE",),
                "image1": ("IMAGE",),
                "image2": ("IMAGE",),
                "image3": ("IMAGE",),
            },
        }

    RETURN_TYPES = ("CONDITIONING",)
    FUNCTION = "encode"
    CATEGORY = "QwenEdit/Standalone"

    def common_preprocessing(self, image):
        """Convert a ComfyUI image tensor (B, H, W, C floats in [0, 1]) to PIL.

        Only the first image of a batch is used.
        """
        if len(image.shape) == 4:
            image = image[0]
        img = 255. * image.cpu().numpy()
        img = Image.fromarray(np.clip(img, 0, 255).astype(np.uint8))
        return img

    def encode(self, clip, prompt, latent_target_pixel, vae=None, image1=None, image2=None, image3=None):
        """Tokenize and encode *prompt*, attaching visual context to the result.

        ``latent_target_pixel`` is stored in the conditioning dict so the
        sampler knows the target resolution. The optional images are passed
        through untouched as ``visual_context``; entries may be ``None``.
        Returns a one-element tuple holding a standard ComfyUI conditioning
        list: ``[[cond, extras_dict]]``.
        """
        tokens = clip.tokenize(prompt)
        cond, pooled = clip.encode_from_tokens(tokens, return_pooled=True)
        return ([[cond, {
            "pooled_output": pooled,
            "latent_target_pixel": latent_target_pixel,
            # Raw image tensors handed to the sampler as editing context.
            "visual_context": [image1, image2, image3],
        }]],)
||||
# Registration: map internal node identifiers to their implementing classes.
# (The stray module-level `return` left over from the old encode body — a
# SyntaxError at import time — has been removed.)
NODE_CLASS_MAPPINGS = {
    "QwenImageEditLoaderStandalone": QwenImageEditLoaderStandalone,
    "TextEncodeQwenImageEditPlusStandalone": TextEncodeQwenImageEditPlusStandalone,
}
|
||||
# Human-readable titles shown in the ComfyUI node picker.
NODE_DISPLAY_NAME_MAPPINGS = {
    "QwenImageEditLoaderStandalone": "Qwen Image Edit Loader (Standalone)",
    "TextEncodeQwenImageEditPlusStandalone": "Text Encode Qwen Image Edit Plus (Mamad8 Standalone)",
}
# End of module.