Update pipline_StableDiffusion_ConsistentID.py
pipline_StableDiffusion_ConsistentID.py CHANGED

@@ -15,8 +15,8 @@ from diffusers.utils import _get_model_file
 from functions import process_text_with_markers, masks_for_unique_values, fetch_mask_raw_image, tokenize_and_mask_noun_phrases_ends, prepare_image_token_idx
 from functions import ProjPlusModel, masks_for_unique_values
 from attention import Consistent_IPAttProcessor, Consistent_AttProcessor, FacialEncoder
-from modelscope.outputs import OutputKeys
-from modelscope.pipelines import pipeline
+# from modelscope.outputs import OutputKeys
+# from modelscope.pipelines import pipeline
 
 #TODO
 import sys
@@ -43,7 +43,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
         subfolder: str = '',
         trigger_word_ID: str = '<|image|>',
         trigger_word_facial: str = '<|facial|>',
-        image_encoder_path: str = 'CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
+        image_encoder_path: str = 'laion/CLIP-ViT-H-14-laion2B-s32B-b79K', # TODO
         torch_dtype = torch.float16,
         num_tokens = 4,
         lora_rank= 128,
@@ -83,7 +83,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
                         [0, 255, 255], [85, 255, 255], [170, 255, 255]]
 
         ### LLVA Optional
-        self.llva_model_path = "llava-
+        self.llva_model_path = "llava-hf/llava-1.5-7b-hf" #TODO
         self.llva_prompt = "Describe this person's facial features for me, including face, ears, eyes, nose, and mouth."
         self.llva_tokenizer, self.llva_model, self.llva_image_processor, self.llva_context_len = None,None,None,None #load_pretrained_model(self.llva_model_path)
 
@@ -95,7 +95,7 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
         ).to(self.device, dtype=self.torch_dtype)
         self.FacialEncoder = FacialEncoder(self.image_encoder).to(self.device, dtype=self.torch_dtype)
 
-        self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
+        # self.skin_retouching = pipeline('skin-retouching-torch', model='damo/cv_unet_skin_retouching_torch', model_revision='v1.0.2')
 
         # Load the main state dict first.
         cache_dir = kwargs.pop("cache_dir", None)
@@ -589,10 +589,10 @@ class ConsistentIDStableDiffusionPipeline(StableDiffusionPipeline):
             # 9.3 Convert to PIL list
             image = self.numpy_to_pil(image)
 
-            if retouching:
-                after_retouching = self.skin_retouching(image[0])
-                if OutputKeys.OUTPUT_IMG in after_retouching:
-                    image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
+            # if retouching:
+            #     after_retouching = self.skin_retouching(image[0])
+            #     if OutputKeys.OUTPUT_IMG in after_retouching:
+            #         image = [Image.fromarray(cv2.cvtColor(after_retouching[OutputKeys.OUTPUT_IMG], cv2.COLOR_BGR2RGB))]
         else:
             # 9.1 Post-processing
             image = self.decode_latents(latents)