yonishafir committed
Commit f829957 · verified · 1 Parent(s): 5403e5d

Update README.md

Files changed (1):
  1. README.md +44 -51
README.md CHANGED
@@ -81,6 +81,7 @@ By submitting the form above, you agree to BRIA’s [Privacy policy](https://bri
 
 ### How To Use
 ```python
+# requirements
 opencv-python==4.10.0.84
 torch==2.4.0
 torchvision==0.19.0
@@ -90,15 +91,12 @@ diffusers==0.29.2
 insightface==0.7.3
 onnx==1.16.2
 onnxruntime==1.18.1
+accelerate==0.33.0
+huggingface-hub==0.27.1
 ```
 
 
 ```python
-import gc
-import os
-import random
-import gradio as gr
-
 import cv2
 import torch
 import numpy as np
@@ -107,22 +105,29 @@ from PIL import Image
 from transformers import CLIPVisionModelWithProjection
 from diffusers.models import ControlNetModel
 
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download
 
 from insightface.app import FaceAnalysis
 
-import io
-import spaces
+from pipeline_bria_id_preservation import BriaIDPreservationDiffusionPipeline, draw_kps
+
 
-from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps
+# ================= Prepare and download models and checkpoints =================
+# Download face encoder
+snapshot_download(
+    "fal/AuraFace-v1",
+    local_dir="./models/auraface",
+)
 
-import pandas as pd
-import json
-import requests
-from io import BytesIO
-from huggingface_hub import hf_hub_download, HfApi
+# download checkpoints
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/config.json", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/ip-adapter.bin", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/pytorch_model.bin", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/config.json", local_dir="./checkpoints")
 
 
+# Util functions
 def resize_img(input_image, max_side=1280, min_side=1024, size=None,
                pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
 
@@ -146,7 +151,6 @@ def resize_img(input_image, max_side=1280, min_side=1024, size=None,
     input_image = Image.fromarray(res)
     return input_image
 
-
 def make_canny_condition(image, min_val=100, max_val=200, w_bilateral=True):
     if w_bilateral:
         image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
@@ -161,14 +165,19 @@ def make_canny_condition(image, min_val=100, max_val=200, w_bilateral=True):
     return image
 
 
+# ================= Parameters =================
 default_negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"
 
-# Download face encoder
-snapshot_download(
-    "fal/AuraFace-v1",
-    local_dir="models/auraface",
-)
+resolution = 1024
+seed = 12345
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# ckpts paths
+face_adapter = f"./checkpoints/checkpoint_105000/ip-adapter.bin"
+controlnet_path = f"./checkpoints/checkpoint_105000/controlnet"
+base_model_path = f'briaai/BRIA-2.3'
+
+# ================= Prepare face encoder =================
 app = FaceAnalysis(
     name="auraface",
     providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
@@ -177,24 +186,7 @@ app = FaceAnalysis(
 
 app.prepare(ctx_id=0, det_size=(640, 640))
 
-
-# download checkpoints
-print("Downloading checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/config.json", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/ip-adapter.bin", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/pytorch_model.bin", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/config.json", local_dir="./checkpoints")
-
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# ckpts paths
-face_adapter = f"./checkpoints/checkpoint_105000/ip-adapter.bin"
-controlnet_path = f"./checkpoints/checkpoint_105000/controlnet"
-base_model_path = f'briaai/BRIA-2.3'
-resolution = 1024
-
+# ================= Prepare pipeline =================
 # Load ControlNet models
 controlnet_lnmks = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 controlnet_canny = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-Canny",
@@ -202,12 +194,11 @@ controlnet_canny = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-C
 
 controlnet = [controlnet_lnmks, controlnet_canny]
 
-
 image_encoder = CLIPVisionModelWithProjection.from_pretrained(
     f"./checkpoints/image_encoder",
     torch_dtype=torch.float16,
 )
-pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
+pipe = BriaIDPreservationDiffusionPipeline.from_pretrained(
     base_model_path,
     controlnet=controlnet,
     torch_dtype=torch.float16,
@@ -220,15 +211,14 @@ pipe.use_native_ip_adapter=True
 
 pipe.load_ip_adapter_instantid(face_adapter)
 
-clip_embeds=None
+clip_embeds=None
 
 
-image_path = "<define a path to image>"
+image_path = "<Set your image path>"
 img = Image.open(image_path)
 
-prompt = "A male with brown eyes, gray hair, short hair, and wearing sunglasses."
-face_image = resize_img(face_image_orig, max_side=resolution, min_side=resolution)
-face_image_padded = resize_img(face_image_orig, max_side=resolution, min_side=resolution, pad_to_max_side=True)
+face_image = resize_img(img, max_side=resolution, min_side=resolution)
+face_image_padded = resize_img(img, max_side=resolution, min_side=resolution, pad_to_max_side=True)
 face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
 face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
 face_emb = face_info['embedding']
@@ -239,6 +229,10 @@ face_kps = draw_kps(face_image, face_info['kps'])
 kps_scale = 0.6
 canny_scale = 0.4
 ip_adapter_scale = 0.8
+num_inference_steps = 30
+guidance_scale = 5.0
+
+prompt = "A male with brown eyes, blonde hair, short hair, in a white shirt, smiling, with a neutral background, cartoon style"
 
 if canny_scale>0.0:
     canny_img = make_canny_condition(face_image, min_val=20, max_val=40, w_bilateral=True)
@@ -247,19 +241,18 @@ generator = torch.Generator(device=device).manual_seed(seed)
 
 
 images = pipe(
-    prompt = full_prompt,
+    prompt = prompt,
     negative_prompt = default_negative_prompt,
     image_embeds = face_emb,
-    image = [face_kps, canny_img] if canny_scale > 0.0 else face_kps,
+    image = [face_kps, canny_img] if canny_scale>0.0 else face_kps,
     controlnet_conditioning_scale = [kps_scale, canny_scale] if canny_scale>0.0 else kps_scale,
     ip_adapter_scale = ip_adapter_scale,
-    num_inference_steps = num_steps,
-    guidance_scale = guidance_scale,
+    num_inference_steps = num_inference_steps,
+    guidance_scale = 5.0,
     generator = generator,
     visual_prompt_embds = clip_embeds,
     cross_attention_kwargs = None,
-    num_images_per_prompt=num_images,
+    num_images_per_prompt=1,
 ).images[0]
 
-
 ```
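The updated requirements block above pins specific package versions (`diffusers==0.29.2` is visible only in a hunk header, and `transformers` is imported by the snippet but not pinned in the shown hunks). A minimal sketch, not part of the commit itself, assuming you want to install those pins programmatically instead of via a requirements.txt:

```python
# Sketch: install the pinned requirements shown in the updated README.
# Equivalent to writing them to requirements.txt and running `pip install -r`.
import subprocess
import sys

pinned_requirements = [
    "opencv-python==4.10.0.84",
    "torch==2.4.0",
    "torchvision==0.19.0",
    "diffusers==0.29.2",
    "insightface==0.7.3",
    "onnx==1.16.2",
    "onnxruntime==1.18.1",
    "accelerate==0.33.0",
    "huggingface-hub==0.27.1",
]
# Note: `transformers` is also imported by the snippet; its pin is not shown in the diff.
subprocess.check_call([sys.executable, "-m", "pip", "install", *pinned_requirements])
```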
 
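The updated snippet selects the largest detected face with `sorted(...)[-1]`, which raises an `IndexError` when insightface finds no face at all. A small, hypothetical guard around that step, assuming the same `FaceAnalysis` app and a PIL input image:

```python
# Hypothetical guard for the face-detection step: fail with a clear message
# when insightface does not detect any face in the input image.
import cv2
import numpy as np


def get_largest_face(face_app, pil_image):
    """Return the largest detected face, or raise ValueError if none is found."""
    faces = face_app.get(cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR))
    if not faces:
        raise ValueError("No face detected; use a clearer, front-facing portrait.")
    # Same selection rule as the README snippet: keep the face with the largest bbox area.
    return max(faces, key=lambda f: (f['bbox'][2] - f['bbox'][0]) * (f['bbox'][3] - f['bbox'][1]))


# Usage (assuming `app` and `face_image` from the snippet above):
# face_info = get_largest_face(app, face_image)
# face_emb = face_info['embedding']
```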
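The pipeline call ends with `.images[0]`, so `images` holds a single PIL image. A short, hypothetical follow-up for saving that result to disk:

```python
# Sketch: persist the generated image, assuming `images` holds the single
# PIL.Image returned by the pipeline call in the snippet above.
from pathlib import Path
from PIL import Image


def save_result(image: Image.Image, out_dir: str = "outputs",
                name: str = "bria_id_preservation_result.png") -> Path:
    """Save a generated PIL image and return the path it was written to."""
    out_path = Path(out_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    target = out_path / name
    image.save(target)
    return target


# Usage:
# print("Saved to", save_result(images))
```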