Update README.md
### How To Use

```
# requirements
opencv-python==4.10.0.84
torch==2.4.0
torchvision==0.19.0
diffusers==0.29.2
insightface==0.7.3
onnx==1.16.2
onnxruntime==1.18.1
accelerate==0.33.0
huggingface-hub==0.27.1
```
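Save these pins to a `requirements.txt` and install them with `pip install -r requirements.txt`.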
|
97 |
|
98 |
|
99 |
```python
|
|
|
|
|
|
|
|
|
|
|
100 |
import cv2
|
101 |
import torch
|
102 |
import numpy as np
|
|
|
105 |
from transformers import CLIPVisionModelWithProjection
|
106 |
from diffusers.models import ControlNetModel
|
107 |
|
108 |
+
from huggingface_hub import snapshot_download, hf_hub_download
|
109 |
|
110 |
from insightface.app import FaceAnalysis
|
111 |
|
112 |
+
from pipeline_bria_id_preservation import BriaIDPreservationDiffusionPipeline, draw_kps
|
113 |
+
|
114 |
|
115 |
+
# ================= Prepare and download models and checkpoints =================
|
116 |
+
# Download face encoder
|
117 |
+
snapshot_download(
|
118 |
+
"fal/AuraFace-v1",
|
119 |
+
local_dir="./models/auraface",
|
120 |
+
)
|
121 |
|
122 |
+
# download checkpoints
|
123 |
+
hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/config.json", local_dir="./checkpoints")
|
124 |
+
hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
|
125 |
+
hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/ip-adapter.bin", local_dir="./checkpoints")
|
126 |
+
hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/pytorch_model.bin", local_dir="./checkpoints")
|
127 |
+
hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/config.json", local_dir="./checkpoints")
|
128 |
|
129 |
|
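
# Note: hf_hub_download keeps each file's repo-relative path under local_dir,
# so the files above land in ./checkpoints/checkpoint_105000/... and
# ./checkpoints/image_encoder/..., matching the checkpoint paths defined below.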

# Util functions
def resize_img(input_image, max_side=1280, min_side=1024, size=None,
               pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
    # ... body elided in this excerpt: resizes so the sides are multiples of
    # base_pixel_number and, when pad_to_max_side=True, pads to a square canvas
    input_image = Image.fromarray(res)
    return input_image


def make_canny_condition(image, min_val=100, max_val=200, w_bilateral=True):
    if w_bilateral:
        image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    # ... body elided in this excerpt: runs Canny edge detection between
    # min_val and max_val and returns the edge map as an image
    # (see the sketch after this code block)
    return image


# ================= Parameters =================
default_negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"

resolution = 1024
seed = 12345
device = "cuda" if torch.cuda.is_available() else "cpu"

# Checkpoint paths
face_adapter = "./checkpoints/checkpoint_105000/ip-adapter.bin"
controlnet_path = "./checkpoints/checkpoint_105000/controlnet"
base_model_path = "briaai/BRIA-2.3"

# ================= Prepare face encoder =================
app = FaceAnalysis(
    name="auraface",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
    # remaining arguments elided in this excerpt
)

app.prepare(ctx_id=0, det_size=(640, 640))
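
# Note: snapshot_download placed AuraFace under ./models/auraface; the elided
# FaceAnalysis arguments presumably point its model root there (assumption;
# the exact argument is not visible in this excerpt).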

# ================= Prepare pipeline =================
# Load ControlNet models
controlnet_lnmks = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
controlnet_canny = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-Canny",
                                                   torch_dtype=torch.float16)  # continuation elided in this excerpt; float16 assumed to match the other ControlNet

controlnet = [controlnet_lnmks, controlnet_canny]

image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "./checkpoints/image_encoder",
    torch_dtype=torch.float16,
)
pipe = BriaIDPreservationDiffusionPipeline.from_pretrained(
    base_model_path,
    controlnet=controlnet,
    torch_dtype=torch.float16,
    # remaining arguments elided in this excerpt
)
pipe.use_native_ip_adapter = True

pipe.load_ip_adapter_instantid(face_adapter)

clip_embeds = None
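
# clip_embeds is left as None: no extra CLIP visual prompt is supplied, so the
# face embedding alone drives the adapter (it is passed as visual_prompt_embds below).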

image_path = "<Set your image path>"
img = Image.open(image_path)

face_image = resize_img(img, max_side=resolution, min_side=resolution)
face_image_padded = resize_img(img, max_side=resolution, min_side=resolution, pad_to_max_side=True)
face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))  # insightface expects BGR input
face_info = sorted(face_info, key=lambda x: (x['bbox'][2] - x['bbox'][0]) * (x['bbox'][3] - x['bbox'][1]))[-1]  # only use the largest face
face_emb = face_info['embedding']
face_kps = draw_kps(face_image, face_info['kps'])

# kps_scale weights the facial-landmarks ControlNet and canny_scale the edge
# ControlNet; canny_scale = 0.0 disables the Canny branch entirely
kps_scale = 0.6
canny_scale = 0.4
ip_adapter_scale = 0.8
num_inference_steps = 30
guidance_scale = 5.0

prompt = "A male with brown eyes, blonde hair, short hair, in a white shirt, smiling, with a neutral background, cartoon style"

if canny_scale > 0.0:
    canny_img = make_canny_condition(face_image, min_val=20, max_val=40, w_bilateral=True)

generator = torch.Generator(device=device).manual_seed(seed)

images = pipe(
    prompt = prompt,
    negative_prompt = default_negative_prompt,
    image_embeds = face_emb,
    image = [face_kps, canny_img] if canny_scale > 0.0 else face_kps,
    controlnet_conditioning_scale = [kps_scale, canny_scale] if canny_scale > 0.0 else kps_scale,
    ip_adapter_scale = ip_adapter_scale,
    num_inference_steps = num_inference_steps,
    guidance_scale = guidance_scale,
    generator = generator,
    visual_prompt_embds = clip_embeds,
    cross_attention_kwargs = None,
    num_images_per_prompt = 1,
).images[0]
```
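
The bodies of `resize_img` and `make_canny_condition` are collapsed in the excerpt above. For reference, here is a minimal sketch of a Canny conditioning helper consistent with the visible lines; the bilateral-filter parameters and the three-channel stacking are assumptions, not the repository's exact code:

```python
import cv2
import numpy as np
from PIL import Image

# Hypothetical sketch only; parameter choices are assumptions
def make_canny_condition_sketch(image, min_val=100, max_val=200, w_bilateral=True):
    gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
    if w_bilateral:
        # Edge-preserving smoothing before edge detection (parameters assumed)
        gray = cv2.bilateralFilter(gray, 9, 75, 75)
    edges = cv2.Canny(gray, min_val, max_val)
    # Stack the single-channel edge map to 3 channels for the ControlNet input
    edges = np.stack([edges] * 3, axis=-1)
    return Image.fromarray(edges)
```

The pipeline call returns a single `PIL.Image` (note the trailing `.images[0]`), so the result can be saved with, e.g., `images.save("result.png")`.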