yonishafir committed
Commit f829957 · verified · 1 Parent(s): 5403e5d

Update README.md

Files changed (1):
  1. README.md +44 -51
README.md CHANGED
@@ -81,6 +81,7 @@ By submitting the form above, you agree to BRIA’s [Privacy policy](https://bri
 
 ### How To Use
 ```python
+# requirements
 opencv-python==4.10.0.84
 torch==2.4.0
 torchvision==0.19.0
@@ -90,15 +91,12 @@ diffusers==0.29.2
 insightface==0.7.3
 onnx==1.16.2
 onnxruntime==1.18.1
+accelerate==0.33.0
+huggingface-hub==0.27.1
 ```
 
 
 ```python
-import gc
-import os
-import random
-import gradio as gr
-
 import cv2
 import torch
 import numpy as np
@@ -107,22 +105,29 @@ from PIL import Image
 from transformers import CLIPVisionModelWithProjection
 from diffusers.models import ControlNetModel
 
-from huggingface_hub import snapshot_download
+from huggingface_hub import snapshot_download, hf_hub_download
 
 from insightface.app import FaceAnalysis
 
-import io
-import spaces
+from pipeline_bria_id_preservation import BriaIDPreservationDiffusionPipeline, draw_kps
+
 
-from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps
+# ================= Prepare and download models and checkpoints =================
+# Download face encoder
+snapshot_download(
+    "fal/AuraFace-v1",
+    local_dir="./models/auraface",
+)
 
-import pandas as pd
-import json
-import requests
-from io import BytesIO
-from huggingface_hub import hf_hub_download, HfApi
+# download checkpoints
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/config.json", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/ip-adapter.bin", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/pytorch_model.bin", local_dir="./checkpoints")
+hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/config.json", local_dir="./checkpoints")
 
 
+# Util functions
 def resize_img(input_image, max_side=1280, min_side=1024, size=None,
                pad_to_max_side=False, mode=Image.BILINEAR, base_pixel_number=64):
 
@@ -146,7 +151,6 @@ def resize_img(input_image, max_side=1280, min_side=1024, size=None,
     input_image = Image.fromarray(res)
     return input_image
 
-
 def make_canny_condition(image, min_val=100, max_val=200, w_bilateral=True):
     if w_bilateral:
         image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
@@ -161,14 +165,19 @@ def make_canny_condition(image, min_val=100, max_val=200, w_bilateral=True):
     return image
 
 
+# ================= Parameters =================
 default_negative_prompt = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate,Mutilated,Mutilated hands,Poorly drawn face,Deformed,Bad anatomy,Cloned face,Malformed limbs,Missing legs,Too many fingers"
 
-# Download face encoder
-snapshot_download(
-    "fal/AuraFace-v1",
-    local_dir="models/auraface",
-)
+resolution = 1024
+seed = 12345
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# ckpts paths
+face_adapter = f"./checkpoints/checkpoint_105000/ip-adapter.bin"
+controlnet_path = f"./checkpoints/checkpoint_105000/controlnet"
+base_model_path = f'briaai/BRIA-2.3'
+
+# ================= Prepare face encoder =================
 app = FaceAnalysis(
     name="auraface",
     providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
@@ -177,24 +186,7 @@ app = FaceAnalysis(
 
 app.prepare(ctx_id=0, det_size=(640, 640))
 
-
-# download checkpoints
-print("Downloading checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/config.json", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/controlnet/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="checkpoint_105000/ip-adapter.bin", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/pytorch_model.bin", local_dir="./checkpoints")
-hf_hub_download(repo_id="briaai/ID_preservation_2.3_auraFaceEnc", filename="image_encoder/config.json", local_dir="./checkpoints")
-
-
-device = "cuda" if torch.cuda.is_available() else "cpu"
-
-# ckpts paths
-face_adapter = f"./checkpoints/checkpoint_105000/ip-adapter.bin"
-controlnet_path = f"./checkpoints/checkpoint_105000/controlnet"
-base_model_path = f'briaai/BRIA-2.3'
-resolution = 1024
-
+# ================= Prepare pipeline =================
 # Load ControlNet models
 controlnet_lnmks = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
 controlnet_canny = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-Canny",
@@ -202,12 +194,11 @@ controlnet_canny = ControlNetModel.from_pretrained("briaai/BRIA-2.3-ControlNet-C
 
 controlnet = [controlnet_lnmks, controlnet_canny]
 
-
 image_encoder = CLIPVisionModelWithProjection.from_pretrained(
     f"./checkpoints/image_encoder",
     torch_dtype=torch.float16,
 )
-pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
+pipe = BriaIDPreservationDiffusionPipeline.from_pretrained(
     base_model_path,
     controlnet=controlnet,
     torch_dtype=torch.float16,
@@ -220,15 +211,14 @@ pipe.use_native_ip_adapter=True
 
 pipe.load_ip_adapter_instantid(face_adapter)
 
-clip_embeds=None
+clip_embeds=None
 
 
-image_path = "<define a path to image>"
+image_path = "<Set your image path>"
 img = Image.open(image_path)
 
-prompt = "A male with brown eyes, gray hair, short hair, and wearing sunglasses."
-face_image = resize_img(face_image_orig, max_side=resolution, min_side=resolution)
-face_image_padded = resize_img(face_image_orig, max_side=resolution, min_side=resolution, pad_to_max_side=True)
+face_image = resize_img(img, max_side=resolution, min_side=resolution)
+face_image_padded = resize_img(img, max_side=resolution, min_side=resolution, pad_to_max_side=True)
 face_info = app.get(cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR))
 face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1] # only use the maximum face
 face_emb = face_info['embedding']
@@ -239,6 +229,10 @@ face_kps = draw_kps(face_image, face_info['kps'])
 kps_scale = 0.6
 canny_scale = 0.4
 ip_adapter_scale = 0.8
+num_inference_steps = 30
+guidance_scale = 5.0
+
+prompt = "A male with brown eyes, blonde hair, short hair, in a white shirt, smiling, with a neutral background, cartoon style"
 
 if canny_scale>0.0:
     canny_img = make_canny_condition(face_image, min_val=20, max_val=40, w_bilateral=True)
@@ -247,19 +241,18 @@ generator = torch.Generator(device=device).manual_seed(seed)
 
 
 images = pipe(
-    prompt = full_prompt,
+    prompt = prompt,
     negative_prompt = default_negative_prompt,
     image_embeds = face_emb,
-    image = [face_kps, canny_img] if canny_scale > 0.0 else face_kps,
+    image = [face_kps, canny_img] if canny_scale>0.0 else face_kps,
     controlnet_conditioning_scale = [kps_scale, canny_scale] if canny_scale>0.0 else kps_scale,
     ip_adapter_scale = ip_adapter_scale,
-    num_inference_steps = num_steps,
-    guidance_scale = guidance_scale,
+    num_inference_steps = num_inference_steps,
+    guidance_scale = 5.0,
     generator = generator,
     visual_prompt_embds = clip_embeds,
     cross_attention_kwargs = None,
-    num_images_per_prompt=num_images,
+    num_images_per_prompt=1,
 ).images[0]
 
-
 ```
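The updated requirements block above pins specific package versions (`diffusers==0.29.2` is visible only in a hunk header, and `transformers` is imported by the snippet but not pinned in the shown hunks). A minimal sketch, not part of the commit itself, assuming you want to install those pins programmatically instead of via a requirements.txt:

```python
# Sketch: install the pinned requirements shown in the updated README.
# Equivalent to writing them to requirements.txt and running `pip install -r`.
import subprocess
import sys

pinned_requirements = [
    "opencv-python==4.10.0.84",
    "torch==2.4.0",
    "torchvision==0.19.0",
    "diffusers==0.29.2",
    "insightface==0.7.3",
    "onnx==1.16.2",
    "onnxruntime==1.18.1",
    "accelerate==0.33.0",
    "huggingface-hub==0.27.1",
]
# Note: `transformers` is also imported by the snippet; its pin is not shown in the diff.
subprocess.check_call([sys.executable, "-m", "pip", "install", *pinned_requirements])
```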
 
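The updated snippet selects the largest detected face with `sorted(...)[-1]`, which raises an `IndexError` when insightface finds no face at all. A small, hypothetical guard around that step, assuming the same `FaceAnalysis` app and a PIL input image:

```python
# Hypothetical guard for the face-detection step: fail with a clear message
# when insightface does not detect any face in the input image.
import cv2
import numpy as np


def get_largest_face(face_app, pil_image):
    """Return the largest detected face, or raise ValueError if none is found."""
    faces = face_app.get(cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR))
    if not faces:
        raise ValueError("No face detected; use a clearer, front-facing portrait.")
    # Same selection rule as the README snippet: keep the face with the largest bbox area.
    return max(faces, key=lambda f: (f['bbox'][2] - f['bbox'][0]) * (f['bbox'][3] - f['bbox'][1]))


# Usage (assuming `app` and `face_image` from the snippet above):
# face_info = get_largest_face(app, face_image)
# face_emb = face_info['embedding']
```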
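The pipeline call ends with `.images[0]`, so `images` holds a single PIL image. A short, hypothetical follow-up for saving that result to disk:

```python
# Sketch: persist the generated image, assuming `images` holds the single
# PIL.Image returned by the pipeline call in the snippet above.
from pathlib import Path
from PIL import Image


def save_result(image: Image.Image, out_dir: str = "outputs",
                name: str = "bria_id_preservation_result.png") -> Path:
    """Save a generated PIL image and return the path it was written to."""
    out_path = Path(out_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    target = out_path / name
    image.save(target)
    return target


# Usage:
# print("Saved to", save_result(images))
```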