import gradio as gr
import os
import numpy as np
import torch
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler, DiffusionPipeline, EulerAncestralDiscreteScheduler
from transformers import AutoModelForImageSegmentation
from torchvision.transforms import v2
from einops import rearrange
from omegaconf import OmegaConf
from huggingface_hub import hf_hub_download

from src.utils.mesh_util import save_obj, save_glb
from src.utils.train_util import instantiate_from_config
from src.utils.camera_util import (
    FOV_to_intrinsics,
    get_zero123plus_input_cameras,
    get_circular_camera_poses,
)

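# Cache directory for downloaded weights, plus a scratch directory for the
# intermediate images and meshes written by each stage.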
model_cache_dir = 'ckpts/'
os.makedirs(model_cache_dir, exist_ok=True)
os.makedirs('src/tmp', exist_ok=True)

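# Load the InstantMesh configuration (model architecture and inference settings).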
config_path = 'configs/instant-mesh-large.yaml'
config = OmegaConf.load(config_path)
config_name = os.path.basename(config_path).replace('.yaml', '')
model_config = config.model_config
infer_config = config.infer_config

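# All stages run on the GPU; pipeline_1 is CPU-offloaded between calls.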
device = torch.device('cuda')

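# Stage 1: sketch -> image. Stable Diffusion v1.5 guided by a scribble
# ControlNet turns the line drawing into a rendered object.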
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-scribble",
    torch_dtype=torch.float16,
    use_safetensors=True,
    cache_dir=model_cache_dir
)
pipeline_1 = StableDiffusionControlNetPipeline.from_pretrained(
    "stable-diffusion-v1-5/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    use_safetensors=True,
    cache_dir=model_cache_dir
)
pipeline_1.scheduler = UniPCMultistepScheduler.from_config(pipeline_1.scheduler.config)
pipeline_1.enable_model_cpu_offload()

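# Stage 2: background removal with the RMBG-2.0 segmentation model.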
pipeline_2 = AutoModelForImageSegmentation.from_pretrained(
    'briaai/RMBG-2.0',
    trust_remote_code=True,
    cache_dir=model_cache_dir
)
pipeline_2.to(device)
pipeline_2.eval()

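# Stage 3a: single image -> multi-view grid. Zero123++ generates six fixed
# viewpoints, using the UNet fine-tuned by InstantMesh.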
pipeline_3 = DiffusionPipeline.from_pretrained(
    "sudo-ai/zero123plus-v1.2",
    custom_pipeline="zero123plus",
    torch_dtype=torch.float16,
    cache_dir=model_cache_dir
)
pipeline_3.scheduler = EulerAncestralDiscreteScheduler.from_config(
    pipeline_3.scheduler.config, timestep_spacing='trailing'
)
unet_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="diffusion_pytorch_model.bin", repo_type="model", cache_dir=model_cache_dir)
state_dict = torch.load(unet_ckpt_path, map_location='cpu')
pipeline_3.unet.load_state_dict(state_dict, strict=True)
pipeline_3.to(device)

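# Stage 3b: multi-view images -> mesh. Load the InstantMesh (large)
# reconstruction model, keeping only the LRM generator weights.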
model_ckpt_path = hf_hub_download(repo_id="TencentARC/InstantMesh", filename="instant_mesh_large.ckpt", repo_type="model", cache_dir=model_cache_dir)
model = instantiate_from_config(model_config)
state_dict = torch.load(model_ckpt_path, map_location='cpu')['state_dict']
# Keep only the LRM generator weights, stripping the 'lrm_generator.' prefix (14 chars).
state_dict = {k[14:]: v for k, v in state_dict.items() if k.startswith('lrm_generator.') and 'source_camera' not in k}
model.load_state_dict(state_dict, strict=True)
model = model.to(device)
model.init_flexicubes_geometry(device, fovy=30.0)
model.eval()

print('----------------------Loading Finished-----------------------')

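# Persist the uploaded or hand-drawn sketch so every stage's input is on disk.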
def input_image(input_img):
    if input_img is None:
        raise gr.Error("No image uploaded!")
    input_img.save("src/tmp/sketch.png")

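# Stage 1 wrapper: the style suffix pushes the output toward a single,
# shadeless object on a white background, which later stages expect.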
def sketch_to_image(
    input_img,
    prompt,
    negative_prompt="low quality, black and white image",
    add_prompt=", 3d rendered, shadeless, white background, intact and single object",
    controlnet_conditioning_scale=0.75,
    num_inference_steps=50
):
    output = pipeline_1(
        prompt + add_prompt,
        num_inference_steps=int(num_inference_steps),
        guidance_scale=10,
        negative_prompt=negative_prompt,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        image=input_img
    ).images[0]

    output.save("src/tmp/image.png")

    return output

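# Stage 2 wrapper. RMBG-2.0 is a plain segmentation model rather than a
# callable pipeline, so the PIL image is normalized to a tensor first and the
# predicted mask is applied as an alpha channel, following the usage shown on
# the briaai/RMBG-2.0 model card.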
def background_remove(input_img):
    image = input_img.convert("RGB")
    transform = v2.Compose([
        v2.Resize((1024, 1024)),
        v2.ToTensor(),
        v2.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    with torch.no_grad():
        preds = pipeline_2(transform(image).unsqueeze(0).to(device))[-1].sigmoid().cpu()
    # Use the predicted foreground mask as the alpha channel.
    image.putalpha(v2.functional.to_pil_image(preds[0].squeeze()).resize(image.size))
    image.save("src/tmp/image_nobg.png")

    return image

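# Build cameras on a circular orbit for rendering; kept for parity with
# InstantMesh, though the Gradio flow below does not render turntable videos.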
def get_render_cameras(batch_size=1, M=120, radius=2.5, elevation=10.0, is_flexicubes=False):
    c2ws = get_circular_camera_poses(M=M, radius=radius, elevation=elevation)
    if is_flexicubes:
        cameras = torch.linalg.inv(c2ws)
        cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1, 1)
    else:
        extrinsics = c2ws.flatten(-2)
        intrinsics = FOV_to_intrinsics(30.0).unsqueeze(0).repeat(M, 1, 1).float().flatten(-2)
        cameras = torch.cat([extrinsics, intrinsics], dim=-1)
        cameras = cameras.unsqueeze(0).repeat(batch_size, 1, 1)
    return cameras

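# Extract a textured mesh from the triplane features and export OBJ and GLB.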
def make_mesh(model_path, model_glb_path, planes):
    with torch.no_grad():
        mesh_out = model.extract_mesh(
            planes,
            use_texture_map=False,
            **infer_config,
        )

    vertices, faces, vertex_colors = mesh_out
    # Swap axes so the exported mesh matches the viewer's orientation convention.
    vertices = vertices[:, [1, 2, 0]]

    save_obj(vertices, faces, vertex_colors, model_path)
    save_glb(vertices, faces, vertex_colors, model_glb_path)

    return model_path, model_glb_path

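# Stage 3 wrapper: Zero123++ returns the six views tiled in a 3x2 grid, which
# is split back into individual 320x320 images before feed-forward
# reconstruction.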
def image_to_model(input_img):
    generator = torch.Generator(device=device)
    z123_image = pipeline_3(
        input_img,
        generator=generator,
    ).images[0]

    # Split the 3x2 view grid into six separate images: (6, 3, h, w).
    input_img = np.asarray(z123_image, dtype=np.float32) / 255.0
    input_img = torch.from_numpy(input_img).permute(2, 0, 1).contiguous().float()
    input_img = rearrange(input_img, 'c (n h) (m w) -> (n m) c h w', n=3, m=2)

    input_cameras = get_zero123plus_input_cameras(batch_size=1, radius=4.0).to(device)

    input_img = input_img.unsqueeze(0).to(device)
    input_img = v2.functional.resize(input_img, (320, 320), interpolation=3, antialias=True).clamp(0, 1)

    model_path = "src/tmp/model.obj"
    model_glb_path = "src/tmp/model.glb"

    with torch.no_grad():
        planes = model.forward_planes(input_img, input_cameras)

    model_path, model_glb_path = make_mesh(model_path, model_glb_path, planes)

    return model_path, model_glb_path

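# Gradio UI: sketch/image input and intermediate results on the left, the
# reconstructed model on the right. The Run button chains the four stages.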
with gr.Blocks() as demo:
    gr.Markdown("""
    # SketchModeling: From Sketch to 3D Model

    **SketchModeling** is a method for 3D mesh generation from a sketch.

    It has three steps:
    1. It generates an image from the sketch using Stable Diffusion and ControlNet.
    2. It removes the background of the image using RMBG.
    3. It reconstructs a 3D model from the image using InstantMesh.

    Below, you can either upload a sketch image or draw the sketch yourself. Then press Run and wait for the model to be generated.

    **ATTENTION:** The first time you run SketchModeling, it may take some time to download the models from the Internet.
    """)
    with gr.Row(variant="panel"):
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    with gr.Tab("Sketch Pad"):
                        input_img = gr.Sketchpad(
                            crop_size=(640, 640), type="pil", label="Sketch Pad", image_mode="RGBA"
                        )
                    with gr.Tab("Input Image"):
                        input_img = gr.Image(
                            type="pil", label="Input Image", sources="upload", image_mode="RGBA"
                        )
                with gr.Column():
                    with gr.Tab("Generated Image"):
                        generated_img = gr.Image(
                            type="pil", label="Generated Image", image_mode="RGBA", interactive=False
                        )
                    with gr.Tab("Processed Image"):
                        processed_img = gr.Image(
                            type="pil", label="Processed Image", image_mode="RGBA", interactive=False
                        )
            with gr.Row():
                prompt = gr.Textbox(label="Prompt", interactive=True)
                controlnet_conditioning_scale = gr.Slider(
                    label="Controlnet Conditioning Scale",
                    minimum=0.5,
                    maximum=1.5,
                    value=0.85,
                    step=0.05,
                    interactive=True
                )
            with gr.Accordion('Advanced options', open=False):
                with gr.Row():
                    negative_prompt = gr.Textbox(label="Negative Prompt", value="low quality, black and white image", interactive=True)
                    add_prompt = gr.Textbox(label="Styles", value=", 3d rendered, shadeless, white background, intact and single object", interactive=True)
                    num_inference_steps = gr.Number(label="Inference Steps", value=50, interactive=True)
            run_btn = gr.Button("Run", variant="primary")

        with gr.Column():
            with gr.Tab("OBJ"):
                output_obj = gr.Model3D(
                    label="Output Model (OBJ Format)",
                    interactive=False
                )
            with gr.Tab("GLB"):
                output_glb = gr.Model3D(
                    label="Output Model (GLB Format)",
                    interactive=False
                )

    run_btn.click(fn=input_image, inputs=[input_img]).success(
        fn=sketch_to_image,
        inputs=[input_img, prompt, negative_prompt, add_prompt, controlnet_conditioning_scale, num_inference_steps],
        outputs=[generated_img]
    ).success(
        fn=background_remove,
        inputs=[generated_img],
        outputs=[processed_img]
    ).success(
        fn=image_to_model,
        inputs=[processed_img],
        outputs=[output_obj, output_glb]
    )

demo.launch()