YiftachEde committed on
Commit
01e52ef
·
1 Parent(s): 818fb4f
Files changed (5) hide show
  1. README.md +34 -7
  2. app.py +3 -10
  3. requirements.txt +13 -114
  4. runtime.txt +1 -0
  5. zero123plus/pipeline.py +0 -1
README.md CHANGED
@@ -1,14 +1,41 @@
1
  ---
2
- title: Sharp It
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: green
6
  sdk: gradio
7
- sdk_version: 5.20.0
8
  app_file: app.py
9
  pinned: false
10
- license: apache-2.0
11
- short_description: Multi-View to Multi-View diffusion model
 
12
  ---
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Sharp-It 3D Model Generator
3
+ emoji: 🔮
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.0.2
8
  app_file: app.py
9
  pinned: false
10
+ python_version: 3.10.13
11
+ hf_oauth: false
12
+ hardware: zerogpu
13
  ---
14
 
15
+ # Sharp-It: 3D Model Generator
16
+
17
+ This application generates 3D models from text prompts using Shap-E and refines them with InstantMesh.
18
+
19
+ ## Features
20
+
21
+ - Generate 3D models from text descriptions
22
+ - Refine generated models with high-quality textures
23
+ - Export to OBJ format for use in 3D applications
24
+
25
+ ## How to Use
26
+
27
+ 1. Enter a text prompt describing the 3D object you want to create
28
+ 2. Adjust the generation parameters if needed
29
+ 3. Click "Generate Views" to create the initial model
30
+ 4. Enter a refinement prompt if you want to modify specific aspects
31
+ 5. Click "Refine" to enhance the model
32
+ 6. Download the resulting 3D mesh
33
+
34
+ ## Technical Details
35
+
36
+ This application uses:
37
+ - Shap-E for initial 3D generation
38
+ - InstantMesh for high-quality refinement
39
+ - ZeroGPU for efficient GPU resource management
40
+
41
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -25,6 +25,7 @@ from src.utils.camera_util import (
25
  from src.utils.mesh_util import save_obj, save_glb
26
  from src.utils.infer_util import remove_background, resize_foreground
27
 
 
28
  def load_models():
29
  """Initialize and load all required models"""
30
  config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
@@ -59,16 +60,6 @@ def load_models():
59
 
60
  # Load custom UNet
61
  print('Loading custom UNet...')
62
- # unet_path = "best_21.ckpt"
63
- # state_dict = torch.load(unet_path, map_location='cpu')
64
-
65
- # # Process the state dict to match the model keys
66
- # if 'state_dict' in state_dict:
67
- # new_state_dict = {key.replace('unet.unet.', ''): value for key, value in state_dict['state_dict'].items()}
68
- # pipeline.unet.load_state_dict(new_state_dict, strict=False)
69
- # else:
70
- # pipeline.unet.load_state_dict(state_dict, strict=False)
71
- # pipeline.unet.push_to_hub("YiftachEde/Sharp-It")
72
  pipeline.unet = pipeline.unet.from_pretrained("YiftachEde/Sharp-It").to(torch.float16)
73
  pipeline = pipeline.to(device).to(torch_dtype=torch.float16)
74
 
@@ -89,6 +80,7 @@ def load_models():
89
 
90
  return pipeline, model, infer_config
91
 
 
92
  def process_images(input_images, prompt, steps=75, guidance_scale=7.5, pipeline=None):
93
  """Process input images and run refinement"""
94
  device = pipeline.device
@@ -158,6 +150,7 @@ def process_images(input_images, prompt, steps=75, guidance_scale=7.5, pipeline=
158
 
159
  return output, input_image
160
 
 
161
  def create_mesh(refined_image, model, infer_config):
162
  """Generate mesh from refined image"""
163
  # Convert PIL image to tensor
 
25
  from src.utils.mesh_util import save_obj, save_glb
26
  from src.utils.infer_util import remove_background, resize_foreground
27
 
28
+ @spaces.GPU(duration=60)
29
  def load_models():
30
  """Initialize and load all required models"""
31
  config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
 
60
 
61
  # Load custom UNet
62
  print('Loading custom UNet...')
 
 
 
 
 
 
 
 
 
 
63
  pipeline.unet = pipeline.unet.from_pretrained("YiftachEde/Sharp-It").to(torch.float16)
64
  pipeline = pipeline.to(device).to(torch_dtype=torch.float16)
65
 
 
80
 
81
  return pipeline, model, infer_config
82
 
83
+ @spaces.GPU(duration=60)
84
  def process_images(input_images, prompt, steps=75, guidance_scale=7.5, pipeline=None):
85
  """Process input images and run refinement"""
86
  device = pipeline.device
 
150
 
151
  return output, input_image
152
 
153
+ @spaces.GPU(duration=60)
154
  def create_mesh(refined_image, model, infer_config):
155
  """Generate mesh from refined image"""
156
  # Convert PIL image to tensor
requirements.txt CHANGED
@@ -1,114 +1,13 @@
1
- aiofiles==23.2.1
2
- aiohappyeyeballs==2.4.6
3
- aiohttp==3.11.13
4
- aiosignal==1.3.2
5
- annotated-types==0.7.0
6
- antlr4-python3-runtime==4.9.3
7
- anyio==4.6.2.post1
8
- attrs==25.1.0
9
- cachetools==5.5.0
10
- click==8.1.7
11
- contourpy==1.3.0
12
- cycler==0.12.1
13
- dataclasses-json==0.6.7
14
- Deprecated==1.2.14
15
- diffusers
16
- einops==0.8.1
17
- fastapi==0.112.4
18
- ffmpy==0.4.0
19
- filelock==3.16.1
20
- fonttools==4.54.1
21
- frozenlist==1.5.0
22
- fsspec==2024.9.0
23
- gradio==4.43.0
24
- gradio_client==1.3.0
25
- h11==0.14.0
26
- httpcore==1.0.6
27
- httpx==0.27.2
28
- huggingface-hub
29
- imageio==2.35.1
30
- importlib_metadata==8.6.1
31
- importlib_resources==6.4.5
32
- Jinja2==3.1.4
33
- jsonpointer==2.1
34
- kiwisolver==1.4.7
35
- lightning-utilities==0.12.0
36
- markdown-it-py==3.0.0
37
- MarkupSafe==2.1.5
38
- marshmallow==3.22.0
39
- matplotlib==3.9.2
40
- mdurl==0.1.2
41
- mpmath==1.3.0
42
- multidict==6.1.0
43
- mypy-extensions==1.0.0
44
- networkx==3.4.2
45
- numpy==2.1.1
46
- nvidia-cublas-cu12==12.4.5.8
47
- nvidia-cuda-cupti-cu12==12.4.127
48
- nvidia-cuda-nvrtc-cu12==12.4.127
49
- nvidia-cuda-runtime-cu12==12.4.127
50
- nvidia-cudnn-cu12==9.1.0.70
51
- nvidia-cufft-cu12==11.2.1.3
52
- nvidia-curand-cu12==10.3.5.147
53
- nvidia-cusolver-cu12==11.6.1.9
54
- nvidia-cusparse-cu12==12.3.1.170
55
- nvidia-cusparselt-cu12==0.6.2
56
- nvidia-ml-py==12.535.161
57
- nvidia-nccl-cu12==2.21.5
58
- nvidia-nvjitlink-cu12==12.4.127
59
- nvidia-nvtx-cu12==12.4.127
60
- nvitop==1.3.2
61
- omegaconf==2.3.0
62
- orjson==3.10.7
63
- pandas==2.2.3
64
- pillow==10.4.0
65
- propcache==0.3.0
66
- psutil==6.0.0
67
- pydantic==2.9.2
68
- pydantic_core==2.23.4
69
- pydub==0.25.1
70
- pygltflib==1.16.2
71
- Pygments==2.18.0
72
- pyparsing==3.2.0
73
- python-dateutil==2.9.0.post0
74
- python-multipart==0.0.12
75
- pytorch-lightning==2.5.0.post0
76
- pytz==2024.2
77
- PyYAML==6.0.2
78
- regex==2024.11.6
79
- rich==13.9.2
80
- ruff==0.6.9
81
- safetensors==0.5.3
82
- semantic-version==2.10.0
83
- setuptools==72.1.0
84
- shellingham==1.5.4
85
- six==1.16.0
86
- sniffio==1.3.1
87
- starlette==0.38.6
88
- sympy==1.13.1
89
- termcolor==2.4.0
90
- tomlkit==0.12.0
91
- torch==2.6.0
92
- torchmetrics==1.6.1
93
- triton==3.2.0
94
- typer==0.12.5
95
- typing-inspect==0.9.0
96
- typing_extensions==4.12.2
97
- tzdata==2024.2
98
- uvicorn==0.32.0
99
- websockets==12.0
100
- wheel==0.43.0
101
- wrapt==1.16.0
102
- yarl==1.18.3
103
- zipp==3.21.0
104
- git+https://github.com/YiftachEde/shap-d-edit.git
105
- ipywidgets
106
- xatlas
107
- trimesh
108
- opencv-python
109
- git+https://github.com/NVlabs/nvdiffrast.git
110
- rembg
111
- onnxruntime
112
- kiui
113
- transformers
114
- PyMCubes
 
1
+ torch==2.0.1
2
+ torchvision==0.15.2
3
+ gradio==4.0.2
4
+ diffusers>=0.24.0
5
+ transformers>=4.30.0
6
+ accelerate>=0.20.3
7
+ omegaconf
8
+ pytorch-lightning
9
+ huggingface_hub
10
+ einops
11
+ numpy
12
+ Pillow
13
+ shap-e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.10.13
zero123plus/pipeline.py CHANGED
@@ -881,7 +881,6 @@ class Zero123PlusPipeline(diffusers.StableDiffusionPipeline):
881
  )
882
  else:
883
  image = latents
884
- has_nsfw_concept = None
885
 
886
  if has_nsfw_concept is None:
887
  do_denormalize = [True] * image.shape[0]
 
881
  )
882
  else:
883
  image = latents
 
884
 
885
  if has_nsfw_concept is None:
886
  do_denormalize = [True] * image.shape[0]