Spaces: Running on Zero
Commit · 776d5b3
1 Parent(s): e03a824
add
Browse files
app.py
CHANGED
@@ -15,6 +15,7 @@ from shap_e.models.download import load_model, load_config
|
|
15 |
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
16 |
import spaces
|
17 |
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
|
|
|
18 |
|
19 |
from src.utils.train_util import instantiate_from_config
|
20 |
from src.utils.camera_util import (
|
@@ -27,15 +28,14 @@ from src.utils.mesh_util import save_obj, save_glb
|
|
27 |
from src.utils.infer_util import remove_background, resize_foreground
|
28 |
|
29 |
def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list,
|
30 |
-
fov_degrees: float,distance) -> DifferentiableCameraBatch:
|
31 |
# Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
|
32 |
-
object_diagonal =
|
33 |
|
34 |
# Calculate radius based on object size and FOV
|
35 |
fov_radians = math.radians(fov_degrees)
|
36 |
radius = (object_diagonal / 2) / math.tan(fov_radians / 2) # Correct radius calculation
|
37 |
-
|
38 |
-
# exit(0)
|
39 |
origins = []
|
40 |
xs = []
|
41 |
ys = []
|
@@ -75,8 +75,6 @@ def create_custom_cameras(size: int, device: torch.device, azimuths: list, eleva
|
|
75 |
),
|
76 |
)
|
77 |
|
78 |
-
|
79 |
-
@spaces.GPU(duration=60)
|
80 |
def load_models():
|
81 |
"""Initialize and load all required models"""
|
82 |
config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
|
@@ -231,15 +229,22 @@ def create_mesh(refined_image, model, infer_config):
|
|
231 |
|
232 |
class ShapERenderer:
|
233 |
def __init__(self, device):
|
234 |
-
print("
|
235 |
self.device = device
|
236 |
-
self.xm =
|
237 |
-
self.model =
|
238 |
-
self.diffusion =
|
239 |
-
print("Shap-E models
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
|
241 |
-
@spaces.GPU(duration=60)
|
242 |
def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
|
|
|
|
|
243 |
# Generate latents using the text-to-3D model
|
244 |
batch_size = 1
|
245 |
guidance_scale = float(guidance_scale)
|
@@ -272,13 +277,13 @@ class ShapERenderer:
|
|
272 |
rendered_image = decode_latent_images(
|
273 |
self.xm,
|
274 |
latents[0],
|
275 |
-
|
276 |
-
|
277 |
)
|
278 |
-
images.append(rendered_image
|
279 |
|
280 |
# Convert images to uint8
|
281 |
-
images = [(image)
|
282 |
|
283 |
# Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
|
284 |
layout = np.zeros((960, 640, 3), dtype=np.uint8)
|
@@ -292,12 +297,19 @@ class ShapERenderer:
|
|
292 |
class RefinerInterface:
|
293 |
def __init__(self):
|
294 |
print("Initializing InstantMesh models...")
|
295 |
-
self.pipeline
|
296 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
297 |
|
298 |
-
@spaces.GPU(duration=65)
|
299 |
def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
|
300 |
"""Main refinement function"""
|
|
|
|
|
301 |
# Process image and get refined output
|
302 |
input_image = Image.fromarray(input_image)
|
303 |
|
@@ -434,11 +446,13 @@ def create_demo():
|
|
434 |
)
|
435 |
|
436 |
# Set up event handlers
|
|
|
437 |
def generate(prompt, guidance_scale, num_steps):
|
438 |
with torch.no_grad():
|
439 |
layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
|
440 |
return layout
|
441 |
|
|
|
442 |
def refine(input_image, prompt, steps, guidance_scale):
|
443 |
refined_img, mesh_path = refiner.refine_model(
|
444 |
input_image,
|
|
|
15 |
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
16 |
import spaces
|
17 |
from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
|
18 |
+
import math
|
19 |
|
20 |
from src.utils.train_util import instantiate_from_config
|
21 |
from src.utils.camera_util import (
|
|
|
28 |
from src.utils.infer_util import remove_background, resize_foreground
|
29 |
|
30 |
def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list,
|
31 |
+
fov_degrees: float, distance: float) -> DifferentiableCameraBatch:
|
32 |
# Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
|
33 |
+
object_diagonal = distance # Correct diagonal calculation for the cube
|
34 |
|
35 |
# Calculate radius based on object size and FOV
|
36 |
fov_radians = math.radians(fov_degrees)
|
37 |
radius = (object_diagonal / 2) / math.tan(fov_radians / 2) # Correct radius calculation
|
38 |
+
|
|
|
39 |
origins = []
|
40 |
xs = []
|
41 |
ys = []
|
|
|
75 |
),
|
76 |
)
|
77 |
|
|
|
|
|
78 |
def load_models():
|
79 |
"""Initialize and load all required models"""
|
80 |
config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
|
|
|
229 |
|
230 |
class ShapERenderer:
|
231 |
def __init__(self, device):
|
232 |
+
print("Initializing Shap-E models...")
|
233 |
self.device = device
|
234 |
+
self.xm = None
|
235 |
+
self.model = None
|
236 |
+
self.diffusion = None
|
237 |
+
print("Shap-E models initialized!")
|
238 |
+
|
239 |
+
def ensure_models_loaded(self):
|
240 |
+
if self.model is None:
|
241 |
+
self.xm = load_model('transmitter', device=self.device)
|
242 |
+
self.model = load_model('text300M', device=self.device)
|
243 |
+
self.diffusion = diffusion_from_config(load_config('diffusion'))
|
244 |
|
|
|
245 |
def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
|
246 |
+
self.ensure_models_loaded()
|
247 |
+
|
248 |
# Generate latents using the text-to-3D model
|
249 |
batch_size = 1
|
250 |
guidance_scale = float(guidance_scale)
|
|
|
277 |
rendered_image = decode_latent_images(
|
278 |
self.xm,
|
279 |
latents[0],
|
280 |
+
cameras=cameras,
|
281 |
+
rendering_mode='stf'
|
282 |
)
|
283 |
+
images.append(rendered_image[0])
|
284 |
|
285 |
# Convert images to uint8
|
286 |
+
images = [np.array(image) for image in images]
|
287 |
|
288 |
# Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
|
289 |
layout = np.zeros((960, 640, 3), dtype=np.uint8)
|
|
|
297 |
class RefinerInterface:
|
298 |
def __init__(self):
|
299 |
print("Initializing InstantMesh models...")
|
300 |
+
self.pipeline = None
|
301 |
+
self.model = None
|
302 |
+
self.infer_config = None
|
303 |
+
print("InstantMesh models initialized!")
|
304 |
+
|
305 |
+
def ensure_models_loaded(self):
|
306 |
+
if self.pipeline is None:
|
307 |
+
self.pipeline, self.model, self.infer_config = load_models()
|
308 |
|
|
|
309 |
def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
|
310 |
"""Main refinement function"""
|
311 |
+
self.ensure_models_loaded()
|
312 |
+
|
313 |
# Process image and get refined output
|
314 |
input_image = Image.fromarray(input_image)
|
315 |
|
|
|
446 |
)
|
447 |
|
448 |
# Set up event handlers
|
449 |
+
@spaces.GPU(duration=60)
|
450 |
def generate(prompt, guidance_scale, num_steps):
|
451 |
with torch.no_grad():
|
452 |
layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
|
453 |
return layout
|
454 |
|
455 |
+
@spaces.GPU(duration=60)
|
456 |
def refine(input_image, prompt, steps, guidance_scale):
|
457 |
refined_img, mesh_path = refiner.refine_model(
|
458 |
input_image,
|
app2.py
CHANGED
@@ -12,7 +12,8 @@ from einops import rearrange
|
|
12 |
from shap_e.diffusion.sample import sample_latents
|
13 |
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
|
14 |
from shap_e.models.download import load_model, load_config
|
15 |
-
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
|
|
16 |
|
17 |
from src.utils.train_util import instantiate_from_config
|
18 |
from src.utils.camera_util import (
|
|
|
12 |
from shap_e.diffusion.sample import sample_latents
|
13 |
from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
|
14 |
from shap_e.models.download import load_model, load_config
|
15 |
+
from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
|
16 |
+
from util import create_custom_cameras
|
17 |
|
18 |
from src.utils.train_util import instantiate_from_config
|
19 |
from src.utils.camera_util import (
|