YiftachEde committed on
Commit
776d5b3
·
1 Parent(s): e03a824
Files changed (2) hide show
  1. app.py +33 -19
  2. app2.py +2 -1
app.py CHANGED
@@ -15,6 +15,7 @@ from shap_e.models.download import load_model, load_config
15
  from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
16
  import spaces
17
  from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
 
18
 
19
  from src.utils.train_util import instantiate_from_config
20
  from src.utils.camera_util import (
@@ -27,15 +28,14 @@ from src.utils.mesh_util import save_obj, save_glb
27
  from src.utils.infer_util import remove_background, resize_foreground
28
 
29
  def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list,
30
- fov_degrees: float,distance) -> DifferentiableCameraBatch:
31
  # Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
32
- object_diagonal = distance # Correct diagonal calculation for the cube
33
 
34
  # Calculate radius based on object size and FOV
35
  fov_radians = math.radians(fov_degrees)
36
  radius = (object_diagonal / 2) / math.tan(fov_radians / 2) # Correct radius calculation
37
- # print(radius)
38
- # exit(0)
39
  origins = []
40
  xs = []
41
  ys = []
@@ -75,8 +75,6 @@ def create_custom_cameras(size: int, device: torch.device, azimuths: list, eleva
75
  ),
76
  )
77
 
78
-
79
- @spaces.GPU(duration=60)
80
  def load_models():
81
  """Initialize and load all required models"""
82
  config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
@@ -231,15 +229,22 @@ def create_mesh(refined_image, model, infer_config):
231
 
232
  class ShapERenderer:
233
  def __init__(self, device):
234
- print("Loading Shap-E models...")
235
  self.device = device
236
- self.xm = load_model('transmitter', device=device)
237
- self.model = load_model('text300M', device=device)
238
- self.diffusion = diffusion_from_config(load_config('diffusion'))
239
- print("Shap-E models loaded!")
 
 
 
 
 
 
240
 
241
- @spaces.GPU(duration=60)
242
  def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
 
 
243
  # Generate latents using the text-to-3D model
244
  batch_size = 1
245
  guidance_scale = float(guidance_scale)
@@ -272,13 +277,13 @@ class ShapERenderer:
272
  rendered_image = decode_latent_images(
273
  self.xm,
274
  latents[0],
275
- rendering_mode='stf',
276
- cameras=cameras
277
  )
278
- images.append(rendered_image.detach().cpu().numpy())
279
 
280
  # Convert images to uint8
281
- images = [(image).astype(np.uint8) for image in images]
282
 
283
  # Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
284
  layout = np.zeros((960, 640, 3), dtype=np.uint8)
@@ -292,12 +297,19 @@ class ShapERenderer:
292
  class RefinerInterface:
293
  def __init__(self):
294
  print("Initializing InstantMesh models...")
295
- self.pipeline, self.model, self.infer_config = load_models()
296
- print("InstantMesh models loaded!")
 
 
 
 
 
 
297
 
298
- @spaces.GPU(duration=65)
299
  def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
300
  """Main refinement function"""
 
 
301
  # Process image and get refined output
302
  input_image = Image.fromarray(input_image)
303
 
@@ -434,11 +446,13 @@ def create_demo():
434
  )
435
 
436
  # Set up event handlers
 
437
  def generate(prompt, guidance_scale, num_steps):
438
  with torch.no_grad():
439
  layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
440
  return layout
441
 
 
442
  def refine(input_image, prompt, steps, guidance_scale):
443
  refined_img, mesh_path = refiner.refine_model(
444
  input_image,
 
15
  from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
16
  import spaces
17
  from shap_e.models.nn.camera import DifferentiableCameraBatch, DifferentiableProjectiveCamera
18
+ import math
19
 
20
  from src.utils.train_util import instantiate_from_config
21
  from src.utils.camera_util import (
 
28
  from src.utils.infer_util import remove_background, resize_foreground
29
 
30
  def create_custom_cameras(size: int, device: torch.device, azimuths: list, elevations: list,
31
+ fov_degrees: float, distance: float) -> DifferentiableCameraBatch:
32
  # Object is in a 2x2x2 bounding box (-1 to 1 in each dimension)
33
+ object_diagonal = distance # Correct diagonal calculation for the cube
34
 
35
  # Calculate radius based on object size and FOV
36
  fov_radians = math.radians(fov_degrees)
37
  radius = (object_diagonal / 2) / math.tan(fov_radians / 2) # Correct radius calculation
38
+
 
39
  origins = []
40
  xs = []
41
  ys = []
 
75
  ),
76
  )
77
 
 
 
78
  def load_models():
79
  """Initialize and load all required models"""
80
  config = OmegaConf.load('configs/instant-nerf-large-best.yaml')
 
229
 
230
  class ShapERenderer:
231
  def __init__(self, device):
232
+ print("Initializing Shap-E models...")
233
  self.device = device
234
+ self.xm = None
235
+ self.model = None
236
+ self.diffusion = None
237
+ print("Shap-E models initialized!")
238
+
239
+ def ensure_models_loaded(self):
240
+ if self.model is None:
241
+ self.xm = load_model('transmitter', device=self.device)
242
+ self.model = load_model('text300M', device=self.device)
243
+ self.diffusion = diffusion_from_config(load_config('diffusion'))
244
 
 
245
  def generate_views(self, prompt, guidance_scale=15.0, num_steps=64):
246
+ self.ensure_models_loaded()
247
+
248
  # Generate latents using the text-to-3D model
249
  batch_size = 1
250
  guidance_scale = float(guidance_scale)
 
277
  rendered_image = decode_latent_images(
278
  self.xm,
279
  latents[0],
280
+ cameras=cameras,
281
+ rendering_mode='stf'
282
  )
283
+ images.append(rendered_image[0])
284
 
285
  # Convert images to uint8
286
+ images = [np.array(image) for image in images]
287
 
288
  # Create 2x3 grid layout (640x960) instead of 3x2 (960x640)
289
  layout = np.zeros((960, 640, 3), dtype=np.uint8)
 
297
  class RefinerInterface:
298
  def __init__(self):
299
  print("Initializing InstantMesh models...")
300
+ self.pipeline = None
301
+ self.model = None
302
+ self.infer_config = None
303
+ print("InstantMesh models initialized!")
304
+
305
+ def ensure_models_loaded(self):
306
+ if self.pipeline is None:
307
+ self.pipeline, self.model, self.infer_config = load_models()
308
 
 
309
  def refine_model(self, input_image, prompt, steps=75, guidance_scale=7.5):
310
  """Main refinement function"""
311
+ self.ensure_models_loaded()
312
+
313
  # Process image and get refined output
314
  input_image = Image.fromarray(input_image)
315
 
 
446
  )
447
 
448
  # Set up event handlers
449
+ @spaces.GPU(duration=60)
450
  def generate(prompt, guidance_scale, num_steps):
451
  with torch.no_grad():
452
  layout, _ = shap_e.generate_views(prompt, guidance_scale, num_steps)
453
  return layout
454
 
455
+ @spaces.GPU(duration=60)
456
  def refine(input_image, prompt, steps, guidance_scale):
457
  refined_img, mesh_path = refiner.refine_model(
458
  input_image,
app2.py CHANGED
@@ -12,7 +12,8 @@ from einops import rearrange
12
  from shap_e.diffusion.sample import sample_latents
13
  from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
14
  from shap_e.models.download import load_model, load_config
15
- from shap_e.util.notebooks import create_pan_cameras, decode_latent_images, create_custom_cameras
 
16
 
17
  from src.utils.train_util import instantiate_from_config
18
  from src.utils.camera_util import (
 
12
  from shap_e.diffusion.sample import sample_latents
13
  from shap_e.diffusion.gaussian_diffusion import diffusion_from_config
14
  from shap_e.models.download import load_model, load_config
15
+ from shap_e.util.notebooks import create_pan_cameras, decode_latent_images
16
+ from util import create_custom_cameras
17
 
18
  from src.utils.train_util import instantiate_from_config
19
  from src.utils.camera_util import (