xingxm committed on
Commit
18fe89d
·
1 Parent(s): 36ed051

feat(method): support fp16

Browse files
ImageReward/ImageReward.py CHANGED
@@ -81,6 +81,12 @@ class ImageReward(nn.Module):
81
  self.mean = 0.16717362830052426
82
  self.std = 1.0333394966054072
83
 
 
 
 
 
 
 
84
  def score_gard(self, prompt_ids, prompt_attention_mask, image):
85
 
86
  image_embeds = self.blip.visual_encoder(image)
 
81
  self.mean = 0.16717362830052426
82
  self.std = 1.0333394966054072
83
 
84
+ def text_tokenizer(self, prompt):
85
+ # text encode
86
+ text_input = self.blip.tokenizer(prompt, padding='max_length', truncation=True, max_length=35,
87
+ return_tensors="pt").to(self.device)
88
+ return text_input
89
+
90
  def score_gard(self, prompt_ids, prompt_attention_mask, image):
91
 
92
  image_embeds = self.blip.visual_encoder(image)
README.md CHANGED
@@ -215,12 +215,18 @@ python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist
215
 
216
  - I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
217
  - `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
218
- - `neg_prompt` negative prompts affect the quality of the results.
 
 
 
 
 
219
 
220
  ## 📋 TODO
221
 
222
- - [x] Release the code
223
- - [x] Add docker image
 
224
 
225
  ## :books: Acknowledgement
226
 
 
215
 
216
  - I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
217
  - `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
218
+ - `neg_prompt` negative prompts affect the quality of the results
219
+ - By setting `state.mprec='fp16'`, you can significantly reduce GPU memory usage.
220
+
221
+ ```shell
222
+ CUDA_VISIBLE_DEVICES=0 python svgdreamer.py x=iconography skip_sive=True "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs-reward/Sydney-reward' state.mprec='fp16' x.vpsd.phi_ReFL=True multirun=True srange='[10,12]'
223
+ ```
224
 
225
  ## 📋 TODO
226
 
227
+ - [x] Release the code.
228
+ - [x] Add docker image.
229
+ - [x] Support fp16 optimization.
230
 
231
  ## :books: Acknowledgement
232
 
svgdreamer/painter/VPSD_pipeline.py CHANGED
@@ -24,7 +24,12 @@ from svgdreamer.diffusers_warp import init_StableDiffusion_pipeline, init_diffus
24
 
25
  class VectorizedParticleSDSPipeline(torch.nn.Module):
26
 
27
- def __init__(self, model_cfg: DictConfig, diffuser_cfg: DictConfig, guidance_cfg: DictConfig, device: torch.device):
 
 
 
 
 
28
  super().__init__()
29
  self.device = device
30
  assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
@@ -32,7 +37,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
32
 
33
  pipe_kwargs = {
34
  "device": self.device,
35
- "torch_dtype": torch.float32,
36
  "local_files_only": not diffuser_cfg.download,
37
  "force_download": diffuser_cfg.force_download,
38
  "resume_download": diffuser_cfg.resume_download,
@@ -569,7 +574,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
569
  noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
570
 
571
  # w(t), sigma_t^2
572
- w = (1 - self.alphas[self.t])
573
  grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
574
  grad = torch.nan_to_num(grad)
575
 
 
24
 
25
  class VectorizedParticleSDSPipeline(torch.nn.Module):
26
 
27
+ def __init__(self,
28
+ model_cfg: DictConfig,
29
+ diffuser_cfg: DictConfig,
30
+ guidance_cfg: DictConfig,
31
+ device: torch.device,
32
+ dtype):
33
  super().__init__()
34
  self.device = device
35
  assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
 
37
 
38
  pipe_kwargs = {
39
  "device": self.device,
40
+ "torch_dtype": torch.float16 if dtype == 'fp16' else torch.float32,
41
  "local_files_only": not diffuser_cfg.download,
42
  "force_download": diffuser_cfg.force_download,
43
  "resume_download": diffuser_cfg.resume_download,
 
574
  noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
575
 
576
  # w(t), sigma_t^2
577
+ w = (1 - self.alphas[self.t]).to(pred_rgb.dtype)
578
  grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
579
  grad = torch.nan_to_num(grad)
580
 
svgdreamer/pipelines/SVGDreamer_pipeline.py CHANGED
@@ -445,7 +445,8 @@ class SVGDreamerPipeline(ModelState):
445
  path_reinit = self.x_cfg.path_reinit
446
 
447
  # init VPSD
448
- pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg, self.device)
 
449
  # init reward model
450
  reward_model = None
451
  if guidance_cfg.phi_ReFL:
@@ -522,7 +523,7 @@ class SVGDreamerPipeline(ModelState):
522
  self.frame_idx += 1
523
 
524
  L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
525
- raster_imgs,
526
  self.step,
527
  prompt=[text_prompt],
528
  negative_prompt=self.args.neg_prompt,
 
445
  path_reinit = self.x_cfg.path_reinit
446
 
447
  # init VPSD
448
+ pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg,
449
+ self.device, self.args.state.mprec)
450
  # init reward model
451
  reward_model = None
452
  if guidance_cfg.phi_ReFL:
 
523
  self.frame_idx += 1
524
 
525
  L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
526
+ raster_imgs.to(self.weight_dtype),
527
  self.step,
528
  prompt=[text_prompt],
529
  negative_prompt=self.args.neg_prompt,