feat(method): support fp16

Files changed (4) hide show

ImageReward/ImageReward.py CHANGED Viewed

@@ -81,6 +81,12 @@ class ImageReward(nn.Module):
         self.mean = 0.16717362830052426
         self.std = 1.0333394966054072
     def score_gard(self, prompt_ids, prompt_attention_mask, image):
         image_embeds = self.blip.visual_encoder(image)

         self.mean = 0.16717362830052426
         self.std = 1.0333394966054072
+    def text_tokenizer(self, prompt):  # tokenize `prompt` with self.blip.tokenizer and return the result
+        # Encode the text: pad/truncate to max_length=35 and request "pt" (presumably PyTorch) tensors
+        text_input = self.blip.tokenizer(prompt, padding='max_length', truncation=True, max_length=35,
+                                         return_tensors="pt").to(self.device)  # move the encoding to self.device
+        return text_input
     def score_gard(self, prompt_ids, prompt_attention_mask, image):
         image_embeds = self.blip.visual_encoder(image)

README.md CHANGED Viewed

@@ -215,12 +215,18 @@ python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist
 - I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
 - `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
-- `neg_prompt` negative prompts affect the quality of the results.
 ## 📋 TODO
-- [x] Release the code
-- [x] Add docker image
 ## :books: Acknowledgement

 - I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
 - `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
+- `neg_prompt` negative prompts affect the quality of the results.
+- By setting `state.mprec='fp16'`, you can significantly reduce GPU memory usage.
+```shell
+CUDA_VISIBLE_DEVICES=0 python svgdreamer.py x=iconography skip_sive=True "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs-reward/Sydney-reward' state.mprec='fp16' x.vpsd.phi_ReFL=True multirun=True srange='[10,12]'
+```
 ## 📋 TODO
+- [x] Release the code.
+- [x] Add docker image.
+- [x] Support fp16 optimization.
 ## :books: Acknowledgement

svgdreamer/painter/VPSD_pipeline.py CHANGED Viewed

@@ -24,7 +24,12 @@ from svgdreamer.diffusers_warp import init_StableDiffusion_pipeline, init_diffus
 class VectorizedParticleSDSPipeline(torch.nn.Module):
-    def __init__(self, model_cfg: DictConfig, diffuser_cfg: DictConfig, guidance_cfg: DictConfig, device: torch.device):
         super().__init__()
         self.device = device
         assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
@@ -32,7 +37,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
         pipe_kwargs = {
             "device": self.device,
-            "torch_dtype": torch.float32,
             "local_files_only": not diffuser_cfg.download,
             "force_download": diffuser_cfg.force_download,
             "resume_download": diffuser_cfg.resume_download,
@@ -569,7 +574,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
         noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
         # w(t), sigma_t^2
-        w = (1 - self.alphas[self.t])
         grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
         grad = torch.nan_to_num(grad)

 class VectorizedParticleSDSPipeline(torch.nn.Module):
+    def __init__(self,
+                 model_cfg: DictConfig,
+                 diffuser_cfg: DictConfig,
+                 guidance_cfg: DictConfig,
+                 device: torch.device,
+                 dtype):
         super().__init__()
         self.device = device
         assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
         pipe_kwargs = {
             "device": self.device,
+            "torch_dtype": torch.float16 if dtype == 'fp16' else torch.float32,
             "local_files_only": not diffuser_cfg.download,
             "force_download": diffuser_cfg.force_download,
             "resume_download": diffuser_cfg.resume_download,
         noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
         # w(t), sigma_t^2
+        w = (1 - self.alphas[self.t]).to(pred_rgb.dtype)
         grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
         grad = torch.nan_to_num(grad)

svgdreamer/pipelines/SVGDreamer_pipeline.py CHANGED Viewed

@@ -445,7 +445,8 @@ class SVGDreamerPipeline(ModelState):
         path_reinit = self.x_cfg.path_reinit
         # init VPSD
-        pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg, self.device)
         # init reward model
         reward_model = None
         if guidance_cfg.phi_ReFL:
@@ -522,7 +523,7 @@ class SVGDreamerPipeline(ModelState):
                     self.frame_idx += 1
                 L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
-                    raster_imgs,
                     self.step,
                     prompt=[text_prompt],
                     negative_prompt=self.args.neg_prompt,

         path_reinit = self.x_cfg.path_reinit
         # init VPSD
+        pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg,
+                                                 self.device, self.args.state.mprec)
         # init reward model
         reward_model = None
         if guidance_cfg.phi_ReFL:
                     self.frame_idx += 1
                 L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
+                    raster_imgs.to(self.weight_dtype),
                     self.step,
                     prompt=[text_prompt],
                     negative_prompt=self.args.neg_prompt,