feat(method): support fp16
Browse files
ImageReward/ImageReward.py
CHANGED
@@ -81,6 +81,12 @@ class ImageReward(nn.Module):
|
|
81 |
self.mean = 0.16717362830052426
|
82 |
self.std = 1.0333394966054072
|
83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
def score_gard(self, prompt_ids, prompt_attention_mask, image):
|
85 |
|
86 |
image_embeds = self.blip.visual_encoder(image)
|
|
|
81 |
self.mean = 0.16717362830052426
|
82 |
self.std = 1.0333394966054072
|
83 |
|
84 |
+
def text_tokenizer(self, prompt):
|
85 |
+
# text encode
|
86 |
+
text_input = self.blip.tokenizer(prompt, padding='max_length', truncation=True, max_length=35,
|
87 |
+
return_tensors="pt").to(self.device)
|
88 |
+
return text_input
|
89 |
+
|
90 |
def score_gard(self, prompt_ids, prompt_attention_mask, image):
|
91 |
|
92 |
image_embeds = self.blip.visual_encoder(image)
|
README.md
CHANGED
@@ -215,12 +215,18 @@ python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist
|
|
215 |
|
216 |
- I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
|
217 |
- `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
|
218 |
-
- `neg_prompt` negative prompts affect the quality of the results
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
## TODO
|
221 |
|
222 |
-
- [x] Release the code
|
223 |
-
- [x] Add docker image
|
|
|
224 |
|
225 |
## :books: Acknowledgement
|
226 |
|
|
|
215 |
|
216 |
- I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
|
217 |
- `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
|
218 |
+
- `neg_prompt`: negative prompts affect the quality of the results.
|
219 |
+
- By setting `state.mprec='fp16'`, you can significantly reduce GPU memory usage.
|
220 |
+
|
221 |
+
```shell
|
222 |
+
CUDA_VISIBLE_DEVICES=0 python svgdreamer.py x=iconography skip_sive=True "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs-reward/Sydney-reward' state.mprec='fp16' x.vpsd.phi_ReFL=True multirun=True srange='[10,12]'
|
223 |
+
```
|
224 |
|
225 |
## TODO
|
226 |
|
227 |
+
- [x] Release the code.
|
228 |
+
- [x] Add docker image.
|
229 |
+
- [x] Support fp16 optimization.
|
230 |
|
231 |
## :books: Acknowledgement
|
232 |
|
svgdreamer/painter/VPSD_pipeline.py
CHANGED
@@ -24,7 +24,12 @@ from svgdreamer.diffusers_warp import init_StableDiffusion_pipeline, init_diffus
|
|
24 |
|
25 |
class VectorizedParticleSDSPipeline(torch.nn.Module):
|
26 |
|
27 |
-
def __init__(self,
|
|
|
|
|
|
|
|
|
|
|
28 |
super().__init__()
|
29 |
self.device = device
|
30 |
assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
|
@@ -32,7 +37,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
|
|
32 |
|
33 |
pipe_kwargs = {
|
34 |
"device": self.device,
|
35 |
-
"torch_dtype": torch.float32,
|
36 |
"local_files_only": not diffuser_cfg.download,
|
37 |
"force_download": diffuser_cfg.force_download,
|
38 |
"resume_download": diffuser_cfg.resume_download,
|
@@ -569,7 +574,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
|
|
569 |
noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
|
570 |
|
571 |
# w(t), sigma_t^2
|
572 |
-
w = (1 - self.alphas[self.t])
|
573 |
grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
|
574 |
grad = torch.nan_to_num(grad)
|
575 |
|
|
|
24 |
|
25 |
class VectorizedParticleSDSPipeline(torch.nn.Module):
|
26 |
|
27 |
+
def __init__(self,
|
28 |
+
model_cfg: DictConfig,
|
29 |
+
diffuser_cfg: DictConfig,
|
30 |
+
guidance_cfg: DictConfig,
|
31 |
+
device: torch.device,
|
32 |
+
dtype):
|
33 |
super().__init__()
|
34 |
self.device = device
|
35 |
assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
|
|
|
37 |
|
38 |
pipe_kwargs = {
|
39 |
"device": self.device,
|
40 |
+
"torch_dtype": torch.float16 if dtype == 'fp16' else torch.float32,
|
41 |
"local_files_only": not diffuser_cfg.download,
|
42 |
"force_download": diffuser_cfg.force_download,
|
43 |
"resume_download": diffuser_cfg.resume_download,
|
|
|
574 |
noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
|
575 |
|
576 |
# w(t), sigma_t^2
|
577 |
+
w = (1 - self.alphas[self.t]).to(pred_rgb.dtype)
|
578 |
grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
|
579 |
grad = torch.nan_to_num(grad)
|
580 |
|
svgdreamer/pipelines/SVGDreamer_pipeline.py
CHANGED
@@ -445,7 +445,8 @@ class SVGDreamerPipeline(ModelState):
|
|
445 |
path_reinit = self.x_cfg.path_reinit
|
446 |
|
447 |
# init VPSD
|
448 |
-
pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg,
|
|
|
449 |
# init reward model
|
450 |
reward_model = None
|
451 |
if guidance_cfg.phi_ReFL:
|
@@ -522,7 +523,7 @@ class SVGDreamerPipeline(ModelState):
|
|
522 |
self.frame_idx += 1
|
523 |
|
524 |
L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
|
525 |
-
raster_imgs,
|
526 |
self.step,
|
527 |
prompt=[text_prompt],
|
528 |
negative_prompt=self.args.neg_prompt,
|
|
|
445 |
path_reinit = self.x_cfg.path_reinit
|
446 |
|
447 |
# init VPSD
|
448 |
+
pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg,
|
449 |
+
self.device, self.args.state.mprec)
|
450 |
# init reward model
|
451 |
reward_model = None
|
452 |
if guidance_cfg.phi_ReFL:
|
|
|
523 |
self.frame_idx += 1
|
524 |
|
525 |
L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
|
526 |
+
raster_imgs.to(self.weight_dtype),
|
527 |
self.step,
|
528 |
prompt=[text_prompt],
|
529 |
negative_prompt=self.args.neg_prompt,
|