xingxm committed · Commit acdfbd8 · 1 Parent(s): fc0e1a8

fix(method): handle the case where attn_map is None

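In brief: when the cross-attention map yields no usable init points, `component_rendering` now returns `0`, and `SIVE_stage` moves on to the next generated sample, capped at `n_particle + 10` attempts. A minimal sketch of that control flow (illustrative names, not verbatim repo code):

```python
# Sketch of the recovery flow this commit adds: the render callback signals an
# attention-map failure by returning 0, and the loop retries with the next
# sample index instead of crashing, up to n_particle + 10 attempts.
def sive_stage_loop(n_particle, render):
    successful, cur_idx, results = 0, 0, []
    while successful < n_particle:
        if cur_idx >= n_particle + 10:  # max attempts, as in the new loop
            break
        out = render(cur_idx)  # hypothetical callback; returns 0 on failure
        cur_idx += 1
        if out == 0:
            continue  # attention map yielded no init points; try next sample
        successful += 1
        results.append(out)
    return results

# e.g. render = lambda i: 0 if i % 2 else f"particle_{i}.svg"
```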
Examples.md CHANGED
@@ -160,4 +160,10 @@ expressive eyes. <br/>
 
 ````shell
 python svgdreamer.py x=painting "prompt='self portrait of Van Gogh. oil painting. cmyk portrait. multi colored. defiant and beautiful. cmyk. expressive eyes.'" x.num_paths=256 result_path='./logs/VanGogh-Portrait'
-````
+````
+
+### Case: planet Saturn
+
+```shell
+python svgdreamer.py x=iconography-s1 skip_sive=False "prompt='An icon of the planet Saturn. minimal flat 2D vector icon. plain color background. trending on ArtStation.'" token_ind=6 x.sive.bg.num_iter=50 x.sive.fg.num_iter=50 x.vpsd.t_schedule='randint' result_path='./logs/Saturn' multirun=True state.mprec='fp16'
+```
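The new `state.mprec='fp16'` override requests half precision; since the pipeline is built on `ModelState`/accelerate (note `self.accelerator` in the pipeline diff below), it plausibly maps to something like the following. The exact wiring is an assumption, not confirmed by this diff:

```python
# Hedged sketch: what state.mprec='fp16' likely toggles under the hood.
from accelerate import Accelerator

accelerator = Accelerator(mixed_precision="fp16")  # vs. "no" for full precision
```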
README.md CHANGED
@@ -80,7 +80,7 @@ realistic <br/>
 **Script:**
 
 ```shell
-python svgdreamer.py x=iconography skip_sive=False "prompt='an image of Batman. full body action pose, complete detailed body. white background. empty background, high quality, 4K, ultra realistic'" token_ind=4 x.sive.bg.num_iter=10 x.sive.fg.num_iter=10 x.vpsd.t_schedule='randint' result_path='./logs/batman' multirun=True
+python svgdreamer.py x=iconography skip_sive=False "prompt='an image of Batman. full body action pose, complete detailed body. white background. empty background, high quality, 4K, ultra realistic'" token_ind=4 x.vpsd.t_schedule='randint' result_path='./logs/batman' multirun=True
 ```
 
 🔹Parameter:
conf/x/iconography.yaml CHANGED
@@ -41,7 +41,7 @@ sive:
   mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
-    num_iter: 10
+    num_iter: 50
     num_paths: 256
     path_schedule: 'repeat' # 'repeat', 'list'
     schedule_each: 128
@@ -61,7 +61,7 @@ sive:
   xing_loss_weight: 0.001
   fg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
-    num_iter: 10
+    num_iter: 50
     num_paths: 256 # number of strokes
     path_schedule: 'repeat' # 'repeat', 'list'
     schedule_each: 128
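The bumped `num_iter` defaults (10 to 50) are why the README command above no longer passes `x.sive.bg.num_iter` explicitly. They can still be overridden per run; a minimal sketch of how the dotted CLI overrides used in the README and Examples commands merge over these YAML defaults, assuming the Hydra/OmegaConf-style config that the `conf/` layout suggests:

```python
from omegaconf import OmegaConf

# YAML defaults, abbreviated from conf/x/iconography.yaml after this commit
cfg = OmegaConf.create({"x": {"sive": {"bg": {"num_iter": 50}, "fg": {"num_iter": 50}}}})
# Dotted CLI overrides, as in "x.sive.bg.num_iter=10 x.sive.fg.num_iter=10"
cli = OmegaConf.from_dotlist(["x.sive.bg.num_iter=10", "x.sive.fg.num_iter=10"])
cfg = OmegaConf.merge(cfg, cli)
print(cfg.x.sive.bg.num_iter)  # 10: the CLI value wins over the YAML default
```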
svgdreamer/painter/__init__.py CHANGED
@@ -2,8 +2,8 @@
 # Copyright (c) XiMing Xing. All rights reserved.
 # Description:
 
-from .painter_params import (
-    Painter, PainterOptimizer, CosineWithWarmupLRLambda, RandomCoordInit, NaiveCoordInit, SparseCoordInit, get_sdf)
+from .painter_params import Painter, PainterOptimizer, CosineWithWarmupLRLambda, RandomCoordInit, NaiveCoordInit, \
+    SparseCoordInit, get_sdf
 from .component_painter_params import CompPainter, CompPainterOptimizer
 from .loss import xing_loss_fn
 from .VPSD_pipeline import VectorizedParticleSDSPipeline
svgdreamer/pipelines/SVGDreamer_pipeline.py CHANGED
@@ -20,8 +20,8 @@ from torchvision import transforms
 from skimage.color import rgb2gray
 
 from svgdreamer.libs import ModelState, get_optimizer
-from svgdreamer.painter import (CompPainter, CompPainterOptimizer, xing_loss_fn, Painter, PainterOptimizer,
-                                CosineWithWarmupLRLambda, VectorizedParticleSDSPipeline, DiffusionPipeline)
+from svgdreamer.painter import CompPainter, CompPainterOptimizer, xing_loss_fn, Painter, PainterOptimizer, \
+    CosineWithWarmupLRLambda, VectorizedParticleSDSPipeline, DiffusionPipeline
 from svgdreamer.token2attn.attn_control import EmptyControl, AttentionStore
 from svgdreamer.token2attn.ptp_utils import view_images
 from svgdreamer.utils.plot import plot_img, plot_couple, plot_attn, save_image
@@ -38,8 +38,10 @@ class SVGDreamerPipeline(ModelState):
         # assert
         assert args.x.style in ["iconography", "pixelart", "low-poly", "painting", "sketch", "ink"]
         args.skip_sive = True if args.x.style in ["pixelart", "low-poly"] else args.skip_sive
-        assert args.x.vpsd.n_particle >= args.x.vpsd.vsd_n_particle
-        assert args.x.vpsd.n_particle >= args.x.vpsd.phi_n_particle
+        # assert args.x.vpsd.n_particle >= args.x.vpsd.vsd_n_particle
+        if args.x.vpsd.vsd_n_particle > args.x.vpsd.n_particle: args.x.vpsd.vsd_n_particle = args.x.vpsd.n_particle
+        # assert args.x.vpsd.n_particle >= args.x.vpsd.phi_n_particle
+        if args.x.vpsd.phi_n_particle > args.x.vpsd.n_particle: args.x.vpsd.phi_n_particle = args.x.vpsd.n_particle
         assert args.x.vpsd.n_phi_sample >= 1
 
         logdir_ = f"sd{args.seed}" \
@@ -123,15 +125,26 @@ class SVGDreamerPipeline(ModelState):
         self.close(msg="painterly rendering complete.")
 
     def SIVE_stage(self, text_prompt: str):
-        # init diffusion model
+        # Init diffusion model
         pipeline = DiffusionPipeline(self.x_cfg.sive_model_cfg, self.args.diffuser, self.device)
 
         merged_svg_paths = []
         merged_images = []
-        for i in range(self.vpsd_cfg.n_particle):
-            select_sample_path = self.result_path / f'select_sample_{i}.png'
-            # generate sample and attention map
-            fg_attn_map, bg_attn_map, controller = self.extract_ldm_attn(i,
+
+        successful_particles = 0
+        cur_idx = 0
+
+        while successful_particles < self.vpsd_cfg.n_particle:
+            if cur_idx >= self.vpsd_cfg.n_particle + 10:  # max attempts
+                self.print(f"Reached maximum attempts ({cur_idx}). "
+                           f"Only processed {successful_particles} particles successfully.")
+                break
+
+            self.print(f"Processing particle {cur_idx} "
+                       f"(successful so far: {successful_particles}/{self.vpsd_cfg.n_particle})")
+            select_sample_path = self.result_path / f'select_sample_{cur_idx}.png'
+            # Generate sample and attention map
+            fg_attn_map, bg_attn_map, controller = self.extract_ldm_attn(cur_idx,
                                                                          self.x_cfg.sive_model_cfg,
                                                                          pipeline,
                                                                          text_prompt,
@@ -139,18 +152,18 @@
                                                                          self.sive_cfg.attn_cfg,
                                                                          self.im_size,
                                                                          self.args.token_ind)
-            # load selected file
+            # Load selected file
             select_img = self.target_file_preprocess(select_sample_path.as_posix())
             self.print(f"load target file from: {select_sample_path.as_posix()}")
 
-            # get objects by attention map
-            fg_img, bg_img, fg_mask, bg_mask = self.extract_object(i, select_img, fg_attn_map, bg_attn_map,
+            # Get objects by attention map
+            fg_img, bg_img, fg_mask, bg_mask = self.extract_object(cur_idx, select_img, fg_attn_map, bg_attn_map,
                                                                    tau=self.sive_cfg.mask_tau)
-            self.print(f"fg_img shape: {fg_img.shape}, bg_img: {bg_img.shape}")
+            # self.print(f"fg_img shape: {fg_img.shape}, bg_img: {bg_img.shape}")
 
-            # background rendering
-            self.print(f"-> background rendering: ")
-            bg_render_path = self.component_rendering(tag=f'{i}_bg',
+            # Background rendering
+            self.print(f"-> Background rendering: ")
+            bg_render_path = self.component_rendering(tag=f'{cur_idx}_bg',
                                                       prompt=text_prompt,
                                                       target_img=bg_img,
                                                       mask=bg_mask,
@@ -160,9 +173,14 @@
                                                       optim_cfg=self.sive_optim,
                                                       log_png_dir=self.bg_png_logs_dir,
                                                       log_svg_dir=self.bg_svg_logs_dir)
-            # foreground rendering
-            self.print(f"-> foreground rendering: ")
-            fg_render_path = self.component_rendering(tag=f'{i}_fg',
+            if bg_render_path == 0:
+                self.print(f"Background rendering failed for particle {cur_idx}, trying next particle")
+                cur_idx += 1
+                continue
+
+            # Foreground rendering
+            self.print(f"-> Foreground rendering: ")
+            fg_render_path = self.component_rendering(tag=f'{cur_idx}_fg',
                                                       prompt=text_prompt,
                                                       target_img=fg_img,
                                                       mask=fg_mask,
@@ -172,8 +190,16 @@
                                                       optim_cfg=self.sive_optim,
                                                       log_png_dir=self.fg_png_logs_dir,
                                                       log_svg_dir=self.fg_svg_logs_dir)
-            # merge foreground and background
-            merged_svg_path = self.result_path / f'SIVE_render_final_{i}.svg'
+            if fg_render_path == 0:
+                self.print(f"Foreground rendering failed for particle {cur_idx}, trying next particle")
+                cur_idx += 1
+                continue
+
+            successful_particles += 1
+            cur_idx += 1
+
+            # Merge foreground and background
+            merged_svg_path = self.result_path / f'SIVE_render_final_{cur_idx}.svg'
             merge_svg_files(
                 svg_path_1=bg_render_path,
                 svg_path_2=fg_render_path,
@@ -182,11 +208,11 @@
                 out_size=(self.im_size, self.im_size)
             )
 
-            # foreground and background refinement
+            # Foreground and background refinement
             # Note: you are not allowed to add further paths here
             if self.sive_cfg.tog.reinit:
-                self.print("-> enable vector graphic refinement:")
-                merged_svg_path = self.refine_rendering(tag=f'{i}_refine',
+                self.print("-> Enable vector graphic refinement:")
+                merged_svg_path = self.refine_rendering(tag=f'{cur_idx}_refine',
                                                         prompt=text_prompt,
                                                         target_img=select_img,
                                                         canvas_size=(self.im_size, self.im_size),
@@ -194,22 +220,21 @@
                                                         optim_cfg=self.sive_optim,
                                                         init_svg_path=merged_svg_path)
 
-            # svg-to-png, to tensor
-            merged_png_path = self.result_path / f'SIVE_render_final_{i}.png'
+            # Postprocess: svg-to-png & to tensor
+            merged_png_path = self.result_path / f'SIVE_render_final_{cur_idx}.png'
             cairosvg.svg2png(url=merged_svg_path.as_posix(), write_to=merged_png_path.as_posix())
-
-            # collect paths
-            merged_svg_paths.append(merged_svg_path)
+            merged_svg_paths.append(merged_svg_path)  # collect paths
             merged_images.append(self.target_file_preprocess(merged_png_path))
-            # empty attention record
+
+            # Clear attention recorder
             controller.reset()
 
-            self.print(f"Vector Particle {i} Rendering End...\n")
+            self.print(f"Vector Particle {cur_idx} Rendering End...\n")
 
-        # free the VRAM
+        # Free the VRAM
         del pipeline
         torch.cuda.empty_cache()
-        # update paths
+        # Update paths
         self.x_cfg.num_paths = self.sive_cfg.bg.num_paths + self.sive_cfg.fg.num_paths
 
         return merged_svg_paths, merged_images
@@ -257,6 +282,9 @@
         if attention_map is not None:
             # init fist control points by attention_map
             attn_thresh, select_inds = renderer.attn_init_points(num_paths=sum(path_schedule), mask=mask)
+            # Warning: attention map failure
+            if len(select_inds) == 0: return 0
+
             # log attention, just once
             plot_attn(attention_map, attn_thresh, target_img, select_inds,
                       (self.sive_attn_dir / f"attention_{tag}_map.jpg").as_posix())
@@ -381,14 +409,16 @@
         plot_img(img, self.refine_dir, fname=f"{tag}_before_refined")
 
         n_iter = render_cfg.num_iter
+        self.print(f"Total iters: {n_iter}")
+
         # build painter optimizer
         optimizer = CompPainterOptimizer(content_renderer, self.style, n_iter, optim_cfg)
         # init optimizer
         optimizer.init_optimizers()
 
-        print(f"=> n_point: {len(content_renderer.get_point_params())}, "
-              f"n_width: {len(content_renderer.get_width_params())}, "
-              f"n_color: {len(content_renderer.get_color_params())}")
+        self.print(f"=> n_point: {len(content_renderer.get_point_params())}, "
+                   f"n_width: {len(content_renderer.get_width_params())}, "
+                   f"n_color: {len(content_renderer.get_color_params())}")
 
         step = 0
         with tqdm(initial=step, total=n_iter, disable=not self.accelerator.is_main_process) as pbar:
@@ -434,7 +464,8 @@
                    text_prompt: AnyStr,
                    init_svg_path: Union[List[AnyPath], AnyPath] = None,
                    init_image: Union[List[torch.Tensor], torch.Tensor] = None):
-        if not self.vpsd_cfg.use:
+        # print(f"self.vpsd_cfg.use: {self.vpsd_cfg.use}")
+        if self.vpsd_cfg.use is False:
             return
 
         # for convenience
@@ -784,10 +815,12 @@
                                generator=self.g_device)
         outputs_np = [np.array(img) for img in outputs.images]
         view_images(outputs_np, save_image=True, fp=gen_sample_path)
-        self.print(f"select_sample shape: {outputs_np[0].shape}")
+        # self.print(f"select_sample shape: {outputs_np[0].shape}")
 
         if attn_init:
-            """ldm cross-attention map"""
+            self.print(f"\nLDM attn-map logging:")
+
+            # Cross-attention map
             cross_attention_maps, tokens = \
                 pipeline.get_cross_attention([prompts],
                                              controller,
@@ -862,7 +895,7 @@
             view_images(reversed_attn_map_vis, save_image=True,
                         fp=self.sive_attn_dir / f'reversed-fusion-attn-{iter}.png')
 
-            self.print(f"-> fusion attn_map: {attn_map.shape}")
+            self.print(f"-> fusion attn_map: {attn_map.shape} \n")
         else:
             attn_map = None
             inverse_attn = None
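The hard asserts on particle counts in `__init__` become soft clamps; the two new `if` guards are equivalent to this sketch:

```python
def clamp_particle_counts(vsd_n_particle: int, phi_n_particle: int, n_particle: int):
    """Equivalent of the new guards in SVGDreamerPipeline.__init__: the counts
    used for VSD and the phi model can never exceed the total particle count."""
    return min(vsd_n_particle, n_particle), min(phi_n_particle, n_particle)

print(clamp_particle_counts(6, 4, 4))  # (4, 4): both clamped instead of asserting
```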
 
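The core fix lives in `component_rendering`: an attention map that clears the threshold nowhere produces zero init points, and the method now returns `0` instead of failing downstream. A hypothetical condensed version of that guard (the internals of `attn_init_points` are elided and approximated here):

```python
import numpy as np

def attn_init_points_or_fail(attn_map, tau=0.3):
    # Condensed sketch of the guarded call in component_rendering: return 0
    # when there is no attention map, or when thresholding selects no points,
    # so the caller (SIVE_stage) can skip this particle and retry.
    if attn_map is None:
        return 0
    select_inds = np.argwhere(attn_map > tau)
    if len(select_inds) == 0:  # Warning: attention map failure
        return 0
    return select_inds
```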
svgdreamer/token2attn/attn_control.py CHANGED
@@ -85,7 +85,7 @@ class AttentionStore(AttentionControl):
         self.step_store = self.get_empty_store()
 
     def get_average_attention(self):
-        print(f"step count: {self.cur_step}")
+        # print(f"step count: {self.cur_step}")
         average_attention = {
             key: [item / self.cur_step for item in self.attention_store[key]]
             for key in self.attention_store
  for key in self.attention_store