xingxm committed on Mar 23, 2024

Commit

51615ef

1 Parent(s): b70838a

feat(build): add install script and docker image

Browse files

Files changed (41) hide show

README.md +69 -11
assets/Painting-Elephant/init_p0.svg +0 -0
assets/Painting-Elephant/init_p1.svg +0 -0
assets/Painting-Elephant/init_p2.svg +0 -0
assets/Painting-Elephant/init_p3.svg +0 -0
assets/Painting-Elephant/init_p4.svg +0 -0
assets/Painting-Elephant/init_p5.svg +0 -0
assets/Painting-Elephant/p_0.svg +0 -0
assets/Painting-Elephant/p_1.svg +0 -0
assets/Painting-Elephant/p_2.svg +0 -0
assets/Painting-Elephant/p_3.svg +0 -0
assets/Painting-Elephant/p_4.svg +0 -0
assets/Painting-Elephant/p_5.svg +0 -0
assets/Pixelart-DarthVader/init_p0.svg +0 -0
assets/Pixelart-DarthVader/init_p1.svg +0 -0
assets/Pixelart-DarthVader/init_p2.svg +0 -0
assets/Pixelart-DarthVader/init_p3.svg +0 -0
assets/Pixelart-DarthVader/init_p4.svg +0 -0
assets/Pixelart-DarthVader/init_p5.svg +0 -0
assets/Pixelart-DarthVader/p0.svg +0 -0
assets/Pixelart-DarthVader/p1.svg +0 -0
assets/Pixelart-DarthVader/p2.svg +0 -0
assets/Pixelart-DarthVader/p3.svg +0 -0
assets/Pixelart-DarthVader/p4.svg +0 -0
assets/Pixelart-DarthVader/p5.svg +0 -0
assets/SIVE-astronaut-1/attn.png +0 -0
assets/SIVE-astronaut-1/final_bg.svg +0 -0
assets/SIVE-astronaut-1/final_fg.svg +0 -0
assets/SIVE-astronaut-1/init_bg.svg +134 -0
assets/SIVE-astronaut-1/init_fg.svg +134 -0
assets/SIVE-astronaut-1/result.svg +0 -0
conf/x/{iconography_s1.yaml → iconography-s1.yaml} +2 -1
conf/x/iconography.yaml +1 -6
conf/x/ink.yaml +1 -6
conf/x/lowpoly.yaml +1 -6
conf/x/painting.yaml +1 -6
conf/x/pixelart.yaml +2 -7
conf/x/sketch.yaml +1 -6
script/install.sh +47 -0
svgdreamer/painter/painter_params.py +9 -8
svgdreamer/pipelines/SVGDreamer_pipeline.py +12 -13

README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 [![cvpr24](https://img.shields.io/badge/CVPR-2024-387ADF.svg)](https://arxiv.org/abs/2312.16476)
 [![arXiv](https://img.shields.io/badge/arXiv-2312.16476-b31b1b.svg)](https://arxiv.org/abs/2312.16476)
 [![website](https://img.shields.io/badge/Website-Gitpage-4CCD99)](https://ximinng.github.io/SVGDreamer-project/)
-[![blog](https://img.shields.io/badge/Blog-ENG-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
 [![blog](https://img.shields.io/badge/Blog-CN-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
 This repository contains our official implementation of the CVPR 2024 paper: SVGDreamer: Text-Guided SVG Generation with
@@ -20,12 +20,37 @@ Diffusion Model. It can generate high-quality SVGs based on text prompts.
   a novel text-guided vector graphics synthesis method. This method considers both the editing of vector graphics and
   the quality of the synthesis.
 ## 🔥 Quickstart
 Before running the code, download the stable diffusion model. Append `diffuser.download=True` to the end of the script.
 ### SIVE + VPSD
 **Script:**
 ```shell
@@ -52,18 +77,17 @@ python svgdreamer.py x=iconography_s1 skip_sive=False "prompt='a man in an astro
 ### VPSD
-#### Case 1
 **Prompt:** Sydney opera house. oil painting. by Van Gogh <br/>
-**Style:** iconography <br/>
 **Preview:**
 | Particle 1                                             | Particle 2                                             | Particle 3                                             | Particle 4                                             | Particle 5                                             | Particle 6                                             |
 |--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|
-| init p1                                                | init p2                                                | init p3                                                | init p4                                                | init p5                                                | init p6                                                |
 | <img src="./assets/Icon-SydneyOperaHouse/init_p0.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p1.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p2.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p3.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p4.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p5.svg"> |
 | final p1                                               | final p2                                               | final p3                                               | final p4                                               | final p5                                               | final p6                                               |
-| <img src="./assets/Icon-SydneyOperaHouse/p_0.svg">     | <img src="assets/Icon-SydneyOperaHouse/p_1.svg">       | <img src="assets/Icon-SydneyOperaHouse/p_2.svg">       | <img src="assets/Icon-SydneyOperaHouse/p_3.svg">       | <img src="assets/Icon-SydneyOperaHouse/p_4.svg">       | <img src="assets/Icon-SydneyOperaHouse/p_5.svg">       |
 **Script:**
@@ -71,19 +95,53 @@ python svgdreamer.py x=iconography_s1 skip_sive=False "prompt='a man in an astro
 python svgdreamer.py x=iconography "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs/SydneyOperaHouse-OilPainting'
 ```
-**Other Styles:**
 ```shell
 # Style: low-poly
 python svgdreamer.py x=lowpoly "prompt='A picture of a bald eagle. low-ploy. polygon'" result_path='./logs/BaldEagle'
-# Style: pixel-art
-python svgdreamer.py x=pixelart "prompt='Darth vader with lightsaber.'" result_path='./log/DarthVader'
-# Style: painting
-python svgdreamer.py x=painting "prompt='self portrait of Van Gogh. oil painting. cmyk portrait. multi colored. defiant and beautiful. cmyk. expressive eyes.'" result_path='./logs/VanGogh-Portrait'
 # Style: sketch
 python svgdreamer.py x=sketch "prompt='A free-hand drawing of A speeding Lamborghini. black and white drawing.'" result_path='./logs/Lamborghini'
 # Style: ink and wash
 python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist abstract art grayscale watercolor.'" result_path='./logs/BigWildGoosePagoda'
 ```
 ## 🔑 Tips
@@ -94,7 +152,7 @@ python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist
 ## 📋 TODO
 - [x] Release the code
-- [ ] Add docker image
 ## :books: Acknowledgement

 [![cvpr24](https://img.shields.io/badge/CVPR-2024-387ADF.svg)](https://arxiv.org/abs/2312.16476)
 [![arXiv](https://img.shields.io/badge/arXiv-2312.16476-b31b1b.svg)](https://arxiv.org/abs/2312.16476)
 [![website](https://img.shields.io/badge/Website-Gitpage-4CCD99)](https://ximinng.github.io/SVGDreamer-project/)
+[![blog](https://img.shields.io/badge/Blog-EN-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
 [![blog](https://img.shields.io/badge/Blog-CN-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
 This repository contains our official implementation of the CVPR 2024 paper: SVGDreamer: Text-Guided SVG Generation with
   a novel text-guided vector graphics synthesis method. This method considers both the editing of vector graphics and
   the quality of the synthesis.
+## Installation
+Follow the steps below to get up and running with SVGDreamer quickly.
+These steps let you run inference locally.
+From the top-level directory, run:
+```bash
+sh script/install.sh
+```
+Alternatively, use the prebuilt Docker image:
+```shell
+docker run --name svgdreamer --gpus all -it --ipc=host ximingxing/svgrender:v1 /bin/bash
+```
 ## 🔥 Quickstart
 Before running the code, download the Stable Diffusion model by appending `diffuser.download=True` to the end of the command.
 ### SIVE + VPSD
+**Prompt:** An image of Batman. full body action pose, complete detailed body. white background. empty background, high
+quality, 4K, ultra realistic <br/>
+**Preview:**
+| attn-map                                       | bg init                                           | fg init                                           | bg final                                           | fg final                                           | final                                            |
+|------------------------------------------------|---------------------------------------------------|---------------------------------------------------|----------------------------------------------------|----------------------------------------------------|--------------------------------------------------|
+| <img src="./assets/SIVE-astronaut-1/attn.png"> | <img src="./assets/SIVE-astronaut-1/init_bg.svg"> | <img src="./assets/SIVE-astronaut-1/init_fg.svg"> | <img src="./assets/SIVE-astronaut-1/final_bg.svg"> | <img src="./assets/SIVE-astronaut-1/final_fg.svg"> | <img src="./assets/SIVE-astronaut-1/result.svg"> |
 **Script:**
 ```shell
 ### VPSD
+#### Iconography style
 **Prompt:** Sydney opera house. oil painting. by Van Gogh <br/>
 **Preview:**
 | Particle 1                                             | Particle 2                                             | Particle 3                                             | Particle 4                                             | Particle 5                                             | Particle 6                                             |
 |--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|
+| randomly init p1                                       | randomly init p2                                       | randomly init p3                                       | randomly init p4                                       | randomly init p5                                       | randomly init p6                                       |
 | <img src="./assets/Icon-SydneyOperaHouse/init_p0.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p1.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p2.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p3.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p4.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p5.svg"> |
 | final p1                                               | final p2                                               | final p3                                               | final p4                                               | final p5                                               | final p6                                               |
+| <img src="./assets/Icon-SydneyOperaHouse/p_0.svg">     | <img src="./assets/Icon-SydneyOperaHouse/p_1.svg">     | <img src="./assets/Icon-SydneyOperaHouse/p_2.svg">     | <img src="./assets/Icon-SydneyOperaHouse/p_3.svg">     | <img src="./assets/Icon-SydneyOperaHouse/p_4.svg">     | <img src="./assets/Icon-SydneyOperaHouse/p_5.svg">     |
 **Script:**
 python svgdreamer.py x=iconography "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs/SydneyOperaHouse-OilPainting'
 ```
+#### Painting style
+**Prompt:** Abstract Vincent van Gogh Oil Painting Elephant, featuring earthy tones of green and brown <br/>
+**Preview:**
+| Particle 1                                         | Particle 2                                         | Particle 3                                         | Particle 4                                         | Particle 5                                         | Particle 6                                         |
+|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|
+| randomly init p1                                   | randomly init p2                                   | randomly init p3                                   | randomly init p4                                   | randomly init p5                                   | randomly init p6                                   |
+| <img src="./assets/Painting-Elephant/init_p0.svg"> | <img src="./assets/Painting-Elephant/init_p1.svg"> | <img src="./assets/Painting-Elephant/init_p2.svg"> | <img src="./assets/Painting-Elephant/init_p3.svg"> | <img src="./assets/Painting-Elephant/init_p4.svg"> | <img src="./assets/Painting-Elephant/init_p5.svg"> |
+| final p1                                           | final p2                                           | final p3                                           | final p4                                           | final p5                                           | final p6                                           |
+| <img src="./assets/Painting-Elephant/p_0.svg">     | <img src="./assets/Painting-Elephant/p_1.svg">     | <img src="./assets/Painting-Elephant/p_2.svg">     | <img src="./assets/Painting-Elephant/p_3.svg">     | <img src="./assets/Painting-Elephant/p_4.svg">     | <img src="./assets/Painting-Elephant/p_5.svg">     |
+**Script:**
+```shell
+python svgdreamer.py x=painting "prompt='Abstract Vincent van Gogh Oil Painting Elephant, featuring earthy tones of green and brown.'" x.num_paths=500 result_path='./logs/Elephant-OilPainting'
+```
+#### Pixel-Art style
+**Prompt:** Darth vader with lightsaber <br/>
+**Preview:**
+| Particle 1                                           | Particle 2                                           | Particle 3                                           | Particle 4                                           | Particle 5                                           | Particle 6                                           |
+|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|
+| randomly init p1                                     | randomly init p2                                     | randomly init p3                                     | randomly init p4                                     | randomly init p5                                     | randomly init p6                                     |
+| <img src="./assets/Pixelart-DarthVader/init_p0.svg"> | <img src="./assets/Pixelart-DarthVader/init_p1.svg"> | <img src="./assets/Pixelart-DarthVader/init_p2.svg"> | <img src="./assets/Pixelart-DarthVader/init_p3.svg"> | <img src="./assets/Pixelart-DarthVader/init_p4.svg"> | <img src="./assets/Pixelart-DarthVader/init_p5.svg"> |
+| final p1                                             | final p2                                             | final p3                                             | final p4                                             | final p5                                             | final p6                                             |
+| <img src="./assets/Pixelart-DarthVader/p0.svg">      | <img src="./assets/Pixelart-DarthVader/p1.svg">      | <img src="./assets/Pixelart-DarthVader/p2.svg">      | <img src="./assets/Pixelart-DarthVader/p3.svg">      | <img src="./assets/Pixelart-DarthVader/p4.svg">      | <img src="./assets/Pixelart-DarthVader/p5.svg">      |
+**Script:**
+```shell
+python svgdreamer.py x=pixelart "prompt='Darth vader with lightsaber.'" result_path='./logs/DarthVader'
+```
+#### Other Styles
 ```shell
 # Style: low-poly
 python svgdreamer.py x=lowpoly "prompt='A picture of a bald eagle. low-ploy. polygon'" result_path='./logs/BaldEagle'
 # Style: sketch
 python svgdreamer.py x=sketch "prompt='A free-hand drawing of A speeding Lamborghini. black and white drawing.'" result_path='./logs/Lamborghini'
 # Style: ink and wash
 python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist abstract art grayscale watercolor.'" result_path='./logs/BigWildGoosePagoda'
+# Style: painting
+python svgdreamer.py x=painting "prompt='self portrait of Van Gogh. oil painting. cmyk portrait. multi colored. defiant and beautiful. cmyk. expressive eyes.'" result_path='./logs/VanGogh-Portrait'
 ```
 ## 🔑 Tips
 ## 📋 TODO
 - [x] Release the code
+- [x] Add docker image
 ## :books: Acknowledgement

assets/Painting-Elephant/init_p0.svg ADDED Viewed

assets/Painting-Elephant/init_p1.svg ADDED Viewed

assets/Painting-Elephant/init_p2.svg ADDED Viewed

assets/Painting-Elephant/init_p3.svg ADDED Viewed

assets/Painting-Elephant/init_p4.svg ADDED Viewed

assets/Painting-Elephant/init_p5.svg ADDED Viewed

assets/Painting-Elephant/p_0.svg ADDED Viewed

assets/Painting-Elephant/p_1.svg ADDED Viewed

assets/Painting-Elephant/p_2.svg ADDED Viewed

assets/Painting-Elephant/p_3.svg ADDED Viewed

assets/Painting-Elephant/p_4.svg ADDED Viewed

assets/Painting-Elephant/p_5.svg ADDED Viewed

assets/Pixelart-DarthVader/init_p0.svg ADDED Viewed

assets/Pixelart-DarthVader/init_p1.svg ADDED Viewed

assets/Pixelart-DarthVader/init_p2.svg ADDED Viewed

assets/Pixelart-DarthVader/init_p3.svg ADDED Viewed

assets/Pixelart-DarthVader/init_p4.svg ADDED Viewed

assets/Pixelart-DarthVader/init_p5.svg ADDED Viewed

assets/Pixelart-DarthVader/p0.svg ADDED Viewed

assets/Pixelart-DarthVader/p1.svg ADDED Viewed

assets/Pixelart-DarthVader/p2.svg ADDED Viewed

assets/Pixelart-DarthVader/p3.svg ADDED Viewed

assets/Pixelart-DarthVader/p4.svg ADDED Viewed

assets/Pixelart-DarthVader/p5.svg ADDED Viewed

assets/SIVE-astronaut-1/attn.png ADDED Viewed

assets/SIVE-astronaut-1/final_bg.svg ADDED Viewed

assets/SIVE-astronaut-1/final_fg.svg ADDED Viewed

assets/SIVE-astronaut-1/init_bg.svg ADDED Viewed

assets/SIVE-astronaut-1/init_fg.svg ADDED Viewed

assets/SIVE-astronaut-1/result.svg ADDED Viewed

conf/x/{iconography_s1.yaml → iconography-s1.yaml} RENAMED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 500
@@ -131,7 +132,7 @@ vpsd_model_cfg:
 vpsd:
   use: False
   type: 'vpsd'
-  n_particle: 4 # 4, 8, 16
   vsd_n_particle: 4 # the batch size of particles
   particle_aug: False # do data enhancement for the input particles
   num_iter: 1 # total iterations

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 500
 vpsd:
   use: False
   type: 'vpsd'
+  n_particle: 6 # e.g. 4, 6, 8, 16
   vsd_n_particle: 4 # the batch size of particles
   particle_aug: False # apply data augmentation to the input particles
   num_iter: 1 # total iterations

conf/x/iconography.yaml CHANGED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
@@ -81,12 +82,6 @@ sive:
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
-    # optim
-    lr_schedule: False # enable lr_scheduler or not
-    # loss
-    bg_lam: 0
-    fg_lam: 1
-    xing_loss_weight: 0
 # VPSD primitives
 num_paths: 512 # number of strokes

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
 # VPSD primitives
 num_paths: 512 # number of strokes

conf/x/ink.yaml CHANGED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
@@ -81,12 +82,6 @@ sive:
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
-    # optim
-    lr_schedule: False # enable lr_scheduler or not
-    # loss
-    bg_lam: 0
-    fg_lam: 1
-    xing_loss_weight: 0
 # VPSD primitives
 num_paths: 128 # number of strokes

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
 # VPSD primitives
 num_paths: 128 # number of strokes

conf/x/lowpoly.yaml CHANGED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
@@ -81,12 +82,6 @@ sive:
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
-    # optim
-    lr_schedule: False # enable lr_scheduler or not
-    # loss
-    bg_lam: 0
-    fg_lam: 1
-    xing_loss_weight: 0
 # VPSD primitives
 num_paths: 512 # number of strokes

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
 # VPSD primitives
 num_paths: 512 # number of strokes

conf/x/painting.yaml CHANGED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
@@ -81,12 +82,6 @@ sive:
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
-    # optim
-    lr_schedule: False # enable lr_scheduler or not
-    # loss
-    bg_lam: 0
-    fg_lam: 1
-    xing_loss_weight: 0
 # VPSD primitives
 num_paths: 1500 # number of strokes

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
 # VPSD primitives
 num_paths: 1500 # number of strokes

conf/x/pixelart.yaml CHANGED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
@@ -81,12 +82,6 @@ sive:
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
-    # optim
-    lr_schedule: False # enable lr_scheduler or not
-    # loss
-    bg_lam: 0
-    fg_lam: 1
-    xing_loss_weight: 0
 # VPSD primitives
 num_paths: 512 # number of strokes
@@ -110,7 +105,7 @@ vpsd_stage_optim:
   width: 0.1
   color: 0.01
   bg: 0.01
-  lr_schedule: True # use lr_scheduler
   optim:
     name: 'adam'
     betas: [ 0.9, 0.9 ]

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
 # VPSD primitives
 num_paths: 512 # number of strokes
   width: 0.1
   color: 0.01
   bg: 0.01
+  lr_schedule: False
   optim:
     name: 'adam'
     betas: [ 0.9, 0.9 ]

conf/x/sketch.yaml CHANGED Viewed

@@ -38,6 +38,7 @@ sive:
     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
@@ -81,12 +82,6 @@ sive:
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
-    # optim
-    lr_schedule: False # enable lr_scheduler or not
-    # loss
-    bg_lam: 0
-    fg_lam: 1
-    xing_loss_weight: 0
 # VPSD primitives
 num_paths: 128 # number of strokes

     mean_comp: False
     comp_idx: 0
     attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
+  mask_tau: 0.3 # the threshold used to convert the attention map into a mask
   bg:
     style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
     num_iter: 10
   tog: # for refinement
     reinit: False # if False, use fg params to init content
     num_iter: 10
 # VPSD primitives
 num_paths: 128 # number of strokes

script/install.sh ADDED Viewed

	@@ -0,0 +1,47 @@

+#!/bin/bash
+eval "$(conda shell.bash hook)"
+conda create --name svgrender python=3.10
+conda activate svgrender
+echo "The conda environment was successfully created"
+conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
+echo "Pytorch installation is complete. version: 1.12.1"
+pip install hydra-core omegaconf
+pip install freetype-py shapely svgutils
+pip install opencv-python scikit-image matplotlib visdom wandb BeautifulSoup4
+pip install triton numba
+pip install numpy scipy scikit-fmm einops timm fairscale==0.4.13
+pip install accelerate transformers safetensors datasets
+pip install easydict scikit-learn pytorch_lightning==2.1.0 webdataset
+echo "The basic dependency library is installed."
+pip install ftfy regex tqdm
+pip install git+https://github.com/openai/CLIP.git
+echo "CLIP installation is complete."
+pip install diffusers==0.20.2
+echo "Diffusers installation is complete. version: 0.20.2"
+# If xformers doesn't install properly with conda, try installing it with pip using the command below:
+# pip install --pre -U xformers
+conda install xformers -c xformers
+echo "xformers installation is complete."
+git clone https://github.com/BachiLi/diffvg.git
+cd diffvg
+git submodule update --init --recursive
+conda install -y -c anaconda cmake
+conda install -y -c conda-forge ffmpeg
+pip install svgwrite svgpathtools cssutils torch-tools
+python setup.py install
+echo "DiffVG installation is complete."
+echo "the running environment has been successfully installed!!!"

svgdreamer/painter/painter_params.py CHANGED Viewed

@@ -301,7 +301,7 @@ class Painter(DiffVGState):
             fpath: The path to save the reinitialized SVG.
         """
         if self.style not in ['iconography', 'low-poly', 'painting', 'ink']:
-            return
         def get_keys_below_threshold(my_dict, threshold):
             keys_below_threshold = [key for key, value in my_dict.items() if value < threshold]
@@ -360,7 +360,8 @@ class Painter(DiffVGState):
                 if path.id in reinit_union:
                     coord = [i, i] if self.style == 'low-poly' else None
                     self.shapes[i] = self.get_path(coord=coord)
-                    # update coords
                     self.shapes[i].points.requires_grad = True
                     extra_point_params.append(self.shapes[i].points)
                     if self.style == 'painting':
@@ -377,7 +378,7 @@ class Painter(DiffVGState):
                             shape_ids=torch.tensor(list(shp_ids)),
                             fill_color=fill_color_init,
                             stroke_color=None)
-                        # requires gradients
                         self.shape_groups[i].fill_color.requires_grad = True
                         extra_color_params.append(self.shape_groups[i].fill_color)
                     elif self.style in ['painting']:
@@ -387,7 +388,7 @@ class Painter(DiffVGState):
                             shape_ids=torch.tensor([len(self.shapes) - 1]),
                             fill_color=None,
                             stroke_color=stroke_color_init)
-                        # requires gradients
                         self.shape_groups[i].stroke_color.requires_grad = True
                         extra_color_params.append(self.shape_groups[i].stroke_color)
                     elif self.style in ['ink']:
@@ -397,7 +398,7 @@ class Painter(DiffVGState):
                             shape_ids=torch.tensor([len(self.shapes) - 1]),
                             fill_color=None,
                             stroke_color=stroke_color_init)
-                        # requires gradients
                         self.shape_groups[i].stroke_color.requires_grad = True
                         extra_color_params.append(self.shape_groups[i].stroke_color)
@@ -685,11 +686,11 @@ class PainterOptimizer:
             self.point_scheduler = LambdaLR(self.point_optimizer, lr_lambda=self.lr_lambda, last_epoch=-1)
     def add_params(self, point_params, color_params, width_params):
-        if len(point_params) > 0:
             self.point_optimizer.add_param_group({f'params': point_params})
-        if len(color_params) > 0:
             self.color_optimizer.add_param_group({f'params': color_params})
-        if len(width_params) > 0:
             self.width_optimizer.add_param_group({f'params': width_params})
     def update_lr(self):

             fpath: The path to save the reinitialized SVG.
         """
         if self.style not in ['iconography', 'low-poly', 'painting', 'ink']:
+            return None, None, None
         def get_keys_below_threshold(my_dict, threshold):
             keys_below_threshold = [key for key, value in my_dict.items() if value < threshold]
                 if path.id in reinit_union:
                     coord = [i, i] if self.style == 'low-poly' else None
                     self.shapes[i] = self.get_path(coord=coord)
+                    # new point
+                    self.shapes[i].id = path.id
                     self.shapes[i].points.requires_grad = True
                     extra_point_params.append(self.shapes[i].points)
                     if self.style == 'painting':
                             shape_ids=torch.tensor(list(shp_ids)),
                             fill_color=fill_color_init,
                             stroke_color=None)
+                        # new shape
                         self.shape_groups[i].fill_color.requires_grad = True
                         extra_color_params.append(self.shape_groups[i].fill_color)
                     elif self.style in ['painting']:
                             shape_ids=torch.tensor([len(self.shapes) - 1]),
                             fill_color=None,
                             stroke_color=stroke_color_init)
+                        # new shape
                         self.shape_groups[i].stroke_color.requires_grad = True
                         extra_color_params.append(self.shape_groups[i].stroke_color)
                     elif self.style in ['ink']:
                             shape_ids=torch.tensor([len(self.shapes) - 1]),
                             fill_color=None,
                             stroke_color=stroke_color_init)
+                        # new shape
                         self.shape_groups[i].stroke_color.requires_grad = True
                         extra_color_params.append(self.shape_groups[i].stroke_color)
             self.point_scheduler = LambdaLR(self.point_optimizer, lr_lambda=self.lr_lambda, last_epoch=-1)
     def add_params(self, point_params, color_params, width_params):
+        if point_params is not None and len(point_params) > 0:
             self.point_optimizer.add_param_group({f'params': point_params})
+        if color_params is not None and len(color_params) > 0:
             self.color_optimizer.add_param_group({f'params': color_params})
+        if width_params is not None and len(width_params) > 0:
             self.width_optimizer.add_param_group({f'params': width_params})
     def update_lr(self):

svgdreamer/pipelines/SVGDreamer_pipeline.py CHANGED Viewed

@@ -92,10 +92,6 @@ class SVGDreamerPipeline(ModelState):
         self.vpsd_cfg = self.x_cfg.vpsd
         self.vpsd_optim = self.x_cfg.vpsd_stage_optim
-        if self.style == "pixelart":
-            self.x_cfg.sive_stage_optim.lr_schedule = False
-            self.x_cfg.vpsd_stage_optim.lr_schedule = False
     def painterly_rendering(self, text_prompt: str, target_file: AnyPath = None):
         # log prompts
         self.print(f"prompt: {text_prompt}")
@@ -132,9 +128,9 @@ class SVGDreamerPipeline(ModelState):
         merged_images = []
         for i in range(self.vpsd_cfg.n_particle):
             select_sample_path = self.result_path / f'select_sample_{i}.png'
             # generate sample and attention map
-            fg_attn_map, bg_attn_map, controller = self.extract_ldm_attn(self.x_cfg.sive_model_cfg,
                                                                          pipeline,
                                                                          text_prompt,
                                                                          select_sample_path,
@@ -146,7 +142,8 @@ class SVGDreamerPipeline(ModelState):
             self.print(f"load target file from: {select_sample_path.as_posix()}")
             # get objects by attention map
-            fg_img, bg_img, fg_mask, bg_mask = self.extract_object(select_img, fg_attn_map, bg_attn_map, iter=i)
             self.print(f"fg_img shape: {fg_img.shape}, bg_img: {bg_img.shape}")
             # background rendering
@@ -641,7 +638,7 @@ class SVGDreamerPipeline(ModelState):
         # save final
         for i, r in enumerate(renderers):
-            ft_svg_path = self.result_path / f"finetune_final_p_{i}.svg"
             r.pretty_save_svg(ft_svg_path)
         # save SVGs
         torchvision.utils.save_image(raster_imgs, fp=self.result_path / f'all_particles.png')
@@ -683,10 +680,10 @@ class SVGDreamerPipeline(ModelState):
         return target_img
     def extract_object(self,
                        select_img: torch.Tensor,
                        fg_attn_map: np.ndarray,
                        bg_attn_map: np.ndarray,
-                       iter: Union[str, int],
                        tau: float = 0.2):
         # attention to mask
         bool_fg_attn_map = fg_attn_map > tau
@@ -755,6 +752,7 @@ class SVGDreamerPipeline(ModelState):
         return fg_img_final, bg_img_final, fg_mask, bg_mask
     def extract_ldm_attn(self,
                          model_cfg: omegaconf.DictConfig,
                          pipeline: DiffusionPipeline,
                          prompts: str,
@@ -762,7 +760,7 @@ class SVGDreamerPipeline(ModelState):
                          attn_init_cfg: omegaconf.DictConfig,
                          image_size: int,
                          token_ind: int,
-                         attn_init: bool = True, ):
         if token_ind <= 0:
             raise ValueError("The 'token_ind' should be greater than 0")
@@ -837,7 +835,7 @@ class SVGDreamerPipeline(ModelState):
             self_attn_vis = np.copy(self_attn)
             self_attn_vis = self_attn_vis * 255
             self_attn_vis = np.repeat(np.expand_dims(self_attn_vis, axis=2), 3, axis=2).astype(np.uint8)
-            view_images(self_attn_vis, save_image=True, fp=self.sive_attn_dir / "self-attn-final.png")
             """get final attention map"""
             attn_map = attn_init_cfg.attn_coeff * cross_attn_map + (1 - attn_init_cfg.attn_coeff) * self_attn
@@ -847,7 +845,7 @@ class SVGDreamerPipeline(ModelState):
             attn_map_vis = np.copy(attn_map)
             attn_map_vis = attn_map_vis * 255
             attn_map_vis = np.repeat(np.expand_dims(attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
-            view_images(attn_map_vis, save_image=True, fp=self.sive_attn_dir / 'fusion-attn.png')
             # inverse fusion-attention to [0, 1]
             inverse_attn = 1 - attn_map
@@ -855,7 +853,8 @@ class SVGDreamerPipeline(ModelState):
             reversed_attn_map_vis = np.copy(inverse_attn)
             reversed_attn_map_vis = reversed_attn_map_vis * 255
             reversed_attn_map_vis = np.repeat(np.expand_dims(reversed_attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
-            view_images(reversed_attn_map_vis, save_image=True, fp=self.sive_attn_dir / 'reversed-fusion-attn.png')
             self.print(f"-> fusion attn_map: {attn_map.shape}")
         else:

         self.vpsd_cfg = self.x_cfg.vpsd
         self.vpsd_optim = self.x_cfg.vpsd_stage_optim
     def painterly_rendering(self, text_prompt: str, target_file: AnyPath = None):
         # log prompts
         self.print(f"prompt: {text_prompt}")
         merged_images = []
         for i in range(self.vpsd_cfg.n_particle):
             select_sample_path = self.result_path / f'select_sample_{i}.png'
             # generate sample and attention map
+            fg_attn_map, bg_attn_map, controller = self.extract_ldm_attn(i,
+                                                                         self.x_cfg.sive_model_cfg,
                                                                          pipeline,
                                                                          text_prompt,
                                                                          select_sample_path,
             self.print(f"load target file from: {select_sample_path.as_posix()}")
             # get objects by attention map
+            fg_img, bg_img, fg_mask, bg_mask = self.extract_object(i, select_img, fg_attn_map, bg_attn_map,
+                                                                   tau=self.sive_cfg.mask_tau)
             self.print(f"fg_img shape: {fg_img.shape}, bg_img: {bg_img.shape}")
             # background rendering
         # save final
         for i, r in enumerate(renderers):
+            ft_svg_path = self.result_path / f"finetune_final_p{i}.svg"
             r.pretty_save_svg(ft_svg_path)
         # save SVGs
         torchvision.utils.save_image(raster_imgs, fp=self.result_path / f'all_particles.png')
         return target_img
     def extract_object(self,
+                       iter: Union[str, int],
                        select_img: torch.Tensor,
                        fg_attn_map: np.ndarray,
                        bg_attn_map: np.ndarray,
                        tau: float = 0.2):
         # attention to mask
         bool_fg_attn_map = fg_attn_map > tau
         return fg_img_final, bg_img_final, fg_mask, bg_mask
     def extract_ldm_attn(self,
+                         iter: int,
                          model_cfg: omegaconf.DictConfig,
                          pipeline: DiffusionPipeline,
                          prompts: str,
                          attn_init_cfg: omegaconf.DictConfig,
                          image_size: int,
                          token_ind: int,
+                         attn_init: bool = True):
         if token_ind <= 0:
             raise ValueError("The 'token_ind' should be greater than 0")
             self_attn_vis = np.copy(self_attn)
             self_attn_vis = self_attn_vis * 255
             self_attn_vis = np.repeat(np.expand_dims(self_attn_vis, axis=2), 3, axis=2).astype(np.uint8)
+            view_images(self_attn_vis, save_image=True, fp=self.sive_attn_dir / f"self-attn-final-{iter}.png")
             """get final attention map"""
             attn_map = attn_init_cfg.attn_coeff * cross_attn_map + (1 - attn_init_cfg.attn_coeff) * self_attn
             attn_map_vis = np.copy(attn_map)
             attn_map_vis = attn_map_vis * 255
             attn_map_vis = np.repeat(np.expand_dims(attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
+            view_images(attn_map_vis, save_image=True, fp=self.sive_attn_dir / f'fusion-attn-{iter}.png')
             # inverse fusion-attention to [0, 1]
             inverse_attn = 1 - attn_map
             reversed_attn_map_vis = np.copy(inverse_attn)
             reversed_attn_map_vis = reversed_attn_map_vis * 255
             reversed_attn_map_vis = np.repeat(np.expand_dims(reversed_attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
+            view_images(reversed_attn_map_vis, save_image=True,
+                        fp=self.sive_attn_dir / f'reversed-fusion-attn-{iter}.png')
             self.print(f"-> fusion attn_map: {attn_map.shape}")
         else: