xingxm commited on
Commit
51615ef
·
1 Parent(s): b70838a

feat(build): add install script and docker image

Browse files
Files changed (41) hide show
  1. README.md +69 -11
  2. assets/Painting-Elephant/init_p0.svg +0 -0
  3. assets/Painting-Elephant/init_p1.svg +0 -0
  4. assets/Painting-Elephant/init_p2.svg +0 -0
  5. assets/Painting-Elephant/init_p3.svg +0 -0
  6. assets/Painting-Elephant/init_p4.svg +0 -0
  7. assets/Painting-Elephant/init_p5.svg +0 -0
  8. assets/Painting-Elephant/p_0.svg +0 -0
  9. assets/Painting-Elephant/p_1.svg +0 -0
  10. assets/Painting-Elephant/p_2.svg +0 -0
  11. assets/Painting-Elephant/p_3.svg +0 -0
  12. assets/Painting-Elephant/p_4.svg +0 -0
  13. assets/Painting-Elephant/p_5.svg +0 -0
  14. assets/Pixelart-DarthVader/init_p0.svg +0 -0
  15. assets/Pixelart-DarthVader/init_p1.svg +0 -0
  16. assets/Pixelart-DarthVader/init_p2.svg +0 -0
  17. assets/Pixelart-DarthVader/init_p3.svg +0 -0
  18. assets/Pixelart-DarthVader/init_p4.svg +0 -0
  19. assets/Pixelart-DarthVader/init_p5.svg +0 -0
  20. assets/Pixelart-DarthVader/p0.svg +0 -0
  21. assets/Pixelart-DarthVader/p1.svg +0 -0
  22. assets/Pixelart-DarthVader/p2.svg +0 -0
  23. assets/Pixelart-DarthVader/p3.svg +0 -0
  24. assets/Pixelart-DarthVader/p4.svg +0 -0
  25. assets/Pixelart-DarthVader/p5.svg +0 -0
  26. assets/SIVE-astronaut-1/attn.png +0 -0
  27. assets/SIVE-astronaut-1/final_bg.svg +0 -0
  28. assets/SIVE-astronaut-1/final_fg.svg +0 -0
  29. assets/SIVE-astronaut-1/init_bg.svg +134 -0
  30. assets/SIVE-astronaut-1/init_fg.svg +134 -0
  31. assets/SIVE-astronaut-1/result.svg +0 -0
  32. conf/x/{iconography_s1.yaml → iconography-s1.yaml} +2 -1
  33. conf/x/iconography.yaml +1 -6
  34. conf/x/ink.yaml +1 -6
  35. conf/x/lowpoly.yaml +1 -6
  36. conf/x/painting.yaml +1 -6
  37. conf/x/pixelart.yaml +2 -7
  38. conf/x/sketch.yaml +1 -6
  39. script/install.sh +47 -0
  40. svgdreamer/painter/painter_params.py +9 -8
  41. svgdreamer/pipelines/SVGDreamer_pipeline.py +12 -13
README.md CHANGED
@@ -3,7 +3,7 @@
3
  [![cvpr24](https://img.shields.io/badge/CVPR-2024-387ADF.svg)](https://arxiv.org/abs/2312.16476)
4
  [![arXiv](https://img.shields.io/badge/arXiv-2312.16476-b31b1b.svg)](https://arxiv.org/abs/2312.16476)
5
  [![website](https://img.shields.io/badge/Website-Gitpage-4CCD99)](https://ximinng.github.io/SVGDreamer-project/)
6
- [![blog](https://img.shields.io/badge/Blog-ENG-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
7
  [![blog](https://img.shields.io/badge/Blog-CN-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
8
 
9
  This repository contains our official implementation of the CVPR 2024 paper: SVGDreamer: Text-Guided SVG Generation with
@@ -20,12 +20,37 @@ Diffusion Model. It can generate high-quality SVGs based on text prompts.
20
  a novel text-guided vector graphics synthesis method. This method considers both the editing of vector graphics and
21
  the quality of the synthesis.
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  ## 🔥 Quickstart
24
 
25
  Before running the code, download the stable diffusion model. Append `diffuser.download=True` to the end of the script.
26
 
27
  ### SIVE + VPSD
28
 
 
 
 
 
 
 
 
 
29
  **Script:**
30
 
31
  ```shell
@@ -52,18 +77,17 @@ python svgdreamer.py x=iconography_s1 skip_sive=False "prompt='a man in an astro
52
 
53
  ### VPSD
54
 
55
- #### Case 1
56
 
57
  **Prompt:** Sydney opera house. oil painting. by Van Gogh <br/>
58
- **Style:** iconography <br/>
59
  **Preview:**
60
 
61
  | Particle 1 | Particle 2 | Particle 3 | Particle 4 | Particle 5 | Particle 6 |
62
  |--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|
63
- | init p1 | init p2 | init p3 | init p4 | init p5 | init p6 |
64
  | <img src="./assets/Icon-SydneyOperaHouse/init_p0.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p1.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p2.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p3.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p4.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p5.svg"> |
65
  | final p1 | final p2 | final p3 | final p4 | final p5 | final p6 |
66
- | <img src="./assets/Icon-SydneyOperaHouse/p_0.svg"> | <img src="assets/Icon-SydneyOperaHouse/p_1.svg"> | <img src="assets/Icon-SydneyOperaHouse/p_2.svg"> | <img src="assets/Icon-SydneyOperaHouse/p_3.svg"> | <img src="assets/Icon-SydneyOperaHouse/p_4.svg"> | <img src="assets/Icon-SydneyOperaHouse/p_5.svg"> |
67
 
68
  **Script:**
69
 
@@ -71,19 +95,53 @@ python svgdreamer.py x=iconography_s1 skip_sive=False "prompt='a man in an astro
71
  python svgdreamer.py x=iconography "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs/SydneyOperaHouse-OilPainting'
72
  ```
73
 
74
- **Other Styles:**
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  ```shell
77
  # Style: low-ploy
78
  python svgdreamer.py x=lowpoly "prompt='A picture of a bald eagle. low-ploy. polygon'" result_path='./logs/BaldEagle'
79
- # Style: pixel-art
80
- python svgdreamer.py x=pixelart "prompt='Darth vader with lightsaber.'" result_path='./log/DarthVader'
81
- # Style: painting
82
- python svgdreamer.py x=painting "prompt='self portrait of Van Gogh. oil painting. cmyk portrait. multi colored. defiant and beautiful. cmyk. expressive eyes.'" result_path='./logs/VanGogh-Portrait'
83
  # Style: sketch
84
  python svgdreamer.py x=sketch "prompt='A free-hand drawing of A speeding Lamborghini. black and white drawing.'" result_path='./logs/Lamborghini'
85
  # Style: ink and wash
86
  python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist abstract art grayscale watercolor.'" result_path='./logs/BigWildGoosePagoda'
 
 
87
  ```
88
 
89
  ## 🔑 Tips
@@ -94,7 +152,7 @@ python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist
94
  ## 📋 TODO
95
 
96
  - [x] Release the code
97
- - [ ] Add docker image
98
 
99
  ## :books: Acknowledgement
100
 
 
3
  [![cvpr24](https://img.shields.io/badge/CVPR-2024-387ADF.svg)](https://arxiv.org/abs/2312.16476)
4
  [![arXiv](https://img.shields.io/badge/arXiv-2312.16476-b31b1b.svg)](https://arxiv.org/abs/2312.16476)
5
  [![website](https://img.shields.io/badge/Website-Gitpage-4CCD99)](https://ximinng.github.io/SVGDreamer-project/)
6
+ [![blog](https://img.shields.io/badge/Blog-EN-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
7
  [![blog](https://img.shields.io/badge/Blog-CN-9195F6)](https://huggingface.co/blog/xingxm/svgdreamer)
8
 
9
  This repository contains our official implementation of the CVPR 2024 paper: SVGDreamer: Text-Guided SVG Generation with
 
20
  a novel text-guided vector graphics synthesis method. This method considers both the editing of vector graphics and
21
  the quality of the synthesis.
22
 
23
+ ## Installation
24
+
25
+ You can follow the steps below to quickly get up and running with SVGDreamer.
26
+ These steps will let you run quick inference locally.
27
+
28
+ In the top level directory run,
29
+
30
+ ```bash
31
+ sh script/install.sh
32
+ ```
33
+
34
+ or using docker images,
35
+
36
+ ```shell
37
+ docker run --name svgdreamer --gpus all -it --ipc=host ximingxing/svgrender:v1 /bin/bash
38
+ ```
39
+
40
  ## 🔥 Quickstart
41
 
42
  Before running the code, download the stable diffusion model. Append `diffuser.download=True` to the end of the script.
43
 
44
  ### SIVE + VPSD
45
 
46
+ **Prompt:** An image of Batman. full body action pose, complete detailed body. white background. empty background, high
47
+ quality, 4K, ultra realistic <br/>
48
+ **Preview:**
49
+
50
+ | attn-map | bg init | fg init | bg final | fg final | final |
51
+ |------------------------------------------------|---------------------------------------------------|---------------------------------------------------|----------------------------------------------------|----------------------------------------------------|--------------------------------------------------|
52
+ | <img src="./assets/SIVE-astronaut-1/attn.png"> | <img src="./assets/SIVE-astronaut-1/init_bg.svg"> | <img src="./assets/SIVE-astronaut-1/init_fg.svg"> | <img src="./assets/SIVE-astronaut-1/final_bg.svg"> | <img src="./assets/SIVE-astronaut-1/final_fg.svg"> | <img src="./assets/SIVE-astronaut-1/result.svg"> |
53
+
54
  **Script:**
55
 
56
  ```shell
 
77
 
78
  ### VPSD
79
 
80
+ #### Iconography style
81
 
82
  **Prompt:** Sydney opera house. oil painting. by Van Gogh <br/>
 
83
  **Preview:**
84
 
85
  | Particle 1 | Particle 2 | Particle 3 | Particle 4 | Particle 5 | Particle 6 |
86
  |--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|--------------------------------------------------------|
87
+ | randomly init p1 | randomly init p2 | randomly init p3 | randomly init p4 | randomly init p5 | randomly init p6 |
88
  | <img src="./assets/Icon-SydneyOperaHouse/init_p0.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p1.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p2.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p3.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p4.svg"> | <img src="./assets/Icon-SydneyOperaHouse/init_p5.svg"> |
89
  | final p1 | final p2 | final p3 | final p4 | final p5 | final p6 |
90
+ | <img src="./assets/Icon-SydneyOperaHouse/p_0.svg"> | <img src="./assets/Icon-SydneyOperaHouse/p_1.svg"> | <img src="./assets/Icon-SydneyOperaHouse/p_2.svg"> | <img src="assets/Icon-SydneyOperaHouse/p_3.svg"> | <img src="./assets/Icon-SydneyOperaHouse/p_4.svg"> | <img src="./assets/Icon-SydneyOperaHouse/p_5.svg"> |
91
 
92
  **Script:**
93
 
 
95
  python svgdreamer.py x=iconography "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs/SydneyOperaHouse-OilPainting'
96
  ```
97
 
98
+ #### Painting style
99
+
100
+ **Prompt:** Abstract Vincent van Gogh Oil Painting Elephant, featuring earthy tones of green and brown <br/>
101
+ **Preview:**
102
+
103
+ | Particle 1 | Particle 2 | Particle 3 | Particle 4 | Particle 5 | Particle 6 |
104
+ |----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|----------------------------------------------------|
105
+ | randomly init p1 | randomly init p2 | randomly init p3 | randomly init p4 | randomly init p5 | randomly init p6 |
106
+ | <img src="./assets/Painting-Elephant/init_p0.svg"> | <img src="./assets/Painting-Elephant/init_p1.svg"> | <img src="./assets/Painting-Elephant/init_p2.svg"> | <img src="./assets/Painting-Elephant/init_p3.svg"> | <img src="./assets/Painting-Elephant/init_p4.svg"> | <img src="./assets/Painting-Elephant/init_p5.svg"> |
107
+ | final p1 | final p2 | final p3 | final p4 | final p5 | final p6 |
108
+ | <img src="./assets/Painting-Elephant/p_0.svg"> | <img src="./assets/Painting-Elephant/p_1.svg"> | <img src="./assets/Painting-Elephant/p_2.svg"> | <img src="./assets/Painting-Elephant/p_3.svg"> | <img src="./assets/Painting-Elephant/p_4.svg"> | <img src="./assets/Painting-Elephant/p_5.svg"> |
109
+
110
+ **Script:**
111
+
112
+ ```shell
113
+ python svgdreamer.py x=painting "prompt='Abstract Vincent van Gogh Oil Painting Elephant, featuring earthy tones of green and brown.'" x.num_paths=500 result_path='./logs/Elephant-OilPainting'
114
+ ```
115
+
116
+ #### Pixel-Art style
117
+
118
+ **Prompt:** Darth vader with lightsaber <br/>
119
+ **Preview:**
120
+
121
+ | Particle 1 | Particle 2 | Particle 3 | Particle 4 | Particle 5 | Particle 6 |
122
+ |------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|------------------------------------------------------|
123
+ | randomly init p1 | randomly init p2 | randomly init p3 | randomly init p4 | randomly init p5 | randomly init p6 |
124
+ | <img src="./assets/Pixelart-DarthVader/init_p0.svg"> | <img src="./assets/Pixelart-DarthVader/init_p1.svg"> | <img src="./assets/Pixelart-DarthVader/init_p2.svg"> | <img src="./assets/Pixelart-DarthVader/init_p3.svg"> | <img src="./assets/Pixelart-DarthVader/init_p4.svg"> | <img src="./assets/Pixelart-DarthVader/init_p5.svg"> |
125
+ | final p1 | final p2 | final p3 | final p4 | final p5 | final p6 |
126
+ | <img src="./assets/Pixelart-DarthVader/p0.svg"> | <img src="./assets/Pixelart-DarthVader/p1.svg"> | <img src="./assets/Pixelart-DarthVader/p2.svg"> | <img src="./assets/Pixelart-DarthVader/p3.svg"> | <img src="./assets/Pixelart-DarthVader/p4.svg"> | <img src="./assets/Pixelart-DarthVader/p5.svg"> |
127
+
128
+ **Script:**
129
+
130
+ ```shell
131
+ python svgdreamer.py x=pixelart "prompt='Darth vader with lightsaber.'" result_path='./logs/DarthVader'
132
+ ```
133
+
134
+ #### Other Styles
135
 
136
  ```shell
137
  # Style: low-ploy
138
  python svgdreamer.py x=lowpoly "prompt='A picture of a bald eagle. low-ploy. polygon'" result_path='./logs/BaldEagle'
 
 
 
 
139
  # Style: sketch
140
  python svgdreamer.py x=sketch "prompt='A free-hand drawing of A speeding Lamborghini. black and white drawing.'" result_path='./logs/Lamborghini'
141
  # Style: ink and wash
142
  python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist abstract art grayscale watercolor.'" result_path='./logs/BigWildGoosePagoda'
143
+ # Style: painting
144
+ python svgdreamer.py x=painting "prompt='self portrait of Van Gogh. oil painting. cmyk portrait. multi colored. defiant and beautiful. cmyk. expressive eyes.'" result_path='./logs/VanGogh-Portrait'
145
  ```
146
 
147
  ## 🔑 Tips
 
152
  ## 📋 TODO
153
 
154
  - [x] Release the code
155
+ - [x] Add docker image
156
 
157
  ## :books: Acknowledgement
158
 
assets/Painting-Elephant/init_p0.svg ADDED
assets/Painting-Elephant/init_p1.svg ADDED
assets/Painting-Elephant/init_p2.svg ADDED
assets/Painting-Elephant/init_p3.svg ADDED
assets/Painting-Elephant/init_p4.svg ADDED
assets/Painting-Elephant/init_p5.svg ADDED
assets/Painting-Elephant/p_0.svg ADDED
assets/Painting-Elephant/p_1.svg ADDED
assets/Painting-Elephant/p_2.svg ADDED
assets/Painting-Elephant/p_3.svg ADDED
assets/Painting-Elephant/p_4.svg ADDED
assets/Painting-Elephant/p_5.svg ADDED
assets/Pixelart-DarthVader/init_p0.svg ADDED
assets/Pixelart-DarthVader/init_p1.svg ADDED
assets/Pixelart-DarthVader/init_p2.svg ADDED
assets/Pixelart-DarthVader/init_p3.svg ADDED
assets/Pixelart-DarthVader/init_p4.svg ADDED
assets/Pixelart-DarthVader/init_p5.svg ADDED
assets/Pixelart-DarthVader/p0.svg ADDED
assets/Pixelart-DarthVader/p1.svg ADDED
assets/Pixelart-DarthVader/p2.svg ADDED
assets/Pixelart-DarthVader/p3.svg ADDED
assets/Pixelart-DarthVader/p4.svg ADDED
assets/Pixelart-DarthVader/p5.svg ADDED
assets/SIVE-astronaut-1/attn.png ADDED
assets/SIVE-astronaut-1/final_bg.svg ADDED
assets/SIVE-astronaut-1/final_fg.svg ADDED
assets/SIVE-astronaut-1/init_bg.svg ADDED
assets/SIVE-astronaut-1/init_fg.svg ADDED
assets/SIVE-astronaut-1/result.svg ADDED
conf/x/{iconography_s1.yaml → iconography-s1.yaml} RENAMED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 500
@@ -131,7 +132,7 @@ vpsd_model_cfg:
131
  vpsd:
132
  use: False
133
  type: 'vpsd'
134
- n_particle: 4 # 4, 8, 16
135
  vsd_n_particle: 4 # the batch size of particles
136
  particle_aug: False # do data enhancement for the input particles
137
  num_iter: 1 # total iterations
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 500
 
132
  vpsd:
133
  use: False
134
  type: 'vpsd'
135
+ n_particle: 6 # 4, 8, 16
136
  vsd_n_particle: 4 # the batch size of particles
137
  particle_aug: False # do data enhancement for the input particles
138
  num_iter: 1 # total iterations
conf/x/iconography.yaml CHANGED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 10
@@ -81,12 +82,6 @@ sive:
81
  tog: # for refinement
82
  reinit: False # if False, use fg params to init content
83
  num_iter: 10
84
- # optim
85
- lr_schedule: False # enable lr_scheduler or not
86
- # loss
87
- bg_lam: 0
88
- fg_lam: 1
89
- xing_loss_weight: 0
90
 
91
  # VPSD primitives
92
  num_paths: 512 # number of strokes
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 10
 
82
  tog: # for refinement
83
  reinit: False # if False, use fg params to init content
84
  num_iter: 10
 
 
 
 
 
 
85
 
86
  # VPSD primitives
87
  num_paths: 512 # number of strokes
conf/x/ink.yaml CHANGED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 10
@@ -81,12 +82,6 @@ sive:
81
  tog: # for refinement
82
  reinit: False # if False, use fg params to init content
83
  num_iter: 10
84
- # optim
85
- lr_schedule: False # enable lr_scheduler or not
86
- # loss
87
- bg_lam: 0
88
- fg_lam: 1
89
- xing_loss_weight: 0
90
 
91
  # VPSD primitives
92
  num_paths: 128 # number of strokes
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 10
 
82
  tog: # for refinement
83
  reinit: False # if False, use fg params to init content
84
  num_iter: 10
 
 
 
 
 
 
85
 
86
  # VPSD primitives
87
  num_paths: 128 # number of strokes
conf/x/lowpoly.yaml CHANGED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 10
@@ -81,12 +82,6 @@ sive:
81
  tog: # for refinement
82
  reinit: False # if False, use fg params to init content
83
  num_iter: 10
84
- # optim
85
- lr_schedule: False # enable lr_scheduler or not
86
- # loss
87
- bg_lam: 0
88
- fg_lam: 1
89
- xing_loss_weight: 0
90
 
91
  # VPSD primitives
92
  num_paths: 512 # number of strokes
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 10
 
82
  tog: # for refinement
83
  reinit: False # if False, use fg params to init content
84
  num_iter: 10
 
 
 
 
 
 
85
 
86
  # VPSD primitives
87
  num_paths: 512 # number of strokes
conf/x/painting.yaml CHANGED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 10
@@ -81,12 +82,6 @@ sive:
81
  tog: # for refinement
82
  reinit: False # if False, use fg params to init content
83
  num_iter: 10
84
- # optim
85
- lr_schedule: False # enable lr_scheduler or not
86
- # loss
87
- bg_lam: 0
88
- fg_lam: 1
89
- xing_loss_weight: 0
90
 
91
  # VPSD primitives
92
  num_paths: 1500 # number of strokes
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 10
 
82
  tog: # for refinement
83
  reinit: False # if False, use fg params to init content
84
  num_iter: 10
 
 
 
 
 
 
85
 
86
  # VPSD primitives
87
  num_paths: 1500 # number of strokes
conf/x/pixelart.yaml CHANGED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 10
@@ -81,12 +82,6 @@ sive:
81
  tog: # for refinement
82
  reinit: False # if False, use fg params to init content
83
  num_iter: 10
84
- # optim
85
- lr_schedule: False # enable lr_scheduler or not
86
- # loss
87
- bg_lam: 0
88
- fg_lam: 1
89
- xing_loss_weight: 0
90
 
91
  # VPSD primitives
92
  num_paths: 512 # number of strokes
@@ -110,7 +105,7 @@ vpsd_stage_optim:
110
  width: 0.1
111
  color: 0.01
112
  bg: 0.01
113
- lr_schedule: True # use lr_scheduler
114
  optim:
115
  name: 'adam'
116
  betas: [ 0.9, 0.9 ]
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 10
 
82
  tog: # for refinement
83
  reinit: False # if False, use fg params to init content
84
  num_iter: 10
 
 
 
 
 
 
85
 
86
  # VPSD primitives
87
  num_paths: 512 # number of strokes
 
105
  width: 0.1
106
  color: 0.01
107
  bg: 0.01
108
+ lr_schedule: False
109
  optim:
110
  name: 'adam'
111
  betas: [ 0.9, 0.9 ]
conf/x/sketch.yaml CHANGED
@@ -38,6 +38,7 @@ sive:
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
 
41
  bg:
42
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
43
  num_iter: 10
@@ -81,12 +82,6 @@ sive:
81
  tog: # for refinement
82
  reinit: False # if False, use fg params to init content
83
  num_iter: 10
84
- # optim
85
- lr_schedule: False # enable lr_scheduler or not
86
- # loss
87
- bg_lam: 0
88
- fg_lam: 1
89
- xing_loss_weight: 0
90
 
91
  # VPSD primitives
92
  num_paths: 128 # number of strokes
 
38
  mean_comp: False
39
  comp_idx: 0
40
  attn_coeff: 1.0 # attn fusion, w * cross-attn + (1-w) * self-attn
41
+ mask_tau: 0.3 # the threshold used to convert the attention map into a mask
42
  bg:
43
  style: "iconography" # 'iconography' ,"pixelart", "sketch", 'painting', 'ink'
44
  num_iter: 10
 
82
  tog: # for refinement
83
  reinit: False # if False, use fg params to init content
84
  num_iter: 10
 
 
 
 
 
 
85
 
86
  # VPSD primitives
87
  num_paths: 128 # number of strokes
script/install.sh ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ eval "$(conda shell.bash hook)"
3
+
4
+ conda create --name svgrender python=3.10
5
+ conda activate svgrender
6
+
7
+ echo "The conda environment was successfully created"
8
+
9
+ conda install pytorch==1.12.1 torchvision==0.13.1 torchaudio==0.12.1 cudatoolkit=11.3 -c pytorch
10
+
11
+ echo "Pytorch installation is complete. version: 1.12.1"
12
+
13
+ pip install hydra-core omegaconf
14
+ pip install freetype-py shapely svgutils
15
+ pip install opencv-python scikit-image matplotlib visdom wandb BeautifulSoup4
16
+ pip install triton numba
17
+ pip install numpy scipy scikit-fmm einops timm fairscale==0.4.13
18
+ pip install accelerate transformers safetensors datasets
19
+ pip install easydict scikit-learn pytorch_lightning==2.1.0 webdataset
20
+
21
+ echo "The basic dependency library is installed."
22
+
23
+ pip install ftfy regex tqdm
24
+ pip install git+https://github.com/openai/CLIP.git
25
+
26
+ echo "CLIP installation is complete."
27
+
28
+ pip install diffusers==0.20.2
29
+
30
+ echo "Diffusers installation is complete. version: 0.20.2"
31
+ # if xformers doesnt install properly with conda try installing with pip using the code below
32
+ # pip install --pre -U xformers
33
+ conda install xformers -c xformers
34
+
35
+ echo "xformers installation is complete."
36
+
37
+ git clone https://github.com/BachiLi/diffvg.git
38
+ cd diffvg
39
+ git submodule update --init --recursive
40
+ conda install -y -c anaconda cmake
41
+ conda install -y -c conda-forge ffmpeg
42
+ pip install svgwrite svgpathtools cssutils torch-tools
43
+ python setup.py install
44
+
45
+ echo "DiffVG installation is complete."
46
+
47
+ echo "the running environment has been successfully installed!!!"
svgdreamer/painter/painter_params.py CHANGED
@@ -301,7 +301,7 @@ class Painter(DiffVGState):
301
  fpath: The path to save the reinitialized SVG.
302
  """
303
  if self.style not in ['iconography', 'low-poly', 'painting', 'ink']:
304
- return
305
 
306
  def get_keys_below_threshold(my_dict, threshold):
307
  keys_below_threshold = [key for key, value in my_dict.items() if value < threshold]
@@ -360,7 +360,8 @@ class Painter(DiffVGState):
360
  if path.id in reinit_union:
361
  coord = [i, i] if self.style == 'low-poly' else None
362
  self.shapes[i] = self.get_path(coord=coord)
363
- # update coords
 
364
  self.shapes[i].points.requires_grad = True
365
  extra_point_params.append(self.shapes[i].points)
366
  if self.style == 'painting':
@@ -377,7 +378,7 @@ class Painter(DiffVGState):
377
  shape_ids=torch.tensor(list(shp_ids)),
378
  fill_color=fill_color_init,
379
  stroke_color=None)
380
- # requires gradients
381
  self.shape_groups[i].fill_color.requires_grad = True
382
  extra_color_params.append(self.shape_groups[i].fill_color)
383
  elif self.style in ['painting']:
@@ -387,7 +388,7 @@ class Painter(DiffVGState):
387
  shape_ids=torch.tensor([len(self.shapes) - 1]),
388
  fill_color=None,
389
  stroke_color=stroke_color_init)
390
- # requires gradients
391
  self.shape_groups[i].stroke_color.requires_grad = True
392
  extra_color_params.append(self.shape_groups[i].stroke_color)
393
  elif self.style in ['ink']:
@@ -397,7 +398,7 @@ class Painter(DiffVGState):
397
  shape_ids=torch.tensor([len(self.shapes) - 1]),
398
  fill_color=None,
399
  stroke_color=stroke_color_init)
400
- # requires gradients
401
  self.shape_groups[i].stroke_color.requires_grad = True
402
  extra_color_params.append(self.shape_groups[i].stroke_color)
403
 
@@ -685,11 +686,11 @@ class PainterOptimizer:
685
  self.point_scheduler = LambdaLR(self.point_optimizer, lr_lambda=self.lr_lambda, last_epoch=-1)
686
 
687
  def add_params(self, point_params, color_params, width_params):
688
- if len(point_params) > 0:
689
  self.point_optimizer.add_param_group({f'params': point_params})
690
- if len(color_params) > 0:
691
  self.color_optimizer.add_param_group({f'params': color_params})
692
- if len(width_params) > 0:
693
  self.width_optimizer.add_param_group({f'params': width_params})
694
 
695
  def update_lr(self):
 
301
  fpath: The path to save the reinitialized SVG.
302
  """
303
  if self.style not in ['iconography', 'low-poly', 'painting', 'ink']:
304
+ return None, None, None
305
 
306
  def get_keys_below_threshold(my_dict, threshold):
307
  keys_below_threshold = [key for key, value in my_dict.items() if value < threshold]
 
360
  if path.id in reinit_union:
361
  coord = [i, i] if self.style == 'low-poly' else None
362
  self.shapes[i] = self.get_path(coord=coord)
363
+ # new point
364
+ self.shapes[i].id = path.id
365
  self.shapes[i].points.requires_grad = True
366
  extra_point_params.append(self.shapes[i].points)
367
  if self.style == 'painting':
 
378
  shape_ids=torch.tensor(list(shp_ids)),
379
  fill_color=fill_color_init,
380
  stroke_color=None)
381
+ # new shape
382
  self.shape_groups[i].fill_color.requires_grad = True
383
  extra_color_params.append(self.shape_groups[i].fill_color)
384
  elif self.style in ['painting']:
 
388
  shape_ids=torch.tensor([len(self.shapes) - 1]),
389
  fill_color=None,
390
  stroke_color=stroke_color_init)
391
+ # new shape
392
  self.shape_groups[i].stroke_color.requires_grad = True
393
  extra_color_params.append(self.shape_groups[i].stroke_color)
394
  elif self.style in ['ink']:
 
398
  shape_ids=torch.tensor([len(self.shapes) - 1]),
399
  fill_color=None,
400
  stroke_color=stroke_color_init)
401
+ # new shape
402
  self.shape_groups[i].stroke_color.requires_grad = True
403
  extra_color_params.append(self.shape_groups[i].stroke_color)
404
 
 
686
  self.point_scheduler = LambdaLR(self.point_optimizer, lr_lambda=self.lr_lambda, last_epoch=-1)
687
 
688
  def add_params(self, point_params, color_params, width_params):
689
+ if point_params is not None and len(point_params) > 0:
690
  self.point_optimizer.add_param_group({f'params': point_params})
691
+ if color_params is not None and len(color_params) > 0:
692
  self.color_optimizer.add_param_group({f'params': color_params})
693
+ if width_params is not None and len(width_params) > 0:
694
  self.width_optimizer.add_param_group({f'params': width_params})
695
 
696
  def update_lr(self):
svgdreamer/pipelines/SVGDreamer_pipeline.py CHANGED
@@ -92,10 +92,6 @@ class SVGDreamerPipeline(ModelState):
92
  self.vpsd_cfg = self.x_cfg.vpsd
93
  self.vpsd_optim = self.x_cfg.vpsd_stage_optim
94
 
95
- if self.style == "pixelart":
96
- self.x_cfg.sive_stage_optim.lr_schedule = False
97
- self.x_cfg.vpsd_stage_optim.lr_schedule = False
98
-
99
  def painterly_rendering(self, text_prompt: str, target_file: AnyPath = None):
100
  # log prompts
101
  self.print(f"prompt: {text_prompt}")
@@ -132,9 +128,9 @@ class SVGDreamerPipeline(ModelState):
132
  merged_images = []
133
  for i in range(self.vpsd_cfg.n_particle):
134
  select_sample_path = self.result_path / f'select_sample_{i}.png'
135
-
136
  # generate sample and attention map
137
- fg_attn_map, bg_attn_map, controller = self.extract_ldm_attn(self.x_cfg.sive_model_cfg,
 
138
  pipeline,
139
  text_prompt,
140
  select_sample_path,
@@ -146,7 +142,8 @@ class SVGDreamerPipeline(ModelState):
146
  self.print(f"load target file from: {select_sample_path.as_posix()}")
147
 
148
  # get objects by attention map
149
- fg_img, bg_img, fg_mask, bg_mask = self.extract_object(select_img, fg_attn_map, bg_attn_map, iter=i)
 
150
  self.print(f"fg_img shape: {fg_img.shape}, bg_img: {bg_img.shape}")
151
 
152
  # background rendering
@@ -641,7 +638,7 @@ class SVGDreamerPipeline(ModelState):
641
 
642
  # save final
643
  for i, r in enumerate(renderers):
644
- ft_svg_path = self.result_path / f"finetune_final_p_{i}.svg"
645
  r.pretty_save_svg(ft_svg_path)
646
  # save SVGs
647
  torchvision.utils.save_image(raster_imgs, fp=self.result_path / f'all_particles.png')
@@ -683,10 +680,10 @@ class SVGDreamerPipeline(ModelState):
683
  return target_img
684
 
685
  def extract_object(self,
 
686
  select_img: torch.Tensor,
687
  fg_attn_map: np.ndarray,
688
  bg_attn_map: np.ndarray,
689
- iter: Union[str, int],
690
  tau: float = 0.2):
691
  # attention to mask
692
  bool_fg_attn_map = fg_attn_map > tau
@@ -755,6 +752,7 @@ class SVGDreamerPipeline(ModelState):
755
  return fg_img_final, bg_img_final, fg_mask, bg_mask
756
 
757
  def extract_ldm_attn(self,
 
758
  model_cfg: omegaconf.DictConfig,
759
  pipeline: DiffusionPipeline,
760
  prompts: str,
@@ -762,7 +760,7 @@ class SVGDreamerPipeline(ModelState):
762
  attn_init_cfg: omegaconf.DictConfig,
763
  image_size: int,
764
  token_ind: int,
765
- attn_init: bool = True, ):
766
  if token_ind <= 0:
767
  raise ValueError("The 'token_ind' should be greater than 0")
768
 
@@ -837,7 +835,7 @@ class SVGDreamerPipeline(ModelState):
837
  self_attn_vis = np.copy(self_attn)
838
  self_attn_vis = self_attn_vis * 255
839
  self_attn_vis = np.repeat(np.expand_dims(self_attn_vis, axis=2), 3, axis=2).astype(np.uint8)
840
- view_images(self_attn_vis, save_image=True, fp=self.sive_attn_dir / "self-attn-final.png")
841
 
842
  """get final attention map"""
843
  attn_map = attn_init_cfg.attn_coeff * cross_attn_map + (1 - attn_init_cfg.attn_coeff) * self_attn
@@ -847,7 +845,7 @@ class SVGDreamerPipeline(ModelState):
847
  attn_map_vis = np.copy(attn_map)
848
  attn_map_vis = attn_map_vis * 255
849
  attn_map_vis = np.repeat(np.expand_dims(attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
850
- view_images(attn_map_vis, save_image=True, fp=self.sive_attn_dir / 'fusion-attn.png')
851
 
852
  # inverse fusion-attention to [0, 1]
853
  inverse_attn = 1 - attn_map
@@ -855,7 +853,8 @@ class SVGDreamerPipeline(ModelState):
855
  reversed_attn_map_vis = np.copy(inverse_attn)
856
  reversed_attn_map_vis = reversed_attn_map_vis * 255
857
  reversed_attn_map_vis = np.repeat(np.expand_dims(reversed_attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
858
- view_images(reversed_attn_map_vis, save_image=True, fp=self.sive_attn_dir / 'reversed-fusion-attn.png')
 
859
 
860
  self.print(f"-> fusion attn_map: {attn_map.shape}")
861
  else:
 
92
  self.vpsd_cfg = self.x_cfg.vpsd
93
  self.vpsd_optim = self.x_cfg.vpsd_stage_optim
94
 
 
 
 
 
95
  def painterly_rendering(self, text_prompt: str, target_file: AnyPath = None):
96
  # log prompts
97
  self.print(f"prompt: {text_prompt}")
 
128
  merged_images = []
129
  for i in range(self.vpsd_cfg.n_particle):
130
  select_sample_path = self.result_path / f'select_sample_{i}.png'
 
131
  # generate sample and attention map
132
+ fg_attn_map, bg_attn_map, controller = self.extract_ldm_attn(i,
133
+ self.x_cfg.sive_model_cfg,
134
  pipeline,
135
  text_prompt,
136
  select_sample_path,
 
142
  self.print(f"load target file from: {select_sample_path.as_posix()}")
143
 
144
  # get objects by attention map
145
+ fg_img, bg_img, fg_mask, bg_mask = self.extract_object(i, select_img, fg_attn_map, bg_attn_map,
146
+ tau=self.sive_cfg.mask_tau)
147
  self.print(f"fg_img shape: {fg_img.shape}, bg_img: {bg_img.shape}")
148
 
149
  # background rendering
 
638
 
639
  # save final
640
  for i, r in enumerate(renderers):
641
+ ft_svg_path = self.result_path / f"finetune_final_p{i}.svg"
642
  r.pretty_save_svg(ft_svg_path)
643
  # save SVGs
644
  torchvision.utils.save_image(raster_imgs, fp=self.result_path / f'all_particles.png')
 
680
  return target_img
681
 
682
  def extract_object(self,
683
+ iter: Union[str, int],
684
  select_img: torch.Tensor,
685
  fg_attn_map: np.ndarray,
686
  bg_attn_map: np.ndarray,
 
687
  tau: float = 0.2):
688
  # attention to mask
689
  bool_fg_attn_map = fg_attn_map > tau
 
752
  return fg_img_final, bg_img_final, fg_mask, bg_mask
753
 
754
  def extract_ldm_attn(self,
755
+ iter: int,
756
  model_cfg: omegaconf.DictConfig,
757
  pipeline: DiffusionPipeline,
758
  prompts: str,
 
760
  attn_init_cfg: omegaconf.DictConfig,
761
  image_size: int,
762
  token_ind: int,
763
+ attn_init: bool = True):
764
  if token_ind <= 0:
765
  raise ValueError("The 'token_ind' should be greater than 0")
766
 
 
835
  self_attn_vis = np.copy(self_attn)
836
  self_attn_vis = self_attn_vis * 255
837
  self_attn_vis = np.repeat(np.expand_dims(self_attn_vis, axis=2), 3, axis=2).astype(np.uint8)
838
+ view_images(self_attn_vis, save_image=True, fp=self.sive_attn_dir / f"self-attn-final-{iter}.png")
839
 
840
  """get final attention map"""
841
  attn_map = attn_init_cfg.attn_coeff * cross_attn_map + (1 - attn_init_cfg.attn_coeff) * self_attn
 
845
  attn_map_vis = np.copy(attn_map)
846
  attn_map_vis = attn_map_vis * 255
847
  attn_map_vis = np.repeat(np.expand_dims(attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
848
+ view_images(attn_map_vis, save_image=True, fp=self.sive_attn_dir / f'fusion-attn-{iter}.png')
849
 
850
  # inverse fusion-attention to [0, 1]
851
  inverse_attn = 1 - attn_map
 
853
  reversed_attn_map_vis = np.copy(inverse_attn)
854
  reversed_attn_map_vis = reversed_attn_map_vis * 255
855
  reversed_attn_map_vis = np.repeat(np.expand_dims(reversed_attn_map_vis, axis=2), 3, axis=2).astype(np.uint8)
856
+ view_images(reversed_attn_map_vis, save_image=True,
857
+ fp=self.sive_attn_dir / f'reversed-fusion-attn-{iter}.png')
858
 
859
  self.print(f"-> fusion attn_map: {attn_map.shape}")
860
  else: