chaojiemao
committed on
Commit
•
c993df5
1
Parent(s):
3f04824
init
Browse files- README.md +93 -0
- configuration.json +1 -0
- datasets/stylebooth_dataset.zip +3 -0
- models/stylebooth-tb-5000-0.bin +3 -0
- tuners/clay_style_edit/0_SwiftLoRA/adapter_config.json +32 -0
- tuners/clay_style_edit/0_SwiftLoRA/adapter_model.bin +3 -0
- tuners/clay_style_edit/README.md +168 -0
- tuners/clay_style_edit/configuration.json +1 -0
- tuners/clay_style_edit/image.jpg +0 -0
- tuners/clay_style_edit/params.yaml +32 -0
README.md
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
frameworks:
|
3 |
+
- Pytorch
|
4 |
+
license: apache-2.0
|
5 |
+
tasks:
|
6 |
+
- image-style-transfer
|
7 |
+
|
8 |
+
#model-type:
|
9 |
+
##如 gpt、phi、llama、chatglm、baichuan 等
|
10 |
+
#- gpt
|
11 |
+
|
12 |
+
#domain:
|
13 |
+
##如 nlp、cv、audio、multi-modal
|
14 |
+
#- nlp
|
15 |
+
|
16 |
+
#language:
|
17 |
+
##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
|
18 |
+
#- cn
|
19 |
+
|
20 |
+
#metrics:
|
21 |
+
##如 CIDEr、BLEU、ROUGE 等
|
22 |
+
#- CIDEr
|
23 |
+
|
24 |
+
#tags:
|
25 |
+
##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
|
26 |
+
#- pretrained
|
27 |
+
|
28 |
+
#tools:
|
29 |
+
##如 vllm、fastchat、llamacpp、AdaSeq 等
|
30 |
+
#- vllm
|
31 |
+
---
|
32 |
+
# StyleBooth: Image Style Editing with Multimodal Instruction
|
33 |
+
|
34 |
+
## Run StyleBooth
|
35 |
+
- Code implementation: See model configuration and code based on [🪄SCEPTER](https://github.com/modelscope/scepter).
|
36 |
+
|
37 |
+
- Demo: Try [🖥️SCEPTER Studio](https://github.com/modelscope/scepter/tree/main?tab=readme-ov-file#%EF%B8%8F-scepter-studio).
|
38 |
+
|
39 |
+
- Easy run:
|
40 |
+
Try the following example script, adapted from [tests/modules/test_diffusion_inference.py](https://github.com/modelscope/scepter/blob/main/tests/modules/test_diffusion_inference.py), to run StyleBooth:
|
41 |
+
|
42 |
+
```python
|
43 |
+
# `pip install "scepter>0.0.4"` or
|
44 |
+
# clone newest SCEPTER and run `PYTHONPATH=./ python <this_script>` at the main branch root.
|
45 |
+
import os
|
46 |
+
import unittest
|
47 |
+
|
48 |
+
from PIL import Image
|
49 |
+
from torchvision.utils import save_image
|
50 |
+
|
51 |
+
from scepter.modules.inference.stylebooth_inference import StyleboothInference
|
52 |
+
from scepter.modules.utils.config import Config
|
53 |
+
from scepter.modules.utils.file_system import FS
|
54 |
+
from scepter.modules.utils.logger import get_logger
|
55 |
+
|
56 |
+
|
57 |
+
class DiffusionInferenceTest(unittest.TestCase):
|
58 |
+
def setUp(self):
|
59 |
+
print(('Testing %s.%s' % (type(self).__name__, self._testMethodName)))
|
60 |
+
self.logger = get_logger(name='scepter')
|
61 |
+
config_file = 'scepter/methods/studio/scepter_ui.yaml'
|
62 |
+
cfg = Config(cfg_file=config_file)
|
63 |
+
if 'FILE_SYSTEM' in cfg:
|
64 |
+
for fs_info in cfg['FILE_SYSTEM']:
|
65 |
+
FS.init_fs_client(fs_info)
|
66 |
+
self.tmp_dir = './cache/save_data/diffusion_inference'
|
67 |
+
if not os.path.exists(self.tmp_dir):
|
68 |
+
os.makedirs(self.tmp_dir)
|
69 |
+
|
70 |
+
def tearDown(self):
|
71 |
+
super().tearDown()
|
72 |
+
|
73 |
+
# uncomment this line to skip this module.
|
74 |
+
# @unittest.skip('')
|
75 |
+
def test_stylebooth(self):
|
76 |
+
config_file = 'scepter/methods/studio/inference/edit/stylebooth_tb_pro.yaml'
|
77 |
+
cfg = Config(cfg_file=config_file)
|
78 |
+
diff_infer = StyleboothInference(logger=self.logger)
|
79 |
+
diff_infer.init_from_cfg(cfg)
|
80 |
+
|
81 |
+
output = diff_infer({'prompt': 'Let this image be in the style of sai-lowpoly'},
|
82 |
+
style_edit_image=Image.open('asset/images/inpainting_text_ref/ex4_scene_im.jpg'),
|
83 |
+
style_guide_scale_text=7.5,
|
84 |
+
style_guide_scale_image=1.5,
|
85 |
+
stylebooth_state=True)
|
86 |
+
save_path = os.path.join(self.tmp_dir,
|
87 |
+
'stylebooth_test_lowpoly_cute_dog.png')
|
88 |
+
save_image(output['images'], save_path)
|
89 |
+
|
90 |
+
|
91 |
+
if __name__ == '__main__':
|
92 |
+
unittest.main()
|
93 |
+
```
|
configuration.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"framework":"Pytorch","task":"image-style-transfer"}
|
datasets/stylebooth_dataset.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:888263d7c24de3b4000ba8714d74e2051ce2b2e88dc593786478fd12441d2204
|
3 |
+
size 3273029877
|
models/stylebooth-tb-5000-0.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a89eba48e77030f312f1834de44acfe8fc64a452f4f61d05776b45a18f530ae
|
3 |
+
size 4265309292
|
tuners/clay_style_edit/0_SwiftLoRA/adapter_config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": null,
|
5 |
+
"bias": "none",
|
6 |
+
"enable_lora": null,
|
7 |
+
"fan_in_fan_out": false,
|
8 |
+
"inference_mode": false,
|
9 |
+
"init_lora_weights": true,
|
10 |
+
"layers_pattern": null,
|
11 |
+
"layers_to_transform": null,
|
12 |
+
"loftq_config": {},
|
13 |
+
"lora_alpha": 256,
|
14 |
+
"lora_dropout": 0.0,
|
15 |
+
"lora_dtype": null,
|
16 |
+
"lr_ratio": 16.0,
|
17 |
+
"megatron_config": null,
|
18 |
+
"megatron_core": "megatron.core",
|
19 |
+
"model_key_mapping": null,
|
20 |
+
"modules_to_save": null,
|
21 |
+
"peft_type": "LORA",
|
22 |
+
"r": 256,
|
23 |
+
"rank_pattern": {},
|
24 |
+
"revision": null,
|
25 |
+
"swift_type": "LORA",
|
26 |
+
"target_modules": "model.*(to_q|to_k|to_v|to_out.0|net.0.proj|net.2)$",
|
27 |
+
"task_type": null,
|
28 |
+
"use_dora": false,
|
29 |
+
"use_merged_linear": false,
|
30 |
+
"use_qa_lora": false,
|
31 |
+
"use_rslora": false
|
32 |
+
}
|
tuners/clay_style_edit/0_SwiftLoRA/adapter_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c929890675f2463b7120dc27d2516627cde5d6dd16588f13a3fb1fbd851e6ee
|
3 |
+
size 383114637
|
tuners/clay_style_edit/README.md
ADDED
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
frameworks:
|
3 |
+
- Pytorch
|
4 |
+
license: apache-2.0
|
5 |
+
tasks:
|
6 |
+
- efficient-diffusion-tuning
|
7 |
+
---
|
8 |
+
|
9 |
+
<p align="center">
|
10 |
+
|
11 |
+
<h2 align="center">clay_style_edit</h2>
|
12 |
+
<p align="center">
|
13 |
+
<br>
|
14 |
+
<a href="https://github.com/modelscope/scepter/"><img src="https://img.shields.io/badge/powered%20by-scepter-6FEBB9.svg"></a>
|
15 |
+
<br>
|
16 |
+
</p>
|
17 |
+
|
18 |
+
## Model Introduction
|
19 |
+
Transfer images into clay style
|
20 |
+
|
21 |
+
## Model Parameters
|
22 |
+
<table>
|
23 |
+
<thead>
|
24 |
+
<tr>
|
25 |
+
<th rowspan="2">Base Model</th>
|
26 |
+
<th rowspan="2">Tuner Type</th>
|
27 |
+
<th colspan="4">Training Parameters</th>
|
28 |
+
</tr>
|
29 |
+
<tr>
|
30 |
+
<th>Batch Size</th>
|
31 |
+
<th>Epochs</th>
|
32 |
+
<th>Learning Rate</th>
|
33 |
+
<th>Resolution</th>
|
34 |
+
</tr>
|
35 |
+
</thead>
|
36 |
+
<tbody align="center">
|
37 |
+
<tr>
|
38 |
+
<td rowspan="8">EDIT</td>
|
39 |
+
<td>LORA</td>
|
40 |
+
<td>1</td>
|
41 |
+
<td>50</td>
|
42 |
+
<td>0.0001</td>
|
43 |
+
<td>[512, 512]</td>
|
44 |
+
</tr>
|
45 |
+
</tbody>
|
46 |
+
</table>
|
47 |
+
|
48 |
+
|
49 |
+
<table>
|
50 |
+
<thead>
|
51 |
+
<tr>
|
52 |
+
<th>Data Type</th>
|
53 |
+
<th>Data Space</th>
|
54 |
+
<th>Data Name</th>
|
55 |
+
<th>Data Subset</th>
|
56 |
+
</tr>
|
57 |
+
</thead>
|
58 |
+
<tbody align="center">
|
59 |
+
<tr>
|
60 |
+
<td>Image Edit Generation</td>
|
61 |
+
<td></td>
|
62 |
+
<td>clay-v1-20240527_16_06_41</td>
|
63 |
+
<td>default</td>
|
64 |
+
</tr>
|
65 |
+
</tbody>
|
66 |
+
</table>
|
67 |
+
|
68 |
+
|
69 |
+
## Model Performance
|
70 |
+
Given the input "Convert this image into clay style," the following image may be generated:
|
71 |
+
|
72 |
+
![image](./image.jpg)
|
73 |
+
|
74 |
+
## Model Usage
|
75 |
+
### Command Line Execution
|
76 |
+
* Run with the Scepter SDK, using the configuration file that matches your base model and tuner type, as listed in the table below:
|
77 |
+
<table>
|
78 |
+
<thead>
|
79 |
+
<tr>
|
80 |
+
<th rowspan="2">Base Model</th>
|
81 |
+
<th rowspan="1">LORA</th>
|
82 |
+
<th colspan="1">SCE</th>
|
83 |
+
<th colspan="1">TEXT_LORA</th>
|
84 |
+
<th colspan="1">TEXT_SCE</th>
|
85 |
+
</tr>
|
86 |
+
</thead>
|
87 |
+
<tbody align="center">
|
88 |
+
<tr>
|
89 |
+
<td rowspan="8">SD1.5</td>
|
90 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/examples/generation/stable_diffusion_1.5_512_lora.yaml">lora_cfg</a></td>
|
91 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/scedit/t2i/sd15_512_sce_t2i_swift.yaml">sce_cfg</a></td>
|
92 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/examples/generation/stable_diffusion_1.5_512_text_lora.yaml">text_lora_cfg</a></td>
|
93 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/scedit/t2i/stable_diffusion_1.5_512_text_sce.yaml">text_sce_cfg</a></td>
|
94 |
+
</tr>
|
95 |
+
</tbody>
|
96 |
+
<tbody align="center">
|
97 |
+
<tr>
|
98 |
+
<td rowspan="8">SD2.1</td>
|
99 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/examples/generation/stable_diffusion_2.1_768_lora.yaml">lora_cfg</a></td>
|
100 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/scedit/t2i/sd21_768_sce_t2i_swift.yaml">sce_cfg</a></td>
|
101 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/examples/generation/stable_diffusion_2.1_768_text_lora.yaml">text_lora_cfg</a></td>
|
102 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/scedit/t2i/sd21_768_text_sce_t2i_swift.yaml">text_sce_cfg</a></td>
|
103 |
+
</tr>
|
104 |
+
</tbody>
|
105 |
+
<tbody align="center">
|
106 |
+
<tr>
|
107 |
+
<td rowspan="8">SDXL</td>
|
108 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/examples/generation/stable_diffusion_xl_1024_lora.yaml">lora_cfg</a></td>
|
109 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/scedit/t2i/sdxl_1024_sce_t2i_swift.yaml">sce_cfg</a></td>
|
110 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/examples/generation/stable_diffusion_xl_1024_text_lora.yaml">text_lora_cfg</a></td>
|
111 |
+
<td><a href="https://github.com/modelscope/scepter/blob/main/scepter/methods/scedit/t2i/sdxl_1024_text_sce_t2i_swift.yaml">text_sce_cfg</a></td>
|
112 |
+
</tr>
|
113 |
+
</tbody>
|
114 |
+
</table>
|
115 |
+
|
116 |
+
* Running from Source Code
|
117 |
+
|
118 |
+
```shell
|
119 |
+
git clone https://github.com/modelscope/scepter.git
|
120 |
+
cd scepter
|
121 |
+
pip install -r requirements/recommended.txt
|
122 |
+
PYTHONPATH=. python scepter/tools/run_inference.py \
|
123 |
+
--pretrained_model {this model folder} \
|
124 |
+
--cfg {lora_cfg} or {sce_cfg} or {text_lora_cfg} or {text_sce_cfg} \
|
125 |
+
--prompt 'Convert this image into clay style' \
|
126 |
+
--save_folder 'inference'
|
127 |
+
```
|
128 |
+
|
129 |
+
* Running after Installing Scepter (Recommended)
|
130 |
+
```shell
|
131 |
+
pip install scepter
|
132 |
+
python -m scepter.tools.run_inference \
|
133 |
+
--pretrained_model {this model folder} \
|
134 |
+
--cfg {lora_cfg} or {sce_cfg} or {text_lora_cfg} or {text_sce_cfg} \
|
135 |
+
--prompt 'Convert this image into clay style' \
|
136 |
+
--save_folder 'inference'
|
137 |
+
```
|
138 |
+
### Running with Scepter Studio
|
139 |
+
|
140 |
+
```shell
|
141 |
+
pip install scepter
|
142 |
+
# Launch Scepter Studio
|
143 |
+
python -m scepter.tools.webui
|
144 |
+
```
|
145 |
+
|
146 |
+
* Refer to the following guides for model usage.
|
147 |
+
|
148 |
+
(video url)
|
149 |
+
|
150 |
+
## Model Reference
|
151 |
+
If you wish to use this model for your own purposes, please cite it as follows.
|
152 |
+
```bibtex
|
153 |
+
@misc{clay_style_edit,
|
154 |
+
title = {clay_style_edit, {MODEL_URL}},
|
155 |
+
author = {{USER_NAME}},
|
156 |
+
year = {2024}
|
157 |
+
}
|
158 |
+
```
|
159 |
+
This model was trained using [Scepter Studio](https://github.com/modelscope/scepter); [Scepter](https://github.com/modelscope/scepter)
|
160 |
+
is an algorithm framework and toolbox developed by the Alibaba Tongyi Wanxiang Team. It provides a suite of tools and models for image generation, editing, fine-tuning, data processing, and more. If you find our work beneficial for your research,
|
161 |
+
please cite as follows.
|
162 |
+
```bibtex
|
163 |
+
@misc{scepter,
|
164 |
+
title = {SCEPTER, https://github.com/modelscope/scepter},
|
165 |
+
author = {SCEPTER},
|
166 |
+
year = {2023}
|
167 |
+
}
|
168 |
+
```
|
tuners/clay_style_edit/configuration.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
tuners/clay_style_edit/image.jpg
ADDED
tuners/clay_style_edit/params.yaml
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
DESCRIPTION: Transfer images into clay style
|
2 |
+
PARAMS:
|
3 |
+
base_model: edit
|
4 |
+
base_model_revision: EDIT
|
5 |
+
bucket_no_upscale: false
|
6 |
+
bucket_resolution_steps: 64.0
|
7 |
+
data_source: Dataset Management
|
8 |
+
data_type: Image Edit Generation
|
9 |
+
enable_resolution_bucket: false
|
10 |
+
eval_prompts: Convert this image into clay style
|
11 |
+
learning_rate: 0.0001
|
12 |
+
lora_alpha: 256.0
|
13 |
+
lora_rank: 256.0
|
14 |
+
max_bucket_resolution: 1024.0
|
15 |
+
min_bucket_resolution: 256.0
|
16 |
+
ms_data_space: ''
|
17 |
+
ms_data_subname: default
|
18 |
+
ori_data_name: clay-v1-20240527_16_06_41
|
19 |
+
prompt_prefix: ''
|
20 |
+
push_to_hub: false
|
21 |
+
replace_keywords: ''
|
22 |
+
resolution_height: 512
|
23 |
+
resolution_width: 512
|
24 |
+
save_interval: 25
|
25 |
+
sce_ratio: 1
|
26 |
+
text_lora_alpha: 256.0
|
27 |
+
text_lora_rank: 256.0
|
28 |
+
train_batch_size: 1
|
29 |
+
train_epoch: 50
|
30 |
+
tuner_name: LORA
|
31 |
+
work_dir: ''
|
32 |
+
work_name: ''
|