Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
@@ -14,12 +14,16 @@ pipeline_tag: text-to-image
|
|
14 |
</div>
|
15 |
|
16 |
## Introduction
|
17 |
-
This open-source project is a pruned version of Qwen-Image: 20 layers were removed while the weights of 40 layers were retained, resulting in a model with 13.3B parameters. The pruned model shows a slight drop in objective metrics, and we will continue to iterate on it. The pruned version also supports adapting and loading community models such as LoRA and ControlNet. Please stay tuned. For the relevant inference scripts, please refer to https://github.com/OPPO-Mente-Lab/Qwen-Image-Pruning
|
18 |
|
19 |
<div align="center">
|
20 |
<img src="bench.png">
|
21 |
</div>
|
22 |
|
|
|
|
|
|
|
|
|
23 |
## Quick Start
|
24 |
|
25 |
Install the latest version of diffusers and pytorch
|
@@ -33,32 +37,26 @@ pip install git+https://github.com/huggingface/diffusers
|
|
33 |
import torch
|
34 |
import os
|
35 |
from diffusers import DiffusionPipeline
|
36 |
-
|
37 |
model_name = "OPPOer/Qwen-Image-Pruning"
|
38 |
-
|
39 |
if torch.cuda.is_available():
|
40 |
torch_dtype = torch.bfloat16
|
41 |
device = "cuda"
|
42 |
else:
|
43 |
torch_dtype = torch.bfloat16
|
44 |
device = "cpu"
|
45 |
-
|
46 |
pipe = DiffusionPipeline.from_pretrained(model_name, torch_dtype=torch_dtype)
|
47 |
pipe = pipe.to(device)
|
48 |
-
|
49 |
# Generate image
|
50 |
positive_magic = {"en": ", Ultra HD, 4K, cinematic composition.", # for english prompt,
|
51 |
"zh": ",超清,4K,电影级构图。" # for chinese prompt,
|
52 |
}
|
53 |
negative_prompt = " "
|
54 |
-
|
55 |
prompts = [
|
56 |
'一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 "一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。"',
|
57 |
'海报,温馨家庭场景,柔和阳光洒在野餐布上,色彩温暖明亮,主色调为浅黄、米白与淡绿,点缀着鲜艳的水果和野花,营造轻松愉快的氛围,画面简洁而富有层次,充满生活气息,传达家庭团聚与自然和谐的主题。文字内容:“共享阳光,共享爱。全家一起野餐,享受美好时光。让每一刻都充满欢笑与温暖。”',
|
58 |
'一个穿着校服的年轻女孩站在教室里,在黑板上写字。黑板中央用整洁的白粉笔写着“Introducing Qwen-Image, a foundational image generation model that excels in complex text rendering and precise image editing”。柔和的自然光线透过窗户,投下温柔的阴影。场景以写实的摄影风格呈现,细节精细,景深浅,色调温暖。女孩专注的表情和空气中的粉笔灰增添了动感。背景元素包括课桌和教育海报,略微模糊以突出中心动作。超精细32K分辨率,单反质量,柔和的散景效果,纪录片式的构图。',
|
59 |
'一个台球桌上放着两排台球,每排5个,第一行的台球上面分别写着"Qwen""Image" "将 "于" "8" ,第二排台球上面分别写着"月" "正" "式" "发" "布" 。',
|
60 |
]
|
61 |
-
|
62 |
output_dir = 'examples_Pruning'
|
63 |
os.makedirs(output_dir, exist_ok=True)
|
64 |
for prompt in prompts:
|
@@ -80,34 +78,28 @@ for prompt in prompts:
|
|
80 |
import torch
|
81 |
import os
|
82 |
from diffusers import DiffusionPipeline
|
83 |
-
|
84 |
model_name = "OPPOer/Qwen-Image-Pruning"
|
85 |
lora_name = 'flymy_realism.safetensors'
|
86 |
-
|
87 |
if torch.cuda.is_available():
|
88 |
torch_dtype = torch.bfloat16
|
89 |
device = "cuda"
|
90 |
else:
|
91 |
torch_dtype = torch.bfloat16
|
92 |
device = "cpu"
|
93 |
-
|
94 |
pipe = DiffusionPipeline.from_pretrained(model_name, torch_dtype=torch_dtype)
|
95 |
pipe = pipe.to(device)
|
96 |
pipe.load_lora_weights(lora_name, adapter_name="lora")
|
97 |
-
|
98 |
# Generate image
|
99 |
positive_magic = {"en": ", Ultra HD, 4K, cinematic composition.", # for english prompt,
|
100 |
"zh": ",超清,4K,电影级构图。" # for chinese prompt,
|
101 |
}
|
102 |
negative_prompt = " "
|
103 |
-
|
104 |
prompts = [
|
105 |
'一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 "一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。"',
|
106 |
'海报,温馨家庭场景,柔和阳光洒在野餐布上,色彩温暖明亮,主色调为浅黄、米白与淡绿,点缀着鲜艳的水果和野花,营造轻松愉快的氛围,画面简洁而富有层次,充满生活气息,传达家庭团聚与自然和谐的主题。文字内容:“共享阳光,共享爱。全家一起野餐,享受美好时光。让每一刻都充满欢笑与温暖。”',
|
107 |
'一个穿着校服的年轻女孩站在教室里,在黑板上写字。黑板中央用整洁的白粉笔写着“Introducing Qwen-Image, a foundational image generation model that excels in complex text rendering and precise image editing”。柔和的自然光线透过窗户,投下温柔的阴影。场景以写实的摄影风格呈现,细节精细,景深浅,色调温暖。女孩专注的表情和空气中的粉笔灰增添了动感。背景元素包括课桌和教育海报,略微模糊以突出中心动作。超精细32K分辨率,单反质量,柔和的散景效果,纪录片式的构图。',
|
108 |
'一个台球桌上放着两排台球,每排5个,第一行的台球上面分别写着"Qwen""Image" "将 "于" "8" ,第二排台球上面分别写着"月" "正" "式" "发" "布" 。',
|
109 |
]
|
110 |
-
|
111 |
output_dir = 'examples_Pruning+Realism_LoRA'
|
112 |
os.makedirs(output_dir, exist_ok=True)
|
113 |
for prompt in prompts:
|
@@ -128,16 +120,12 @@ for prompt in prompts:
|
|
128 |
```python
|
129 |
import os
|
130 |
import glob
|
131 |
-
|
132 |
import torch
|
133 |
from diffusers import DiffusionPipeline
|
134 |
-
|
135 |
from diffusers.utils import load_image
|
136 |
from diffusers import QwenImageControlNetPipeline, QwenImageControlNetModel
|
137 |
-
|
138 |
model_name = "OPPOer/Qwen-Image-Pruning"
|
139 |
controlnet_name = "InstantX/Qwen-Image-ControlNet-Union"
|
140 |
-
|
141 |
# Load the pipeline
|
142 |
if torch.cuda.is_available():
|
143 |
torch_dtype = torch.bfloat16
|
@@ -145,14 +133,11 @@ if torch.cuda.is_available():
|
|
145 |
else:
|
146 |
torch_dtype = torch.bfloat16
|
147 |
device = "cpu"
|
148 |
-
|
149 |
controlnet = QwenImageControlNetModel.from_pretrained(controlnet_name, torch_dtype=torch.bfloat16)
|
150 |
-
|
151 |
pipe = QwenImageControlNetPipeline.from_pretrained(
|
152 |
model_name, controlnet=controlnet, torch_dtype=torch.bfloat16
|
153 |
)
|
154 |
pipe = pipe.to(device)
|
155 |
-
|
156 |
# Generate image
|
157 |
prompt_dict = {
|
158 |
"soft_edge.png": "Photograph of a young man with light brown hair jumping mid-air off a large, reddish-brown rock. He's wearing a navy blue sweater, light blue shirt, gray pants, and brown shoes. His arms are outstretched, and he has a slight smile on his face. The background features a cloudy sky and a distant, leafless tree line. The grass around the rock is patchy.",
|
@@ -161,10 +146,8 @@ prompt_dict = {
|
|
161 |
"pose.png": "Photograph of a young man with light brown hair and a beard, wearing a beige flat cap, black leather jacket, gray shirt, brown pants, and white sneakers. He's sitting on a concrete ledge in front of a large circular window, with a cityscape reflected in the glass. The wall is cream-colored, and the sky is clear blue. His shadow is cast on the wall.",
|
162 |
}
|
163 |
controlnet_conditioning_scale = 1.0
|
164 |
-
|
165 |
output_dir = f'examples_Pruning+ControlNet'
|
166 |
os.makedirs(output_dir, exist_ok=True)
|
167 |
-
|
168 |
for path in glob.glob('conds/*'):
|
169 |
control_image = load_image(path)
|
170 |
image_name = path.split('/')[-1]
|
|
|
14 |
</div>
|
15 |
|
16 |
## Introduction
|
17 |
+
This open-source project is a pruned version of Qwen-Image: 20 layers were removed while the weights of 40 layers were retained, resulting in a model with 13.3B parameters. The pruned model shows a slight drop in objective metrics, and we will continue to iterate on it. The pruned version also supports adapting and loading community models such as LoRA and ControlNet. Please stay tuned. For the relevant inference scripts, please refer to **[Qwen-Image-13.3B](https://github.com/OPPO-Mente-Lab/Qwen-Image-Pruning)**.
|
18 |
|
19 |
<div align="center">
|
20 |
<img src="bench.png">
|
21 |
</div>
|
22 |
|
23 |
+
## Update
|
24 |
+
|
25 |
+
- 2025/09/24: We released **[Qwen-Image-12B](https://huggingface.co/OPPOer/Qwen-Image-12B)**, an open-source pruned 12B model. Its performance is comparable, both subjectively and objectively, to the previous 13.3B version obtained by pruning 20 layers. We will continue to optimize its performance going forward.
|
26 |
+
|
27 |
## Quick Start
|
28 |
|
29 |
Install the latest version of diffusers and pytorch
|
|
|
37 |
import torch
|
38 |
import os
|
39 |
from diffusers import DiffusionPipeline
|
|
|
40 |
model_name = "OPPOer/Qwen-Image-Pruning"
|
|
|
41 |
if torch.cuda.is_available():
|
42 |
torch_dtype = torch.bfloat16
|
43 |
device = "cuda"
|
44 |
else:
|
45 |
torch_dtype = torch.bfloat16
|
46 |
device = "cpu"
|
|
|
47 |
pipe = DiffusionPipeline.from_pretrained(model_name, torch_dtype=torch_dtype)
|
48 |
pipe = pipe.to(device)
|
|
|
49 |
# Generate image
|
50 |
positive_magic = {"en": ", Ultra HD, 4K, cinematic composition.", # for english prompt,
|
51 |
"zh": ",超清,4K,电影级构图。" # for chinese prompt,
|
52 |
}
|
53 |
negative_prompt = " "
|
|
|
54 |
prompts = [
|
55 |
'一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 "一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。"',
|
56 |
'海报,温馨家庭场景,柔和阳光洒在野餐布上,色彩温暖明亮,主色调为浅黄、米白与淡绿,点缀着鲜艳的水果和野花,营造轻松愉快的氛围,画面简洁而富有层次,充满生活气息,传达家庭团聚与自然和谐的主题。文字内容:“共享阳光,共享爱。全家一起野餐,享受美好时光。让每一刻都充满欢笑与温暖。”',
|
57 |
'一个穿着校服的年轻女孩站在教室里,在黑板上写字。黑板中央用整洁的白粉笔写着“Introducing Qwen-Image, a foundational image generation model that excels in complex text rendering and precise image editing”。柔和的自然光线透过窗户,投下温柔的阴影。场景以写实的摄影风格呈现,细节精细,景深浅,色调温暖。女孩专注的表情和空气中的粉笔灰增添了动感。背景元素包括课桌和教育海报,略微模糊以突出中心动作。超精细32K分辨率,单反质量,柔和的散景效果,纪录片式的构图。',
|
58 |
'一个台球桌上放着两排台球,每排5个,第一行的台球上面分别写着"Qwen""Image" "将 "于" "8" ,第二排台球上面分别写着"月" "正" "式" "发" "布" 。',
|
59 |
]
|
|
|
60 |
output_dir = 'examples_Pruning'
|
61 |
os.makedirs(output_dir, exist_ok=True)
|
62 |
for prompt in prompts:
|
|
|
78 |
import torch
|
79 |
import os
|
80 |
from diffusers import DiffusionPipeline
|
|
|
81 |
model_name = "OPPOer/Qwen-Image-Pruning"
|
82 |
lora_name = 'flymy_realism.safetensors'
|
|
|
83 |
if torch.cuda.is_available():
|
84 |
torch_dtype = torch.bfloat16
|
85 |
device = "cuda"
|
86 |
else:
|
87 |
torch_dtype = torch.bfloat16
|
88 |
device = "cpu"
|
|
|
89 |
pipe = DiffusionPipeline.from_pretrained(model_name, torch_dtype=torch_dtype)
|
90 |
pipe = pipe.to(device)
|
91 |
pipe.load_lora_weights(lora_name, adapter_name="lora")
|
|
|
92 |
# Generate image
|
93 |
positive_magic = {"en": ", Ultra HD, 4K, cinematic composition.", # for english prompt,
|
94 |
"zh": ",超清,4K,电影级构图。" # for chinese prompt,
|
95 |
}
|
96 |
negative_prompt = " "
|
|
|
97 |
prompts = [
|
98 |
'一个穿着"QWEN"标志的T恤的中国美女正拿着黑色的马克笔面相镜头微笑。她身后的玻璃板上手写体写着 "一、Qwen-Image的技术路线: 探索视觉生成基础模型的极限,开创理解与生成一体化的未来。二、Qwen-Image的模型特色:1、复杂文字渲染。支持中英渲染、自动布局; 2、精准图像编辑。支持文字编辑、物体增减、风格变换。三、Qwen-Image的未来愿景:赋能专业内容创作、助力生成式AI发展。"',
|
99 |
'海报,温馨家庭场景,柔和阳光洒在野餐布上,色彩温暖明亮,主色调为浅黄、米白与淡绿,点缀着鲜艳的水果和野花,营造轻松愉快的氛围,画面简洁而富有层次,充满生活气息,传达家庭团聚与自然和谐的主题。文字内容:“共享阳光,共享爱。全家一起野餐,享受美好时光。让每一刻都充满欢笑与温暖。”',
|
100 |
'一个穿着校服的年轻女孩站在教室里,在黑板上写字。黑板中央用整洁的白粉笔写着“Introducing Qwen-Image, a foundational image generation model that excels in complex text rendering and precise image editing”。柔和的自然光线透过窗户,投下温柔的阴影。场景以写实的摄影风格呈现,细节精细,景深浅,色调温暖。女孩专注的表情和空气中的粉笔灰增添了动感。背景元素包括课桌和教育海报,略微模糊以突出中心动作。超精细32K分辨率,单反质量,柔和的散景效果,纪录片式的构图。',
|
101 |
'一个台球桌上放着两排台球,每排5个,第一行的台球上面分别写着"Qwen""Image" "将 "于" "8" ,第二排台球上面分别写着"月" "正" "式" "发" "布" 。',
|
102 |
]
|
|
|
103 |
output_dir = 'examples_Pruning+Realism_LoRA'
|
104 |
os.makedirs(output_dir, exist_ok=True)
|
105 |
for prompt in prompts:
|
|
|
120 |
```python
|
121 |
import os
|
122 |
import glob
|
|
|
123 |
import torch
|
124 |
from diffusers import DiffusionPipeline
|
|
|
125 |
from diffusers.utils import load_image
|
126 |
from diffusers import QwenImageControlNetPipeline, QwenImageControlNetModel
|
|
|
127 |
model_name = "OPPOer/Qwen-Image-Pruning"
|
128 |
controlnet_name = "InstantX/Qwen-Image-ControlNet-Union"
|
|
|
129 |
# Load the pipeline
|
130 |
if torch.cuda.is_available():
|
131 |
torch_dtype = torch.bfloat16
|
|
|
133 |
else:
|
134 |
torch_dtype = torch.bfloat16
|
135 |
device = "cpu"
|
|
|
136 |
controlnet = QwenImageControlNetModel.from_pretrained(controlnet_name, torch_dtype=torch.bfloat16)
|
|
|
137 |
pipe = QwenImageControlNetPipeline.from_pretrained(
|
138 |
model_name, controlnet=controlnet, torch_dtype=torch.bfloat16
|
139 |
)
|
140 |
pipe = pipe.to(device)
|
|
|
141 |
# Generate image
|
142 |
prompt_dict = {
|
143 |
"soft_edge.png": "Photograph of a young man with light brown hair jumping mid-air off a large, reddish-brown rock. He's wearing a navy blue sweater, light blue shirt, gray pants, and brown shoes. His arms are outstretched, and he has a slight smile on his face. The background features a cloudy sky and a distant, leafless tree line. The grass around the rock is patchy.",
|
|
|
146 |
"pose.png": "Photograph of a young man with light brown hair and a beard, wearing a beige flat cap, black leather jacket, gray shirt, brown pants, and white sneakers. He's sitting on a concrete ledge in front of a large circular window, with a cityscape reflected in the glass. The wall is cream-colored, and the sky is clear blue. His shadow is cast on the wall.",
|
147 |
}
|
148 |
controlnet_conditioning_scale = 1.0
|
|
|
149 |
output_dir = f'examples_Pruning+ControlNet'
|
150 |
os.makedirs(output_dir, exist_ok=True)
|
|
|
151 |
for path in glob.glob('conds/*'):
|
152 |
control_image = load_image(path)
|
153 |
image_name = path.split('/')[-1]
|