Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from omegaconf import OmegaConf
|
|
8 |
from huggingface_hub import snapshot_download
|
9 |
|
10 |
import torch
|
11 |
-
|
12 |
|
13 |
from pixelflow.scheduling_pixelflow import PixelFlowScheduler
|
14 |
from pixelflow.pipeline_pixelflow import PixelFlowPipeline
|
@@ -22,12 +22,12 @@ parser.add_argument('--class_cond', action='store_true', help='use class conditi
|
|
22 |
args = parser.parse_args()
|
23 |
|
24 |
# deploy
|
25 |
-
args.checkpoint = "
|
26 |
-
args.class_cond =
|
27 |
|
|
|
28 |
|
29 |
if args.class_cond:
|
30 |
-
output_dir = args.checkpoint
|
31 |
if not os.path.exists(output_dir):
|
32 |
snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
|
33 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
@@ -39,12 +39,13 @@ if args.class_cond:
|
|
39 |
resolution = 256
|
40 |
NUM_EXAMPLES = 4
|
41 |
else:
|
42 |
-
|
|
|
43 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
44 |
-
model = config_utils.instantiate_from_config(config.model)
|
45 |
print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
|
46 |
ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
|
47 |
-
text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl")
|
48 |
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
|
49 |
resolution = 1024
|
50 |
NUM_EXAMPLES = 1
|
@@ -54,6 +55,7 @@ model.eval()
|
|
54 |
print(f"outside space.GPU. {torch.cuda.is_available()=}")
|
55 |
if torch.cuda.is_available():
|
56 |
model = model.cuda()
|
|
|
57 |
device = torch.device("cuda")
|
58 |
else:
|
59 |
raise ValueError("No GPU")
|
@@ -68,8 +70,8 @@ pipeline = PixelFlowPipeline(
|
|
68 |
max_token_length=512,
|
69 |
)
|
70 |
|
71 |
-
@spaces.GPU
|
72 |
-
def infer(use_ode_dopri5, noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
73 |
print(f"inside space.GPU. {torch.cuda.is_available()=}")
|
74 |
seed_everything(seed)
|
75 |
with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
|
@@ -81,7 +83,7 @@ def infer(use_ode_dopri5, noise_shift, cfg_scale, class_label, seed, *num_steps_
|
|
81 |
guidance_scale=cfg_scale, # The guidance for the first frame, set it to 7 for 384p variant
|
82 |
device=device,
|
83 |
shift=noise_shift,
|
84 |
-
use_ode_dopri5=use_ode_dopri5,
|
85 |
)
|
86 |
samples = (samples * 255).round().astype("uint8")
|
87 |
samples = [Image.fromarray(sample) for sample in samples]
|
@@ -106,8 +108,8 @@ with gr.Blocks(css=css) as demo:
|
|
106 |
gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
|
107 |
gr.HTML("""
|
108 |
<div class="follow-link">
|
109 |
-
For online
|
110 |
-
<a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow">
|
111 |
For more details, refer to our
|
112 |
<a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
|
113 |
</div>
|
@@ -127,17 +129,16 @@ with gr.Blocks(css=css) as demo:
|
|
127 |
else:
|
128 |
# text input
|
129 |
user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
|
130 |
-
ode_dopri5 = gr.Checkbox(label="Dopri5 ODE", info="Use Dopri5 ODE solver")
|
131 |
noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
|
132 |
cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
|
133 |
num_steps_per_stage = []
|
134 |
for stage_idx in range(config.scheduler.num_stages):
|
135 |
-
num_steps = gr.Slider(minimum=1, maximum=100, step=1, value=
|
136 |
num_steps_per_stage.append(num_steps)
|
137 |
seed = gr.Slider(minimum=0, maximum=1000, step=1, value=42, label='Seed')
|
138 |
button = gr.Button("Generate", variant="primary")
|
139 |
with gr.Column():
|
140 |
output = gr.Gallery(label='Generated Images', height=700)
|
141 |
-
button.click(infer, inputs=[ode_dopri5, noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
|
142 |
demo.queue()
|
143 |
-
demo.launch(share=True, debug=True)
|
|
|
8 |
from huggingface_hub import snapshot_download
|
9 |
|
10 |
import torch
|
11 |
+
from transformers import T5EncoderModel, AutoTokenizer
|
12 |
|
13 |
from pixelflow.scheduling_pixelflow import PixelFlowScheduler
|
14 |
from pixelflow.pipeline_pixelflow import PixelFlowPipeline
|
|
|
22 |
args = parser.parse_args()
|
23 |
|
24 |
# deploy
|
25 |
+
args.checkpoint = "pixelflow_t2i"
|
26 |
+
args.class_cond = False
|
27 |
|
28 |
+
output_dir = args.checkpoint
|
29 |
|
30 |
if args.class_cond:
|
|
|
31 |
if not os.path.exists(output_dir):
|
32 |
snapshot_download(repo_id="ShoufaChen/PixelFlow-Class2Image", local_dir=output_dir)
|
33 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
|
|
39 |
resolution = 256
|
40 |
NUM_EXAMPLES = 4
|
41 |
else:
|
42 |
+
if not os.path.exists(output_dir):
|
43 |
+
snapshot_download(repo_id="ShoufaChen/PixelFlow-Text2Image", local_dir=output_dir)
|
44 |
config = OmegaConf.load(f"{output_dir}/config.yaml")
|
45 |
+
model = config_utils.instantiate_from_config(config.model)
|
46 |
print(f"Num of parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad)}")
|
47 |
ckpt = torch.load(f"{output_dir}/model.pt", map_location="cpu", weights_only=True)
|
48 |
+
text_encoder = T5EncoderModel.from_pretrained("google/flan-t5-xl")
|
49 |
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xl")
|
50 |
resolution = 1024
|
51 |
NUM_EXAMPLES = 1
|
|
|
55 |
print(f"outside space.GPU. {torch.cuda.is_available()=}")
|
56 |
if torch.cuda.is_available():
|
57 |
model = model.cuda()
|
58 |
+
text_encoder = text_encoder.cuda() if text_encoder else None
|
59 |
device = torch.device("cuda")
|
60 |
else:
|
61 |
raise ValueError("No GPU")
|
|
|
70 |
max_token_length=512,
|
71 |
)
|
72 |
|
73 |
+
@spaces.GPU(duration=120)
|
74 |
+
def infer(noise_shift, cfg_scale, class_label, seed, *num_steps_per_stage):
|
75 |
print(f"inside space.GPU. {torch.cuda.is_available()=}")
|
76 |
seed_everything(seed)
|
77 |
with torch.autocast("cuda", dtype=torch.bfloat16), torch.no_grad():
|
|
|
83 |
guidance_scale=cfg_scale, # The guidance for the first frame, set it to 7 for 384p variant
|
84 |
device=device,
|
85 |
shift=noise_shift,
|
86 |
+
use_ode_dopri5=False,
|
87 |
)
|
88 |
samples = (samples * 255).round().astype("uint8")
|
89 |
samples = [Image.fromarray(sample) for sample in samples]
|
|
|
108 |
gr.Markdown("# PixelFlow: Pixel-Space Generative Models with Flow")
|
109 |
gr.HTML("""
|
110 |
<div class="follow-link">
|
111 |
+
For online class-to-image generation, please try
|
112 |
+
<a href="https://huggingface.co/spaces/ShoufaChen/PixelFlow">class-to-image</a>.
|
113 |
For more details, refer to our
|
114 |
<a href="https://arxiv.org/abs/2504.07963">arXiv paper</a> and <a href="https://github.com/ShoufaChen/PixelFlow">GitHub repo</a>.
|
115 |
</div>
|
|
|
129 |
else:
|
130 |
# text input
|
131 |
user_input = gr.Textbox(label='Enter your prompt', show_label=False, max_lines=1, placeholder="Enter your prompt",)
|
|
|
132 |
noise_shift = gr.Slider(minimum=1.0, maximum=100.0, step=1, value=1.0, label='Noise Shift')
|
133 |
cfg_scale = gr.Slider(minimum=1, maximum=25, step=0.1, value=4.0, label='Classifier-free Guidance Scale')
|
134 |
num_steps_per_stage = []
|
135 |
for stage_idx in range(config.scheduler.num_stages):
|
136 |
+
num_steps = gr.Slider(minimum=1, maximum=100, step=1, value=5, label=f'Num Inference Steps (Stage {stage_idx})')
|
137 |
num_steps_per_stage.append(num_steps)
|
138 |
seed = gr.Slider(minimum=0, maximum=1000, step=1, value=42, label='Seed')
|
139 |
button = gr.Button("Generate", variant="primary")
|
140 |
with gr.Column():
|
141 |
output = gr.Gallery(label='Generated Images', height=700)
|
142 |
+
button.click(infer, inputs=[noise_shift, cfg_scale, user_input, seed, *num_steps_per_stage], outputs=[output])
|
143 |
demo.queue()
|
144 |
+
demo.launch(share=True, debug=True)
|