xi0v Fabrice-TIERCELIN commited on
Commit
30107ca
β€’
1 Parent(s): 1759c53

Use StableVideoDragNUWAPipeline (#46)

Browse files

- Use StableVideoDragNUWAPipeline (8ddc0b4356abc0f278aac3799820f5641d9dc500)


Co-authored-by: Fabrice TIERCELIN <[email protected]>

Files changed (2) hide show
  1. app.py +299 -299
  2. requirements.txt +8 -8
app.py CHANGED
@@ -1,300 +1,300 @@
1
- import gradio as gr
2
- import torch
3
- import os
4
- import random
5
- import time
6
- import math
7
- import spaces
8
- from glob import glob
9
- from pathlib import Path
10
- from typing import Optional, List, Union
11
-
12
- from diffusers import StableVideoDiffusionPipeline
13
- from diffusers.utils import export_to_video, export_to_gif
14
- from PIL import Image
15
-
16
- fps25Pipe = StableVideoDiffusionPipeline.from_pretrained(
17
- "vdo/stable-video-diffusion-img2vid-xt-1-1", torch_dtype=torch.float16, variant="fp16"
18
- )
19
- fps25Pipe.to("cuda")
20
-
21
- fps14Pipe = StableVideoDiffusionPipeline.from_pretrained(
22
- "stabilityai/stable-video-diffusion-img2vid", torch_dtype=torch.float16, variant="fp16"
23
- )
24
- fps14Pipe.to("cuda")
25
-
26
- dragnuwaPipe = StableVideoDiffusionPipeline.from_pretrained(
27
- "a-r-r-o-w/dragnuwa-svd", torch_dtype=torch.float16, variant="fp16", low_cpu_mem_usage=False, device_map=None
28
- )
29
- dragnuwaPipe.to("cuda")
30
-
31
- max_64_bit_int = 2**63 - 1
32
-
33
- def animate(
34
- image: Image,
35
- seed: Optional[int] = 42,
36
- randomize_seed: bool = True,
37
- motion_bucket_id: int = 127,
38
- fps_id: int = 25,
39
- noise_aug_strength: float = 0.1,
40
- decoding_t: int = 3,
41
- video_format: str = "mp4",
42
- frame_format: str = "webp",
43
- version: str = "auto",
44
- width: int = 1024,
45
- height: int = 576,
46
- motion_control: bool = False,
47
- num_inference_steps: int = 25
48
- ):
49
- start = time.time()
50
-
51
- if image is None:
52
- raise gr.Error("Please provide an image to animate.")
53
-
54
- output_folder = "outputs"
55
- image_data = resize_image(image, output_size=(width, height))
56
- if image_data.mode == "RGBA":
57
- image_data = image_data.convert("RGB")
58
-
59
- if motion_control:
60
- image_data = [image_data] * 2
61
-
62
- if randomize_seed:
63
- seed = random.randint(0, max_64_bit_int)
64
-
65
- if version == "auto":
66
- if 14 < fps_id:
67
- version = "svdxt"
68
- else:
69
- version = "svd"
70
-
71
- frames = animate_on_gpu(
72
- image_data,
73
- seed,
74
- motion_bucket_id,
75
- fps_id,
76
- noise_aug_strength,
77
- decoding_t,
78
- version,
79
- width,
80
- height,
81
- num_inference_steps
82
- )
83
-
84
- os.makedirs(output_folder, exist_ok=True)
85
- base_count = len(glob(os.path.join(output_folder, "*." + video_format)))
86
- result_path = os.path.join(output_folder, f"{base_count:06d}." + video_format)
87
-
88
- if video_format == "gif":
89
- video_path = None
90
- gif_path = result_path
91
- export_to_gif(image=frames, output_gif_path=gif_path, fps=fps_id)
92
- else:
93
- video_path = result_path
94
- gif_path = None
95
- export_to_video(frames, video_path, fps=fps_id)
96
-
97
- end = time.time()
98
- secondes = int(end - start)
99
- minutes = math.floor(secondes / 60)
100
- secondes = secondes - (minutes * 60)
101
- hours = math.floor(minutes / 60)
102
- minutes = minutes - (hours * 60)
103
- information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
104
- "Wait 2 min before a new run to avoid quota penalty or use another computer. " + \
105
- "The video has been generated in " + \
106
- ((str(hours) + " h, ") if hours != 0 else "") + \
107
- ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
108
- str(secondes) + " sec."
109
-
110
- return [
111
- # Display for video
112
- gr.update(value = video_path, visible = video_format != "gif"),
113
- # Display for gif
114
- gr.update(value = gif_path, visible = video_format == "gif"),
115
- # Download button
116
- gr.update(label = "πŸ’Ύ Download animation in *." + video_format + " format", value=result_path, visible=True),
117
- # Frames
118
- gr.update(label = "Generated frames in *." + frame_format + " format", format = frame_format, value = frames, visible = True),
119
- # Used seed
120
- seed,
121
- # Information
122
- gr.update(value = information, visible = True),
123
- # Reset button
124
- gr.update(visible = True)
125
- ]
126
-
127
- @torch.no_grad()
128
- @spaces.GPU(duration=180)
129
- def animate_on_gpu(
130
- image_data: Union[Image.Image, List[Image.Image]],
131
- seed: Optional[int] = 42,
132
- motion_bucket_id: int = 127,
133
- fps_id: int = 6,
134
- noise_aug_strength: float = 0.1,
135
- decoding_t: int = 3,
136
- version: str = "svdxt",
137
- width: int = 1024,
138
- height: int = 576,
139
- num_inference_steps: int = 25
140
- ):
141
- generator = torch.manual_seed(seed)
142
-
143
- if version == "dragnuwa":
144
- return dragnuwaPipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25, num_inference_steps=num_inference_steps).frames[0]
145
- elif version == "svdxt":
146
- return fps25Pipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25, num_inference_steps=num_inference_steps).frames[0]
147
- else:
148
- return fps14Pipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25, num_inference_steps=num_inference_steps).frames[0]
149
-
150
-
151
- def resize_image(image, output_size=(1024, 576)):
152
- # Do not touch the image if the size is good
153
- if image.width == output_size[0] and image.height == output_size[1]:
154
- return image
155
-
156
- # Calculate aspect ratios
157
- target_aspect = output_size[0] / output_size[1] # Aspect ratio of the desired size
158
- image_aspect = image.width / image.height # Aspect ratio of the original image
159
-
160
- # Resize if the original image is larger
161
- if image_aspect > target_aspect:
162
- # Resize the image to match the target height, maintaining aspect ratio
163
- new_height = output_size[1]
164
- new_width = int(new_height * image_aspect)
165
- resized_image = image.resize((new_width, new_height), Image.LANCZOS)
166
- # Calculate coordinates for cropping
167
- left = (new_width - output_size[0]) / 2
168
- top = 0
169
- right = (new_width + output_size[0]) / 2
170
- bottom = output_size[1]
171
- else:
172
- # Resize the image to match the target width, maintaining aspect ratio
173
- new_width = output_size[0]
174
- new_height = int(new_width / image_aspect)
175
- resized_image = image.resize((new_width, new_height), Image.LANCZOS)
176
- # Calculate coordinates for cropping
177
- left = 0
178
- top = (new_height - output_size[1]) / 2
179
- right = output_size[0]
180
- bottom = (new_height + output_size[1]) / 2
181
-
182
- # Crop the image
183
- return resized_image.crop((left, top, right, bottom))
184
-
185
- def reset():
186
- return [
187
- None,
188
- random.randint(0, max_64_bit_int),
189
- True,
190
- 127,
191
- 6,
192
- 0.1,
193
- 3,
194
- "mp4",
195
- "webp",
196
- "auto",
197
- 1024,
198
- 576,
199
- False,
200
- 25
201
- ]
202
-
203
- with gr.Blocks() as demo:
204
- gr.HTML("""
205
- <h1><center>Image-to-Video</center></h1>
206
- <big><center>Animate your image into 25 frames of 1024x576 pixels freely, without account, without watermark and download the video</center></big>
207
- <br/>
208
-
209
- <p>
210
- This demo is based on <i>Stable Video Diffusion</i> artificial intelligence.
211
- No prompt or camera control is handled here.
212
- To control motions, rather use <i><a href="https://huggingface.co/spaces/TencentARC/MotionCtrl_SVD">MotionCtrl SVD</a></i>.
213
- If you need 128 frames, rather use <i><a href="https://huggingface.co/spaces/modelscope/ExVideo-SVD-128f-v1">ExVideo</a></i>.
214
- </p>
215
- """)
216
- with gr.Row():
217
- with gr.Column():
218
- image = gr.Image(label="Upload your image", type="pil")
219
- with gr.Accordion("Advanced options", open=False):
220
- width = gr.Slider(label="Width", info="Width of the video", value=1024, minimum=256, maximum=1024, step=8)
221
- height = gr.Slider(label="Height", info="Height of the video", value=576, minimum=256, maximum=576, step=8)
222
- motion_control = gr.Checkbox(label="Motion control (experimental)", info="Fix the camera", value=False)
223
- video_format = gr.Radio([["*.mp4", "mp4"], ["*.avi", "avi"], ["*.wmv", "wmv"], ["*.mkv", "mkv"], ["*.mov", "mov"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
224
- frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
225
- fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=25, minimum=5, maximum=30)
226
- motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
227
- noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
228
- num_inference_steps = gr.Slider(label="Number inference steps", info="More denoising steps usually lead to a higher quality video at the expense of slower inference", value=25, minimum=1, maximum=100, step=1)
229
- decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
230
- version = gr.Radio([["Auto", "auto"], ["πŸƒπŸ»β€β™€οΈ SVD (trained on 14 f/s)", "svd"], ["πŸƒπŸ»β€β™€οΈπŸ’¨ SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA (unstable)", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
231
- seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
232
- randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
233
-
234
- generate_btn = gr.Button(value="πŸš€ Animate", variant="primary")
235
- reset_btn = gr.Button(value="🧹 Reinit page", variant="stop", elem_id="reset_button", visible = False)
236
-
237
- with gr.Column():
238
- video_output = gr.Video(label="Generated video", format="mp4", autoplay=True, show_download_button=False)
239
- gif_output = gr.Image(label="Generated video", format="gif", show_download_button=False, visible=False)
240
- download_button = gr.DownloadButton(label="πŸ’Ύ Download video", visible=False)
241
- information_msg = gr.HTML(visible=False)
242
- gallery = gr.Gallery(label="Generated frames", visible=False)
243
-
244
- generate_btn.click(fn=animate, inputs=[
245
- image,
246
- seed,
247
- randomize_seed,
248
- motion_bucket_id,
249
- fps_id,
250
- noise_aug_strength,
251
- decoding_t,
252
- video_format,
253
- frame_format,
254
- version,
255
- width,
256
- height,
257
- motion_control,
258
- num_inference_steps
259
- ], outputs=[
260
- video_output,
261
- gif_output,
262
- download_button,
263
- gallery,
264
- seed,
265
- information_msg,
266
- reset_btn
267
- ], api_name="video")
268
-
269
- reset_btn.click(fn = reset, inputs = [], outputs = [
270
- image,
271
- seed,
272
- randomize_seed,
273
- motion_bucket_id,
274
- fps_id,
275
- noise_aug_strength,
276
- decoding_t,
277
- video_format,
278
- frame_format,
279
- version,
280
- width,
281
- height,
282
- motion_control,
283
- num_inference_steps
284
- ], queue = False, show_progress = False)
285
-
286
- gr.Examples(
287
- examples=[
288
- ["Examples/Fire.webp", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False, 25],
289
- ["Examples/Water.png", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False, 25],
290
- ["Examples/Town.jpeg", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False, 25]
291
- ],
292
- inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, video_format, frame_format, version, width, height, motion_control, num_inference_steps],
293
- outputs=[video_output, gif_output, download_button, gallery, seed, information_msg, reset_btn],
294
- fn=animate,
295
- run_on_click=True,
296
- cache_examples=False,
297
- )
298
-
299
- if __name__ == "__main__":
300
  demo.launch(share=True, show_api=False)
 
1
+ import gradio as gr
2
+ import torch
3
+ import os
4
+ import random
5
+ import time
6
+ import math
7
+ import spaces
8
+ from glob import glob
9
+ from pathlib import Path
10
+ from typing import Optional, List, Union
11
+
12
+ from diffusers import StableVideoDiffusionPipeline, StableVideoDragNUWAPipeline
13
+ from diffusers.utils import export_to_video, export_to_gif
14
+ from PIL import Image
15
+
16
+ fps25Pipe = StableVideoDiffusionPipeline.from_pretrained(
17
+ "vdo/stable-video-diffusion-img2vid-xt-1-1", torch_dtype=torch.float16, variant="fp16"
18
+ )
19
+ fps25Pipe.to("cuda")
20
+
21
+ fps14Pipe = StableVideoDiffusionPipeline.from_pretrained(
22
+ "stabilityai/stable-video-diffusion-img2vid", torch_dtype=torch.float16, variant="fp16"
23
+ )
24
+ fps14Pipe.to("cuda")
25
+
26
+ dragnuwaPipe = StableVideoDragNUWAPipeline.from_pretrained(
27
+ "a-r-r-o-w/dragnuwa-svd", torch_dtype=torch.float16, variant="fp16", low_cpu_mem_usage=False, device_map=None
28
+ )
29
+ dragnuwaPipe.to("cuda")
30
+
31
+ max_64_bit_int = 2**63 - 1
32
+
33
+ def animate(
34
+ image: Image,
35
+ seed: Optional[int] = 42,
36
+ randomize_seed: bool = True,
37
+ motion_bucket_id: int = 127,
38
+ fps_id: int = 25,
39
+ noise_aug_strength: float = 0.1,
40
+ decoding_t: int = 3,
41
+ video_format: str = "mp4",
42
+ frame_format: str = "webp",
43
+ version: str = "auto",
44
+ width: int = 1024,
45
+ height: int = 576,
46
+ motion_control: bool = False,
47
+ num_inference_steps: int = 25
48
+ ):
49
+ start = time.time()
50
+
51
+ if image is None:
52
+ raise gr.Error("Please provide an image to animate.")
53
+
54
+ output_folder = "outputs"
55
+ image_data = resize_image(image, output_size=(width, height))
56
+ if image_data.mode == "RGBA":
57
+ image_data = image_data.convert("RGB")
58
+
59
+ if motion_control:
60
+ image_data = [image_data] * 3
61
+
62
+ if randomize_seed:
63
+ seed = random.randint(0, max_64_bit_int)
64
+
65
+ if version == "auto":
66
+ if 14 < fps_id:
67
+ version = "svdxt"
68
+ else:
69
+ version = "svd"
70
+
71
+ frames = animate_on_gpu(
72
+ image_data,
73
+ seed,
74
+ motion_bucket_id,
75
+ fps_id,
76
+ noise_aug_strength,
77
+ decoding_t,
78
+ version,
79
+ width,
80
+ height,
81
+ num_inference_steps
82
+ )
83
+
84
+ os.makedirs(output_folder, exist_ok=True)
85
+ base_count = len(glob(os.path.join(output_folder, "*." + video_format)))
86
+ result_path = os.path.join(output_folder, f"{base_count:06d}." + video_format)
87
+
88
+ if video_format == "gif":
89
+ video_path = None
90
+ gif_path = result_path
91
+ export_to_gif(image=frames, output_gif_path=gif_path, fps=fps_id)
92
+ else:
93
+ video_path = result_path
94
+ gif_path = None
95
+ export_to_video(frames, video_path, fps=fps_id)
96
+
97
+ end = time.time()
98
+ secondes = int(end - start)
99
+ minutes = math.floor(secondes / 60)
100
+ secondes = secondes - (minutes * 60)
101
+ hours = math.floor(minutes / 60)
102
+ minutes = minutes - (hours * 60)
103
+ information = ("Start the process again if you want a different result. " if randomize_seed else "") + \
104
+ "Wait 2 min before a new run to avoid quota penalty or use another computer. " + \
105
+ "The video has been generated in " + \
106
+ ((str(hours) + " h, ") if hours != 0 else "") + \
107
+ ((str(minutes) + " min, ") if hours != 0 or minutes != 0 else "") + \
108
+ str(secondes) + " sec."
109
+
110
+ return [
111
+ # Display for video
112
+ gr.update(value = video_path, visible = video_format != "gif"),
113
+ # Display for gif
114
+ gr.update(value = gif_path, visible = video_format == "gif"),
115
+ # Download button
116
+ gr.update(label = "πŸ’Ύ Download animation in *." + video_format + " format", value=result_path, visible=True),
117
+ # Frames
118
+ gr.update(label = "Generated frames in *." + frame_format + " format", format = frame_format, value = frames, visible = True),
119
+ # Used seed
120
+ seed,
121
+ # Information
122
+ gr.update(value = information, visible = True),
123
+ # Reset button
124
+ gr.update(visible = True)
125
+ ]
126
+
127
+ @torch.no_grad()
128
+ @spaces.GPU(duration=180)
129
+ def animate_on_gpu(
130
+ image_data: Union[Image.Image, List[Image.Image]],
131
+ seed: Optional[int] = 42,
132
+ motion_bucket_id: int = 127,
133
+ fps_id: int = 6,
134
+ noise_aug_strength: float = 0.1,
135
+ decoding_t: int = 3,
136
+ version: str = "svdxt",
137
+ width: int = 1024,
138
+ height: int = 576,
139
+ num_inference_steps: int = 25
140
+ ):
141
+ generator = torch.manual_seed(seed)
142
+
143
+ if version == "dragnuwa":
144
+ return dragnuwaPipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25, num_inference_steps=num_inference_steps).frames[0]
145
+ elif version == "svdxt":
146
+ return fps25Pipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25, num_inference_steps=num_inference_steps).frames[0]
147
+ else:
148
+ return fps14Pipe(image_data, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25, num_inference_steps=num_inference_steps).frames[0]
149
+
150
+
151
+ def resize_image(image, output_size=(1024, 576)):
152
+ # Do not touch the image if the size is good
153
+ if image.width == output_size[0] and image.height == output_size[1]:
154
+ return image
155
+
156
+ # Calculate aspect ratios
157
+ target_aspect = output_size[0] / output_size[1] # Aspect ratio of the desired size
158
+ image_aspect = image.width / image.height # Aspect ratio of the original image
159
+
160
+ # Resize if the original image is larger
161
+ if image_aspect > target_aspect:
162
+ # Resize the image to match the target height, maintaining aspect ratio
163
+ new_height = output_size[1]
164
+ new_width = int(new_height * image_aspect)
165
+ resized_image = image.resize((new_width, new_height), Image.LANCZOS)
166
+ # Calculate coordinates for cropping
167
+ left = (new_width - output_size[0]) / 2
168
+ top = 0
169
+ right = (new_width + output_size[0]) / 2
170
+ bottom = output_size[1]
171
+ else:
172
+ # Resize the image to match the target width, maintaining aspect ratio
173
+ new_width = output_size[0]
174
+ new_height = int(new_width / image_aspect)
175
+ resized_image = image.resize((new_width, new_height), Image.LANCZOS)
176
+ # Calculate coordinates for cropping
177
+ left = 0
178
+ top = (new_height - output_size[1]) / 2
179
+ right = output_size[0]
180
+ bottom = (new_height + output_size[1]) / 2
181
+
182
+ # Crop the image
183
+ return resized_image.crop((left, top, right, bottom))
184
+
185
+ def reset():
186
+ return [
187
+ None,
188
+ random.randint(0, max_64_bit_int),
189
+ True,
190
+ 127,
191
+ 6,
192
+ 0.1,
193
+ 3,
194
+ "mp4",
195
+ "webp",
196
+ "auto",
197
+ 1024,
198
+ 576,
199
+ False,
200
+ 25
201
+ ]
202
+
203
+ with gr.Blocks() as demo:
204
+ gr.HTML("""
205
+ <h1><center>Image-to-Video</center></h1>
206
+ <big><center>Animate your image into 25 frames of 1024x576 pixels freely, without account, without watermark and download the video</center></big>
207
+ <br/>
208
+
209
+ <p>
210
+ This demo is based on <i>Stable Video Diffusion</i> artificial intelligence.
211
+ No prompt or camera control is handled here.
212
+ To control motions, rather use <i><a href="https://huggingface.co/spaces/TencentARC/MotionCtrl_SVD">MotionCtrl SVD</a></i>.
213
+ If you need 128 frames, rather use <i><a href="https://huggingface.co/spaces/modelscope/ExVideo-SVD-128f-v1">ExVideo</a></i>.
214
+ </p>
215
+ """)
216
+ with gr.Row():
217
+ with gr.Column():
218
+ image = gr.Image(label="Upload your image", type="pil")
219
+ with gr.Accordion("Advanced options", open=False):
220
+ width = gr.Slider(label="Width", info="Width of the video", value=1024, minimum=256, maximum=1024, step=8)
221
+ height = gr.Slider(label="Height", info="Height of the video", value=576, minimum=256, maximum=576, step=8)
222
+ motion_control = gr.Checkbox(label="Motion control (experimental)", info="Fix the camera", value=False)
223
+ video_format = gr.Radio([["*.mp4", "mp4"], ["*.avi", "avi"], ["*.wmv", "wmv"], ["*.mkv", "mkv"], ["*.mov", "mov"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
224
+ frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
225
+ fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=25, minimum=5, maximum=30)
226
+ motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
227
+ noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
228
+ num_inference_steps = gr.Slider(label="Number inference steps", info="More denoising steps usually lead to a higher quality video at the expense of slower inference", value=25, minimum=1, maximum=100, step=1)
229
+ decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
230
+ version = gr.Radio([["Auto", "auto"], ["πŸƒπŸ»β€β™€οΈ SVD (trained on 14 f/s)", "svd"], ["πŸƒπŸ»β€β™€οΈπŸ’¨ SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA (unstable)", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
231
+ seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
232
+ randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
233
+
234
+ generate_btn = gr.Button(value="πŸš€ Animate", variant="primary")
235
+ reset_btn = gr.Button(value="🧹 Reinit page", variant="stop", elem_id="reset_button", visible = False)
236
+
237
+ with gr.Column():
238
+ video_output = gr.Video(label="Generated video", format="mp4", autoplay=True, show_download_button=False)
239
+ gif_output = gr.Image(label="Generated video", format="gif", show_download_button=False, visible=False)
240
+ download_button = gr.DownloadButton(label="πŸ’Ύ Download video", visible=False)
241
+ information_msg = gr.HTML(visible=False)
242
+ gallery = gr.Gallery(label="Generated frames", visible=False)
243
+
244
+ generate_btn.click(fn=animate, inputs=[
245
+ image,
246
+ seed,
247
+ randomize_seed,
248
+ motion_bucket_id,
249
+ fps_id,
250
+ noise_aug_strength,
251
+ decoding_t,
252
+ video_format,
253
+ frame_format,
254
+ version,
255
+ width,
256
+ height,
257
+ motion_control,
258
+ num_inference_steps
259
+ ], outputs=[
260
+ video_output,
261
+ gif_output,
262
+ download_button,
263
+ gallery,
264
+ seed,
265
+ information_msg,
266
+ reset_btn
267
+ ], api_name="video")
268
+
269
+ reset_btn.click(fn = reset, inputs = [], outputs = [
270
+ image,
271
+ seed,
272
+ randomize_seed,
273
+ motion_bucket_id,
274
+ fps_id,
275
+ noise_aug_strength,
276
+ decoding_t,
277
+ video_format,
278
+ frame_format,
279
+ version,
280
+ width,
281
+ height,
282
+ motion_control,
283
+ num_inference_steps
284
+ ], queue = False, show_progress = False)
285
+
286
+ gr.Examples(
287
+ examples=[
288
+ ["Examples/Fire.webp", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False, 25],
289
+ ["Examples/Water.png", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False, 25],
290
+ ["Examples/Town.jpeg", 42, True, 127, 25, 0.1, 3, "mp4", "png", "auto", 1024, 576, False, 25]
291
+ ],
292
+ inputs=[image, seed, randomize_seed, motion_bucket_id, fps_id, noise_aug_strength, decoding_t, video_format, frame_format, version, width, height, motion_control, num_inference_steps],
293
+ outputs=[video_output, gif_output, download_button, gallery, seed, information_msg, reset_btn],
294
+ fn=animate,
295
+ run_on_click=True,
296
+ cache_examples=False,
297
+ )
298
+
299
+ if __name__ == "__main__":
300
  demo.launch(share=True, show_api=False)
requirements.txt CHANGED
@@ -1,8 +1,8 @@
1
- https://gradio-builds.s3.amazonaws.com/756e3431d65172df986a7e335dce8136206a293a/gradio-4.7.1-py3-none-any.whl
2
- git+https://github.com/huggingface/diffusers.git
3
- transformers
4
- accelerate
5
- safetensors
6
- opencv-python
7
- uuid
8
- torch
 
1
+ git+https://github.com/Fabrice-TIERCELIN/diffusers.git
2
+ scipy
3
+ transformers
4
+ accelerate
5
+ safetensors
6
+ opencv-python
7
+ uuid
8
+ torch