innoai committed on
Commit 70460d3 · verified · 1 Parent(s): 4ac6efd

Upload app.py

Files changed (1)
  1. app.py +418 -393
app.py CHANGED
@@ -1,393 +1,418 @@
- import gradio as gr
-
- from PIL import Image
- from moviepy.editor import VideoFileClip, AudioFileClip
-
- import os
- from openai import OpenAI
- import subprocess
- from pathlib import Path
- import uuid
- import tempfile
- import shlex
- import shutil
-
- # Supported models configuration
- MODELS = {
-     "deepseek-ai/DeepSeek-V3": {
-         "base_url": "https://api.deepseek.com/v1",
-         "env_key": "DEEPSEEK_API_KEY",
-     },
-     "Qwen/Qwen2.5-Coder-32B-Instruct": {
-         "base_url": "https://api-inference.huggingface.co/v1/",
-         "env_key": "HF_TOKEN",
-     },
- }
-
- # Initialize client with first available model
- client = OpenAI(
-     base_url=next(iter(MODELS.values()))["base_url"],
-     api_key=os.environ[next(iter(MODELS.values()))["env_key"]],
- )
-
- allowed_medias = [
-     ".png",
-     ".jpg",
-     ".webp",
-     ".jpeg",
-     ".tiff",
-     ".bmp",
-     ".gif",
-     ".svg",
-     ".mp3",
-     ".wav",
-     ".ogg",
-     ".mp4",
-     ".avi",
-     ".mov",
-     ".mkv",
-     ".flv",
-     ".wmv",
-     ".webm",
-     ".mpg",
-     ".mpeg",
-     ".m4v",
-     ".3gp",
-     ".3g2",
-     ".3gpp",
- ]
-
-
- def get_files_infos(files):
-     results = []
-     for file in files:
-         file_path = Path(file.name)
-         info = {}
-         info["size"] = os.path.getsize(file_path)
-         # Sanitize filename by replacing spaces with underscores
-         info["name"] = file_path.name.replace(" ", "_")
-         file_extension = file_path.suffix
-
-         if file_extension in (".mp4", ".avi", ".mkv", ".mov"):
-             info["type"] = "video"
-             video = VideoFileClip(file.name)
-             info["duration"] = video.duration
-             info["dimensions"] = "{}x{}".format(video.size[0], video.size[1])
-             if video.audio:
-                 info["type"] = "video/audio"
-                 info["audio_channels"] = video.audio.nchannels
-             video.close()
-         elif file_extension in (".mp3", ".wav"):
-             info["type"] = "audio"
-             audio = AudioFileClip(file.name)
-             info["duration"] = audio.duration
-             info["audio_channels"] = audio.nchannels
-             audio.close()
-         elif file_extension in (
-             ".png",
-             ".jpg",
-             ".jpeg",
-             ".tiff",
-             ".bmp",
-             ".gif",
-             ".svg",
-         ):
-             info["type"] = "image"
-             img = Image.open(file.name)
-             info["dimensions"] = "{}x{}".format(img.size[0], img.size[1])
-         results.append(info)
-     return results
-
-
- def get_completion(prompt, files_info, top_p, temperature, model_choice):
-     # Create table header
-     files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
-     files_info_string += "|------|------|------------|-----------|--------|\n"
-
-     # Add each file as a table row
-     for file_info in files_info:
-         dimensions = file_info.get("dimensions", "-")
-         duration = (
-             f"{file_info.get('duration', '-')}s" if "duration" in file_info else "-"
-         )
-         audio = (
-             f"{file_info.get('audio_channels', '-')} channels"
-             if "audio_channels" in file_info
-             else "-"
-         )
-
-         files_info_string += f"| {file_info['type']} | {file_info['name']} | {dimensions} | {duration} | {audio} |\n"
-
-     messages = [
-         {
-             "role": "system",
-             "content": """
- You are a very experienced media engineer, controlling a UNIX terminal.
- You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.
-
- You are given:
- (1) a set of video, audio and/or image assets, including their name, duration, dimensions and file size
- (2) the description of a new video you need to create from the list of assets
-
- Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
-
- Key requirements:
- - Use the absolute minimum number of ffmpeg options needed
- - Avoid complex filter chains or filter_complex if possible
- - Prefer simple concatenation, scaling, and basic filters
- - Output exactly ONE command that will be directly pasted into the terminal
- - Never output multiple commands chained together
- - Output the command in a single line (no line breaks or multiple lines)
- - If the user asks for waveform visualization, make sure to set the mode to `line` and use the full width of the video. Also mix the audio down to a single channel.
- - For image sequences: Use -framerate and pattern matching (like 'img%d.jpg') when possible, falling back to individual image processing with -loop 1 and appropriate filters only when necessary.
- - When showing file operations or commands, always use explicit paths and filenames without wildcards - avoid using asterisk (*) or glob patterns. Instead, use specific numbered sequences (like %d), explicit file lists, or show the full filename.
-
- Remember: Simpler is better. Only use advanced ffmpeg features if absolutely necessary for the requested output.
- """,
-         },
-         {
-             "role": "user",
-             "content": f"""Always output the media as video/mp4 and name the output file "output.mp4". Provide only the shell command without any explanations.
- The current assets and objective follow. Reply with the FFMPEG command:
-
- AVAILABLE ASSETS LIST:
-
- {files_info_string}
-
- OBJECTIVE: {prompt} and output at "output.mp4"
- YOUR FFMPEG COMMAND:
- """,
-         },
-     ]
-     try:
-         # Print the complete prompt
-         print("\n=== COMPLETE PROMPT ===")
-         for msg in messages:
-             print(f"\n[{msg['role'].upper()}]:")
-             print(msg["content"])
-         print("=====================\n")
-
-         if model_choice not in MODELS:
-             raise ValueError(f"Model {model_choice} is not supported")
-
-         model_config = MODELS[model_choice]
-         client.base_url = model_config["base_url"]
-         client.api_key = os.environ[model_config["env_key"]]
-         model = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice
-
-         completion = client.chat.completions.create(
-             model=model,
-             messages=messages,
-             temperature=temperature,
-             top_p=top_p,
-             max_tokens=2048,
-         )
-         content = completion.choices[0].message.content
-         # Extract command from code block if present
-         if "```" in content:
-             # Find content between ```sh or ```bash and the next ```
-             import re
-
-             command = re.search(r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL)
-             if command:
-                 command = command.group(1).strip()
-             else:
-                 command = content.replace("\n", "")
-         else:
-             command = content.replace("\n", "")
-
-         # strip the hardcoded output file name; the real output path is appended later
-         command = command.replace("output.mp4", "")
-
-         return command
-     except Exception as e:
-         raise Exception("API Error") from e
-
-
- def update(
-     files,
-     prompt,
-     top_p=1,
-     temperature=1,
-     model_choice="Qwen/Qwen2.5-Coder-32B-Instruct",
- ):
-     if prompt == "":
-         raise gr.Error("Please enter a prompt.")
-
-     files_info = get_files_infos(files)
-     # disable this if you're running the app locally or on your own server
-     for file_info in files_info:
-         if file_info["type"] == "video":
-             if file_info["duration"] > 120:
-                 raise gr.Error(
-                     "Please make sure all videos are less than 2 minutes long."
-                 )
-         if file_info["size"] > 100000000:
-             raise gr.Error("Please make sure all files are less than 100MB in size.")
-
-     attempts = 0
-     while attempts < 2:
-         print("ATTEMPT", attempts)
-         try:
-             command_string = get_completion(
-                 prompt, files_info, top_p, temperature, model_choice
-             )
-             print(
-                 f"""///PROMPT {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
-             )
-
-             # split command string into list of arguments
-             args = shlex.split(command_string)
-             if args[0] != "ffmpeg":
-                 raise Exception("Command does not start with ffmpeg")
-             temp_dir = tempfile.mkdtemp()
-             # copy files to temp dir with sanitized names
-             for file in files:
-                 file_path = Path(file.name)
-                 sanitized_name = file_path.name.replace(" ", "_")
-                 shutil.copy(file_path, Path(temp_dir) / sanitized_name)
-
-             # dry run to check that the generated ffmpeg command is valid
-             ffmpeg_dry_run = subprocess.run(
-                 args + ["-f", "null", "-"],
-                 stderr=subprocess.PIPE,
-                 text=True,
-                 cwd=temp_dir,
-             )
-             if ffmpeg_dry_run.returncode == 0:
-                 print("Command is valid.")
-             else:
-                 print("Command is not valid. Error output:")
-                 print(ffmpeg_dry_run.stderr)
-                 raise Exception(
-                     "FFMPEG generated command is not valid. Please try something else."
-                 )
-
-             output_file_name = f"output_{uuid.uuid4()}.mp4"
-             output_file_path = str((Path(temp_dir) / output_file_name).resolve())
-             final_command = args + ["-y", output_file_path]
-             print(
-                 f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
-             )
-             subprocess.run(final_command, cwd=temp_dir)
-             generated_command = f"### Generated Command\n```bash\nffmpeg {' '.join(args[1:])} -y output.mp4\n```"
-             return output_file_path, gr.update(value=generated_command)
-         except Exception as e:
-             attempts += 1
-             if attempts >= 2:
-                 print("FROM UPDATE", e)
-                 raise gr.Error(str(e))
-
-
- with gr.Blocks() as demo:
-     gr.Markdown(
-         """
- # 🏞 AI Video Composer
- Compose new videos from your assets using natural language. Add video, image and audio assets and let [Qwen2.5-Coder](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) or [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base) generate a new video for you (using FFMPEG).
- """,
-         elem_id="header",
-     )
-     with gr.Row():
-         with gr.Column():
-             user_files = gr.File(
-                 file_count="multiple",
-                 label="Media files",
-                 file_types=allowed_medias,
-             )
-             user_prompt = gr.Textbox(
-                 placeholder="e.g.: Remove the first 3 seconds of the video",
-                 label="Instructions",
-             )
-             btn = gr.Button("Run")
-             with gr.Accordion("Parameters", open=False):
-                 model_choice = gr.Radio(
-                     choices=list(MODELS.keys()),
-                     value=list(MODELS.keys())[0],
-                     label="Model",
-                 )
-                 top_p = gr.Slider(
-                     minimum=0,
-                     maximum=1.0,
-                     value=0.7,
-                     step=0.05,
-                     interactive=True,
-                     label="Top-p (nucleus sampling)",
-                 )
-                 temperature = gr.Slider(
-                     minimum=0,
-                     maximum=5.0,
-                     value=0.1,
-                     step=0.1,
-                     interactive=True,
-                     label="Temperature",
-                 )
-         with gr.Column():
-             generated_video = gr.Video(
-                 interactive=False, label="Generated Video", include_audio=True
-             )
-             generated_command = gr.Markdown()
-
-     btn.click(
-         fn=update,
-         inputs=[user_files, user_prompt, top_p, temperature, model_choice],
-         outputs=[generated_video, generated_command],
-     )
-     with gr.Row():
-         gr.Examples(
-             examples=[
-                 [
-                     ["./examples/ai_talk.wav", "./examples/bg-image.png"],
-                     "Use the image as the background with a waveform visualization for the audio positioned in the center of the video.",
-                     0.7,
-                     0.1,
-                     (
-                         list(MODELS.keys())[1]
-                         if len(MODELS) > 1
-                         else list(MODELS.keys())[0]
-                     ),
-                 ],
-                 [
-                     ["./examples/ai_talk.wav", "./examples/bg-image.png"],
-                     "Use the image as the background with a waveform visualization for the audio positioned in the center of the video. Make sure the waveform has a max height of 250 pixels.",
-                     0.7,
-                     0.1,
-                     list(MODELS.keys())[0],
-                 ],
-                 [
-                     [
-                         "./examples/cat1.jpeg",
-                         "./examples/cat2.jpeg",
-                         "./examples/cat3.jpeg",
-                         "./examples/cat4.jpeg",
-                         "./examples/cat5.jpeg",
-                         "./examples/cat6.jpeg",
-                         "./examples/heat-wave.mp3",
-                     ],
-                     "Create a 3x2 grid of the cat images with the audio as background music. Make the video duration match the audio duration.",
-                     0.7,
-                     0.1,
-                     (
-                         list(MODELS.keys())[1]
-                         if len(MODELS) > 1
-                         else list(MODELS.keys())[0]
-                     ),
-                 ],
-             ],
-             inputs=[user_files, user_prompt, top_p, temperature, model_choice],
-             outputs=[generated_video, generated_command],
-             fn=update,
-             run_on_click=True,
-             cache_examples=False,
-         )
-
-     with gr.Row():
-         gr.Markdown(
-             """
- If you have an idea to improve this, please open a PR:
-
- [![Open a Pull Request](https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-pr-lg-light.svg)](https://huggingface.co/spaces/huggingface-projects/video-composer-gpt4/discussions)
- """,
-         )
-
- demo.queue(default_concurrency_limit=200)
- demo.launch(show_api=False, ssr_mode=True)
+ import gradio as gr
+
+ from PIL import Image
+ from moviepy.editor import VideoFileClip, AudioFileClip
+
+ import os
+ from openai import OpenAI
+ import subprocess
+ from pathlib import Path
+ import uuid
+ import tempfile
+ import shlex
+ import shutil
+
+ # Supported models configuration
+ MODELS = {
+     "deepseek-ai/DeepSeek-V3": {
+         "base_url": "https://api.deepseek.com/v1",
+         "env_key": "DEEPSEEK_API_KEY",
+     },
+     "Qwen/Qwen2.5-Coder-32B-Instruct": {
+         "base_url": "https://api-inference.huggingface.co/v1/",
+         "env_key": "HF_TOKEN",
+     },
+ }
+
+ # Initialize client with first available model
+ client = OpenAI(
+     base_url=next(iter(MODELS.values()))["base_url"],
+     api_key=os.environ[next(iter(MODELS.values()))["env_key"]],
+ )
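+ # NOTE: this single client is shared; get_completion() re-points its base_url
+ # and api_key to whichever model the user selects at request time.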
+
+ allowed_medias = [
+     ".png",
+     ".jpg",
+     ".webp",
+     ".jpeg",
+     ".tiff",
+     ".bmp",
+     ".gif",
+     ".svg",
+     ".mp3",
+     ".wav",
+     ".ogg",
+     ".mp4",
+     ".avi",
+     ".mov",
+     ".mkv",
+     ".flv",
+     ".wmv",
+     ".webm",
+     ".mpg",
+     ".mpeg",
+     ".m4v",
+     ".3gp",
+     ".3g2",
+     ".3gpp",
+ ]
+
+
+ def get_files_infos(files):
+     results = []
+     for file in files:
+         file_path = Path(file.name)
+         info = {}
+         info["size"] = os.path.getsize(file_path)
+         # Sanitize filename by replacing spaces with underscores
+         info["name"] = file_path.name.replace(" ", "_")
+         file_extension = file_path.suffix
+
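+         # Probe metadata: videos report duration and dimensions (plus audio
+         # channels when a soundtrack exists), audio files report duration and
+         # channels, and images report dimensions only.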
+         if file_extension in (".mp4", ".avi", ".mkv", ".mov"):
+             info["type"] = "video"
+             video = VideoFileClip(file.name)
+             info["duration"] = video.duration
+             info["dimensions"] = "{}x{}".format(video.size[0], video.size[1])
+             if video.audio:
+                 info["type"] = "video/audio"
+                 info["audio_channels"] = video.audio.nchannels
+             video.close()
+         elif file_extension in (".mp3", ".wav"):
+             info["type"] = "audio"
+             audio = AudioFileClip(file.name)
+             info["duration"] = audio.duration
+             info["audio_channels"] = audio.nchannels
+             audio.close()
+         elif file_extension in (
+             ".png",
+             ".jpg",
+             ".jpeg",
+             ".tiff",
+             ".bmp",
+             ".gif",
+             ".svg",
+         ):
+             info["type"] = "image"
+             img = Image.open(file.name)
+             info["dimensions"] = "{}x{}".format(img.size[0], img.size[1])
+         results.append(info)
+     return results
+
+
+ def get_completion(prompt, files_info, top_p, temperature, model_choice):
+     # Create table header
+     files_info_string = "| Type | Name | Dimensions | Duration | Audio Channels |\n"
+     files_info_string += "|------|------|------------|-----------|--------|\n"
+
+     # Add each file as a table row
+     for file_info in files_info:
+         dimensions = file_info.get("dimensions", "-")
+         duration = (
+             f"{file_info.get('duration', '-')}s" if "duration" in file_info else "-"
+         )
+         audio = (
+             f"{file_info.get('audio_channels', '-')} channels"
+             if "audio_channels" in file_info
+             else "-"
+         )
+
+         files_info_string += f"| {file_info['type']} | {file_info['name']} | {dimensions} | {duration} | {audio} |\n"
+
+     messages = [
+         {
+             "role": "system",
+             "content": """
+ You are a very experienced media engineer, controlling a UNIX terminal.
+ You are an FFMPEG expert with years of experience and multiple contributions to the FFMPEG project.
+
+ You are given:
+ (1) a set of video, audio and/or image assets, including their name, duration, dimensions and file size
+ (2) the description of a new video you need to create from the list of assets
+
+ Your objective is to generate the SIMPLEST POSSIBLE single ffmpeg command to create the requested video.
+
+ Key requirements:
+ - Use the absolute minimum number of ffmpeg options needed
+ - Avoid complex filter chains or filter_complex if possible
+ - Prefer simple concatenation, scaling, and basic filters
+ - Output exactly ONE command that will be directly pasted into the terminal
+ - Never output multiple commands chained together
+ - Output the command in a single line (no line breaks or multiple lines)
+ - If the user asks for waveform visualization, make sure to set the mode to `line` and use the full width of the video. Also mix the audio down to a single channel.
+ - For image sequences: Use -framerate and pattern matching (like 'img%d.jpg') when possible, falling back to individual image processing with -loop 1 and appropriate filters only when necessary.
+ - When showing file operations or commands, always use explicit paths and filenames without wildcards - avoid using asterisk (*) or glob patterns. Instead, use specific numbered sequences (like %d), explicit file lists, or show the full filename.
+
+ Remember: Simpler is better. Only use advanced ffmpeg features if absolutely necessary for the requested output.
+ """,
+         },
+         {
+             "role": "user",
+             "content": f"""Always output the media as video/mp4 and name the output file "output.mp4". Provide only the shell command without any explanations.
+ The current assets and objective follow. Reply with the FFMPEG command:
+
+ AVAILABLE ASSETS LIST:
+
+ {files_info_string}
+
+ OBJECTIVE: {prompt} and output at "output.mp4"
+ YOUR FFMPEG COMMAND:
+ """,
+         },
+     ]
+     try:
+         # Print the complete prompt
+         print("\n=== COMPLETE PROMPT ===")
+         for msg in messages:
+             print(f"\n[{msg['role'].upper()}]:")
+             print(msg["content"])
+         print("=====================\n")
+
+         if model_choice not in MODELS:
+             raise ValueError(f"Model {model_choice} is not supported")
+
+         model_config = MODELS[model_choice]
+         client.base_url = model_config["base_url"]
+         client.api_key = os.environ[model_config["env_key"]]
+         model = "deepseek-chat" if "deepseek" in model_choice.lower() else model_choice
+
+         completion = client.chat.completions.create(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             top_p=top_p,
+             max_tokens=2048,
+         )
+         content = completion.choices[0].message.content
+         # Extract command from code block if present
+         if "```" in content:
+             # Find content between ```sh or ```bash and the next ```
+             import re
+
+             command = re.search(r"```(?:sh|bash)?\n(.*?)\n```", content, re.DOTALL)
+             if command:
+                 command = command.group(1).strip()
+             else:
+                 command = content.replace("\n", "")
+         else:
+             command = content.replace("\n", "")
+
+         # strip the hardcoded output file name; the real output path is appended later
+         command = command.replace("output.mp4", "")
+
+         return command
+     except Exception as e:
+         raise Exception("API Error") from e
+
+
+ def update(
+     files,
+     prompt,
+     top_p=1,
+     temperature=1,
+     model_choice="Qwen/Qwen2.5-Coder-32B-Instruct",
+ ):
+     if prompt == "":
+         raise gr.Error("Please enter a prompt.")
+
+     files_info = get_files_infos(files)
+     # disable this if you're running the app locally or on your own server
+     for file_info in files_info:
+         if file_info["type"] == "video":
+             if file_info["duration"] > 120:
+                 raise gr.Error(
+                     "Please make sure all videos are less than 2 minutes long."
+                 )
+         if file_info["size"] > 100000000:
+             raise gr.Error("Please make sure all files are less than 100MB in size.")
+
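+     # Generate-and-validate loop: a malformed command or a failed dry run
+     # triggers one retry before the error is surfaced to the UI.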
+     attempts = 0
+     while attempts < 2:
+         print("ATTEMPT", attempts)
+         try:
+             command_string = get_completion(
+                 prompt, files_info, top_p, temperature, model_choice
+             )
+             print(
+                 f"""///PROMPT {prompt} \n\n/// START OF COMMAND ///:\n\n{command_string}\n\n/// END OF COMMAND ///\n\n"""
+             )
+
+             # split command string into list of arguments
+             args = shlex.split(command_string)
+             if args[0] != "ffmpeg":
+                 raise Exception("Command does not start with ffmpeg")
+             temp_dir = tempfile.mkdtemp()
+             # copy files to temp dir with sanitized names
+             for file in files:
+                 file_path = Path(file.name)
+                 sanitized_name = file_path.name.replace(" ", "_")
+                 shutil.copy(file_path, Path(temp_dir) / sanitized_name)
+
+             # dry run to check that the generated ffmpeg command is valid
+             ffmpeg_dry_run = subprocess.run(
+                 args + ["-f", "null", "-"],
+                 stderr=subprocess.PIPE,
+                 text=True,
+                 cwd=temp_dir,
+             )
+             if ffmpeg_dry_run.returncode == 0:
+                 print("Command is valid.")
+             else:
+                 print("Command is not valid. Error output:")
+                 print(ffmpeg_dry_run.stderr)
+                 raise Exception(
+                     "FFMPEG generated command is not valid. Please try something else."
+                 )
+
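+             # The dry run passed; execute for real with a unique output name so
+             # concurrent runs cannot overwrite each other.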
+             output_file_name = f"output_{uuid.uuid4()}.mp4"
+             output_file_path = str((Path(temp_dir) / output_file_name).resolve())
+             final_command = args + ["-y", output_file_path]
+             print(
+                 f"\n=== EXECUTING FFMPEG COMMAND ===\nffmpeg {' '.join(final_command[1:])}\n"
+             )
+             subprocess.run(final_command, cwd=temp_dir)
+             generated_command = f"### Generated Command\n```bash\nffmpeg {' '.join(args[1:])} -y output.mp4\n```"
+             return output_file_path, gr.update(value=generated_command)
+         except Exception as e:
+             attempts += 1
+             if attempts >= 2:
+                 print("FROM UPDATE", e)
+                 raise gr.Error(str(e))
+
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         """
+ # 🏞 AI Video Editor
+ Your advanced video editing assistant powered by AI. Transform, enhance, and edit videos using natural language instructions. Upload your video, image, or audio assets and let [Qwen2.5-Coder](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct) or [DeepSeek-V3](https://huggingface.co/deepseek-ai/DeepSeek-V3-Base) generate professional-quality video edits using FFMPEG - no coding required!
+ """,
+         elem_id="header",
+     )
+
+     with gr.Accordion("📋 Usage Instructions", open=False):
+         gr.Markdown(
+             """
+ ### How to Use AI Video Editor
+
+ 1. **Upload Media Files**: Add your video, image, or audio files using the upload area
+ 2. **Write Instructions**: Describe what edits you want in plain English
+ 3. **Adjust Parameters** (optional): Customize model and generation settings
+ 4. **Generate**: Click "Run" and watch your edited video being created
+
+ ### Example Instructions
+ - "Trim the first 5 seconds of the video"
+ - "Add a text overlay with my name at the bottom"
+ - "Convert video to black and white"
+ - "Combine these videos with a crossfade transition"
+ - "Add background music to my slideshow"
+ - "Create a picture-in-picture effect"
+
+ ### Tips
+ - Be specific about timecodes when trimming (e.g., "from 0:15 to 0:45")
+ - Include positioning details for overlays (e.g., "top right corner")
+ - Specify dimensions if you need to resize (e.g., "scale to 720p")
+ """
+         )
+
+     with gr.Row():
+         with gr.Column():
+             user_files = gr.File(
+                 file_count="multiple",
+                 label="Media files",
+                 file_types=allowed_medias,
+             )
+             user_prompt = gr.Textbox(
+                 placeholder="e.g.: Remove the first 3 seconds of the video",
+                 label="Instructions",
+             )
+             btn = gr.Button("Run")
+             with gr.Accordion("Parameters", open=False):
+                 model_choice = gr.Radio(
+                     choices=list(MODELS.keys()),
+                     value=list(MODELS.keys())[0],
+                     label="Model",
+                 )
+                 top_p = gr.Slider(
+                     minimum=0,
+                     maximum=1.0,
+                     value=0.7,
+                     step=0.05,
+                     interactive=True,
+                     label="Top-p (nucleus sampling)",
+                 )
+                 temperature = gr.Slider(
+                     minimum=0,
+                     maximum=5.0,
+                     value=0.1,
+                     step=0.1,
+                     interactive=True,
+                     label="Temperature",
+                 )
+         with gr.Column():
+             generated_video = gr.Video(
+                 interactive=False, label="Generated Video", include_audio=True
+             )
+             generated_command = gr.Markdown()
+
+     btn.click(
+         fn=update,
+         inputs=[user_files, user_prompt, top_p, temperature, model_choice],
+         outputs=[generated_video, generated_command],
+     )
+     with gr.Row():
+         gr.Examples(
+             examples=[
+                 [
+                     ["./examples/Jiangnan_Rain.mp4"],
+                     "Add a text watermark 'Sample Video' to the upper right corner of the video, with white text and a semi-transparent background.",
+                     0.7,
+                     0.1,
+                     list(MODELS.keys())[0],
+                 ],
+                 [
+                     ["./examples/Jiangnan_Rain.mp4"],
+                     "Cut the video to extract only the middle 30 seconds (starting at 00:30 and ending at 01:00).",
+                     0.7,
+                     0.1,
+                     (
+                         list(MODELS.keys())[1]
+                         if len(MODELS) > 1
+                         else list(MODELS.keys())[0]
+                     ),
+                 ],
+                 [
+                     ["./examples/Lotus_Pond01.mp4"],
+                     "Convert the video to black and white (grayscale) while maintaining the original audio.",
+                     0.7,
+                     0.1,
+                     list(MODELS.keys())[0],
+                 ],
+                 [
+                     ["./examples/Lotus_Pond01.mp4"],
+                     "Create a slow-motion version of the video by reducing the speed to 0.5x.",
+                     0.7,
+                     0.1,
+                     (
+                         list(MODELS.keys())[1]
+                         if len(MODELS) > 1
+                         else list(MODELS.keys())[0]
+                     ),
+                 ],
+             ],
+             inputs=[user_files, user_prompt, top_p, temperature, model_choice],
+             outputs=[generated_video, generated_command],
+             fn=update,
+             run_on_click=True,
+             cache_examples=False,
+         )
+
+     with gr.Row():
+         gr.Markdown(
+             """
+ If you have an idea to improve this, please open a PR:
+
+ [![Open a Pull Request](https://huggingface.co/datasets/huggingface/badges/raw/main/open-a-pr-lg-light.svg)](https://huggingface.co/spaces/huggingface-projects/video-composer-gpt4/discussions)
+ """,
+         )
+
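+ # Queue up to 200 concurrent jobs; the public API surface is disabled on launch.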
+ demo.queue(default_concurrency_limit=200)
+ demo.launch(show_api=False, ssr_mode=True)