fffiloni committed on
Commit
3f89683
·
verified ·
1 Parent(s): d187cc6
Files changed (1) hide show
  1. app.py +27 -2
app.py CHANGED
@@ -102,6 +102,31 @@ def merge_audio_to_video(input_vid, input_aud):
102
 
103
  @spaces.GPU(duration=100)
104
  def infer(video_in):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  # check if 'outputs' dir exists and empty it if necessary
107
  check_outputs_folder('./outputs/tmp')
@@ -223,6 +248,6 @@ with gr.Blocks(css=css) as demo:
223
  fn = infer,
224
  inputs = [video_in],
225
  outputs = [output_sound, output_spectrogram, merged_out],
226
- show_api = False
227
  )
228
- demo.launch(show_api=False, show_error=True)
 
102
 
103
  @spaces.GPU(duration=100)
104
  def infer(video_in):
105
+ """Generate an audio track from a silent video using a pre-trained VTA (Video-to-Audio) model.
106
+
107
+ This function performs the following steps:
108
+ 1. Ensures the output directory is clean.
109
+ 2. Optionally trims the video to a maximum of 10 seconds.
110
+ 3. Runs inference using a pre-trained latent diffusion model to generate audio.
111
+ 4. Finds the generated WAV audio output.
112
+ 5. Plots a spectrogram of the generated audio.
113
+ 6. Merges the audio back into the input video.
114
+
115
+ Args:
116
+ video_in (str): The file path to the input silent video (MP4 format). If the video is longer than 10 seconds, it will be trimmed.
117
+
118
+ Returns:
119
+ Tuple[str, str, str]:
120
+ - The path to the generated `.wav` audio file.
121
+ - The path to the generated spectrogram `.png` image.
122
+ - The path to the final `.mp4` video with the generated audio merged in.
123
+
124
+ Example:
125
+ Given a silent video of a lion, this function will return:
126
+ - A realistic generated audio track simulating the lion's sound,
127
+ - A visual spectrogram representation of the audio,
128
+ - And a new video file where the generated audio is synced to the original visuals.
129
+ """
130
 
131
  # check if 'outputs' dir exists and empty it if necessary
132
  check_outputs_folder('./outputs/tmp')
 
248
  fn = infer,
249
  inputs = [video_in],
250
  outputs = [output_sound, output_spectrogram, merged_out],
251
+ show_api = True
252
  )
253
+ demo.launch(show_api=True, show_error=True, ssr_mode=False, mcp_server=True)