fffiloni committed on
Commit
3f89683
·
verified ·
1 Parent(s): d187cc6
Files changed (1) hide show
  1. app.py +27 -2
app.py CHANGED
@@ -102,6 +102,31 @@ def merge_audio_to_video(input_vid, input_aud):
102
 
103
  @spaces.GPU(duration=100)
104
  def infer(video_in):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  # check if 'outputs' dir exists and empty it if necessary
107
  check_outputs_folder('./outputs/tmp')
@@ -223,6 +248,6 @@ with gr.Blocks(css=css) as demo:
223
  fn = infer,
224
  inputs = [video_in],
225
  outputs = [output_sound, output_spectrogram, merged_out],
226
- show_api = False
227
  )
228
- demo.launch(show_api=False, show_error=True)
 
102
 
103
  @spaces.GPU(duration=100)
104
  def infer(video_in):
105
+ """Generate an audio track from a silent video using a pre-trained VTA (Video-to-Audio) model.
106
+
107
+ This function performs the following steps:
108
+ 1. Ensures the output directory is clean.
109
+ 2. Optionally trims the video to a maximum of 10 seconds.
110
+ 3. Runs inference using a pre-trained latent diffusion model to generate audio.
111
+ 4. Finds the generated WAV audio output.
112
+ 5. Plots a spectrogram of the generated audio.
113
+ 6. Merges the audio back into the input video.
114
+
115
+ Args:
116
+ video_in (str): The file path to the input silent video (MP4 format). If the video is longer than 10 seconds, it will be trimmed.
117
+
118
+ Returns:
119
+ Tuple[str, str, str]:
120
+ - The path to the generated `.wav` audio file.
121
+ - The path to the generated spectrogram `.png` image.
122
+ - The path to the final `.mp4` video with the generated audio merged in.
123
+
124
+ Example:
125
+ Given a silent video of a lion, this function will return:
126
+ - A realistic generated audio track simulating the lion's sound,
127
+ - A visual spectrogram representation of the audio,
128
+ - And a new video file where the generated audio is synced to the original visuals.
129
+ """
130
 
131
  # check if 'outputs' dir exists and empty it if necessary
132
  check_outputs_folder('./outputs/tmp')
 
248
  fn = infer,
249
  inputs = [video_in],
250
  outputs = [output_sound, output_spectrogram, merged_out],
251
+ show_api = True
252
  )
253
+ demo.launch(show_api=True, show_error=True, ssr_mode=False, mcp_server=True)