Update app.py
--- a/app.py
+++ b/app.py
@@ -127,7 +127,10 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
 
     return "output.wav", input_spec_image_path, output_spec_image_path
 
-def infer_inp(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
+def infer_inp(prompt, audio_path, spec_with_mask, progress=gr.Progress(track_tqdm=True)):
+
+    if spec_with_mask:
+        print(spec_with_mask)
 
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
@@ -213,6 +216,18 @@ def infer_inp(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
 
     return "output.wav"
 
+def load_spec_for_manual_masking(audio_path):
+    # Loading
+    audio, sampling_rate = load_wav(audio_path)
+    audio, spec = get_mel_spectrogram_from_audio(audio)
+    norm_spec = normalize_spectrogram(spec)
+    norm_spec = pad_spec(norm_spec, 1024)
+    norm_spec = normalize(norm_spec)  # normalize to [-1, 1], because pipeline do not normalize for torch.Tensor input
+
+    raw_image = image_add_color(torch_to_pil(norm_spec))
+    return raw_image
+
+
 css="""
 div#col-container{
     margin: 0 auto;
@@ -282,6 +297,7 @@ with gr.Blocks(css=css) as demo:
         with gr.Tab("Audio InPainting"):
             prompt_inp = gr.Textbox(label="Prompt")
             audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
+            spec_for_mask = gr.ImageMask(label="Draw Mask", type="pil", interactive=False)
            submit_btn_inp = gr.Button("Submit")
             audio_out_inp = gr.Audio(label="Audio Ressult")
 
@@ -290,11 +306,15 @@
             input_spectrogram_inp = gr.Image(label="Input Spectrogram")
             output_spectrogram_inp = gr.Image(label="Output Spectrogram")
 
-
+        audio_in_inp.upload(
+            fn = load_spec_for_manual_masking,
+            inputs = [audio_in_inp],
+            outputs = [spec_for_mask]
+        )
 
         submit_btn_inp.click(
             fn = infer_inp,
-            inputs = [prompt_inp, audio_in_inp],
+            inputs = [prompt_inp, audio_in_inp, spec_for_mask],
             outputs = [audio_out_inp]
         )
 
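For context: `infer_inp` currently only prints `spec_with_mask`. With `type="pil"`, Gradio's `gr.ImageMask` passes the handler a dict of PIL images (keys "background", "layers", "composite"), so the natural follow-up is extracting a binary inpainting mask from the drawn layer. A minimal sketch of that conversion, assuming Gradio 4's EditorValue layout; the helper name `editor_value_to_mask` and the 0/255 convention are illustrative only, not part of this commit:

    from PIL import Image

    def editor_value_to_mask(spec_with_mask: dict) -> Image.Image:
        # gr.ImageMask (type="pil") returns a dict:
        # {"background": ..., "layers": [...], "composite": ...}.
        # The strokes the user painted live in the first layer's alpha channel.
        layer = spec_with_mask["layers"][0]
        alpha = layer.split()[-1]  # alpha channel as a grayscale image
        # Binarize: painted pixels -> 255 (region to inpaint), untouched -> 0 (keep)
        return alpha.point(lambda p: 255 if p > 0 else 0)

The resulting mask could then be resized to the padded spectrogram resolution and passed with the reference spectrogram to the inpainting pipeline, which is presumably what the new `spec_with_mask` argument is being wired up for.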