fffiloni commited on
Commit
ed49550
·
verified ·
1 Parent(s): 57a7a6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -1
app.py CHANGED
@@ -213,7 +213,48 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
213
 
214
  return "output.wav", input_spec_image_path, color_output_spec_image
215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  css="""
218
  div#col-container{
219
  margin: 0 auto;
@@ -283,9 +324,15 @@ with gr.Blocks(css=css) as demo:
283
  with gr.Tab("Audio InPainting"):
284
  prompt_inp = gr.Textbox(label="Prompt")
285
  audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
 
 
286
  mask_start_point = gr.Slider(label="Mask Start point", minimum=0, maximum=1024, step=1, value=256)
287
  mask_end_point = gr.Slider(label="Mask End point", minimum=0, maximum=1024, step=1, value=768)
 
 
 
288
  submit_btn_inp = gr.Button("Submit")
 
289
  audio_out_inp = gr.Audio(label="Audio Result")
290
 
291
  with gr.Accordion("Compare Spectrograms", open=False):
@@ -293,10 +340,22 @@ with gr.Blocks(css=css) as demo:
293
  input_spectrogram_inp = gr.Image(label="Input Spectrogram")
294
  output_spectrogram_inp = gr.Image(label="Output Spectrogram")
295
 
 
 
 
 
 
 
 
 
 
 
 
 
296
  submit_btn_inp.click(
297
  fn = infer_inp,
298
  inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
299
- outputs = [audio_out_inp, input_spectrogram, output_spectrogram]
300
  )
301
 
302
  demo.queue().launch(show_api=False, show_error=True)
 
213
 
214
  return "output.wav", input_spec_image_path, color_output_spec_image
215
 
216
def load_input_spectrogram(audio_path):
    """Render the reference audio's mel spectrogram to a PNG and return its path.

    Used as the ``fn`` for ``audio_in_inp.upload`` so the uploaded audio's
    spectrogram is shown immediately in the ``audio_in_spec`` Image component.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the uploaded audio (Gradio ``type="filepath"``).

    Returns
    -------
    str
        Path of the saved spectrogram image, ``"input_spectrogram.png"``.
    """
    # Load audio and compute its mel spectrogram.
    audio, sampling_rate = load_wav(audio_path)
    audio, spec = get_mel_spectrogram_from_audio(audio)
    norm_spec = normalize_spectrogram(spec)
    norm_spec = pad_spec(norm_spec, 1024)
    # Normalize to [-1, 1]; the pipeline does not normalize torch.Tensor input.
    norm_spec = normalize(norm_spec)

    raw_image = image_add_color(torch_to_pil(norm_spec))

    # Save input spectrogram image
    input_spec_image_path = "input_spectrogram.png"
    raw_image.save(input_spec_image_path)

    # BUG FIX: the original returned None, so the Gradio Image output
    # wired to this function never displayed the spectrogram.
    return input_spec_image_path
230
def preview_masked_area(audio_path, mask_start_point, mask_end_point):
    """Preview the inpainting mask over the audio's spectrogram.

    Builds the mel spectrogram of *audio_path*, blacks out the column range
    selected by the two mask sliders, and saves the result as a PNG for the
    ``masked_spec_preview`` Image component.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the reference audio.
    mask_start_point : int
        Left edge of the masked region (spectrogram column, 0-1024).
    mask_end_point : int
        Right edge of the masked region (spectrogram column, 0-1024).

    Returns
    -------
    str
        Path of the saved preview image, ``"masked_spectrogram.png"``.
    """
    # Load audio and compute its mel spectrogram.
    audio, sampling_rate = load_wav(audio_path)
    audio, spec = get_mel_spectrogram_from_audio(audio)
    norm_spec = normalize_spectrogram(spec)
    norm_spec = pad_spec(norm_spec, 1024)
    # Normalize to [-1, 1]; the pipeline does not normalize torch.Tensor input.
    norm_spec = normalize(norm_spec)

    # BUG FIX: width_start and width were referenced below but never defined
    # (NameError at runtime). Derive them from the slider values; min/abs
    # keeps the preview valid even if the sliders are set in reverse order.
    width_start = int(min(mask_start_point, mask_end_point))
    width = int(abs(mask_end_point - mask_start_point))

    # Add Mask: 1 inside the selected column range, 0 elsewhere.
    mask = torch.zeros_like(norm_spec)[:1, ...]
    mask[:, :, width_start:width_start + width] = 1
    mask_image = torch_to_pil(mask)

    mask, masked_spec = prepare_mask_and_masked_image(norm_spec, mask)
    masked_spec_image = torch_to_pil(masked_spec)

    # Color the masked spec and paint the masked area black.
    color_masked_spec_image = image_add_color(masked_spec_image)
    color_masked_spec_image = np.array(color_masked_spec_image)
    color_masked_spec_image[:, width_start:width_start + width, :] = 0
    color_masked_spec_image = Image.fromarray(color_masked_spec_image)

    # Save the masked spectrogram image
    masked_spec_image_path = "masked_spectrogram.png"
    color_masked_spec_image.save(masked_spec_image_path)

    return masked_spec_image_path
+
258
  css="""
259
  div#col-container{
260
  margin: 0 auto;
 
324
  with gr.Tab("Audio InPainting"):
325
  prompt_inp = gr.Textbox(label="Prompt")
326
  audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
327
+
328
+ audio_in_spec = gr.Image(label="Audio IN spectrogram")
329
  mask_start_point = gr.Slider(label="Mask Start point", minimum=0, maximum=1024, step=1, value=256)
330
  mask_end_point = gr.Slider(label="Mask End point", minimum=0, maximum=1024, step=1, value=768)
331
+ preview_mask_btn = gr.Button("Preview Mask")
332
+
333
+ masked_spec_preview = gr.Image(label="Spectrogram Mask Preview")
334
  submit_btn_inp = gr.Button("Submit")
335
+
336
  audio_out_inp = gr.Audio(label="Audio Result")
337
 
338
  with gr.Accordion("Compare Spectrograms", open=False):
 
340
  input_spectrogram_inp = gr.Image(label="Input Spectrogram")
341
  output_spectrogram_inp = gr.Image(label="Output Spectrogram")
342
 
343
+ audio_in_inp.upload(
344
+ fn = load_input_spectrogram,
345
+ inputs = [audio_in_inp],
346
+ outputs = [audio_in_spec]
347
+ )
348
+
349
+ preview_mask_btn.click(
350
+ fn = preview_masked_area,
351
+ inputs = [audio_in_inp, mask_start_point, mask_end_point],
352
+ outputs = [masked_spec_preview]
353
+ )
354
+
355
  submit_btn_inp.click(
356
  fn = infer_inp,
357
  inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
358
+ outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
359
  )
360
 
361
  demo.queue().launch(show_api=False, show_error=True)