fffiloni committed on
Commit
080877e
·
verified ·
1 Parent(s): 175ce84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -127,7 +127,10 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
127
 
128
  return "output.wav", input_spec_image_path, output_spec_image_path
129
 
130
- def infer_inp(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
 
 
 
131
 
132
  pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
133
  dtype = torch.float16
@@ -213,6 +216,18 @@ def infer_inp(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
213
 
214
  return "output.wav"
215
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  css="""
217
  div#col-container{
218
  margin: 0 auto;
@@ -282,6 +297,7 @@ with gr.Blocks(css=css) as demo:
282
  with gr.Tab("Audio InPainting"):
283
  prompt_inp = gr.Textbox(label="Prompt")
284
  audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
 
285
  submit_btn_inp = gr.Button("Submit")
286
  audio_out_inp = gr.Audio(label="Audio Ressult")
287
 
@@ -290,11 +306,15 @@ with gr.Blocks(css=css) as demo:
290
  input_spectrogram_inp = gr.Image(label="Input Spectrogram")
291
  output_spectrogram_inp = gr.Image(label="Output Spectrogram")
292
 
293
-
 
 
 
 
294
 
295
  submit_btn_inp.click(
296
  fn = infer_inp,
297
- inputs = [prompt_inp, audio_in_inp],
298
  outputs = [audio_out_inp]
299
  )
300
 
 
127
 
128
  return "output.wav", input_spec_image_path, output_spec_image_path
129
 
130
+ def infer_inp(prompt, audio_path, spec_with_mask, progress=gr.Progress(track_tqdm=True)):
131
+
132
+ if spec_with_mask:
133
+ print(spec_with_mask)
134
 
135
  pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
136
  dtype = torch.float16
 
216
 
217
  return "output.wav"
218
 
219
def load_spec_for_manual_masking(audio_path):
    """Turn an audio file into a colorized spectrogram image for mask drawing.

    Loads the waveform at *audio_path*, computes its mel spectrogram,
    normalizes/pads it to the pipeline's expected width (1024), rescales it
    to [-1, 1] (the pipeline does not normalize torch.Tensor inputs itself),
    and returns a colorized PIL image suitable for a gr.ImageMask component.

    NOTE(review): relies on project helpers (load_wav,
    get_mel_spectrogram_from_audio, normalize_spectrogram, pad_spec,
    normalize, torch_to_pil, image_add_color) defined elsewhere in app.py.
    """
    # Load raw audio from disk.
    waveform, _sampling_rate = load_wav(audio_path)

    # Mel spectrogram, then normalization + padding to the fixed width
    # the diffusion pipeline expects.
    waveform, mel = get_mel_spectrogram_from_audio(waveform)
    prepared = normalize(pad_spec(normalize_spectrogram(mel), 1024))

    # Convert the tensor to a PIL image and add color for display.
    return image_add_color(torch_to_pil(prepared))
229
+
230
+
231
  css="""
232
  div#col-container{
233
  margin: 0 auto;
 
297
  with gr.Tab("Audio InPainting"):
298
  prompt_inp = gr.Textbox(label="Prompt")
299
  audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
300
+ spec_for_mask = gr.ImageMask(label="Draw Mask", type="pil", interactive=False)
301
  submit_btn_inp = gr.Button("Submit")
302
  audio_out_inp = gr.Audio(label="Audio Ressult")
303
 
 
306
  input_spectrogram_inp = gr.Image(label="Input Spectrogram")
307
  output_spectrogram_inp = gr.Image(label="Output Spectrogram")
308
 
309
+ audio_in_inp.upload(
310
+ fn = load_spec_for_manual_masking,
311
+ inputs = [audio_in_inp],
312
+ outputs = [spec_for_mask]
313
+ )
314
 
315
  submit_btn_inp.click(
316
  fn = infer_inp,
317
+ inputs = [prompt_inp, audio_in_inp, spec_for_mask],
318
  outputs = [audio_out_inp]
319
  )
320