Update app.py
app.py
CHANGED
@@ -213,7 +213,52 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
 
     return "output.wav", input_spec_image_path, color_output_spec_image
 
+def load_input_spectrogram(audio_path):
+    # Load the reference audio and compute its normalized mel spectrogram
+    audio, sampling_rate = load_wav(audio_path)
+    audio, spec = get_mel_spectrogram_from_audio(audio)
+    norm_spec = normalize_spectrogram(spec)
+    norm_spec = pad_spec(norm_spec, 1024)
+    norm_spec = normalize(norm_spec)  # normalize to [-1, 1]; the pipeline does not normalize torch.Tensor inputs
+
+    raw_image = image_add_color(torch_to_pil(norm_spec))
+
+    # Save the input spectrogram image and return its path for the gr.Image output
+    input_spec_image_path = "input_spectrogram.png"
+    raw_image.save(input_spec_image_path)
+    return input_spec_image_path
+
+def preview_masked_area(audio_path, mask_start_point, mask_end_point):
+    # Load the reference audio and compute its normalized mel spectrogram
+    audio, sampling_rate = load_wav(audio_path)
+    audio, spec = get_mel_spectrogram_from_audio(audio)
+    norm_spec = normalize_spectrogram(spec)
+    norm_spec = pad_spec(norm_spec, 1024)
+    norm_spec = normalize(norm_spec)  # normalize to [-1, 1]; the pipeline does not normalize torch.Tensor inputs
+
+    # Add mask: the masked column range comes from the slider values
+    width_start = mask_start_point
+    width = mask_end_point - mask_start_point
+    mask = torch.zeros_like(norm_spec)[:1, ...]
+    mask[:, :, width_start:width_start + width] = 1
+    mask_image = torch_to_pil(mask)
+
+    mask, masked_spec = prepare_mask_and_masked_image(norm_spec, mask)
+    masked_spec_image = torch_to_pil(masked_spec)
+
+    # Color the masked spectrogram and paint the masked area black
+    color_masked_spec_image = image_add_color(masked_spec_image)
+    color_masked_spec_image = np.array(color_masked_spec_image)
+    color_masked_spec_image[:, width_start:width_start + width, :] = 0
+    color_masked_spec_image = Image.fromarray(color_masked_spec_image)
+
+    # Save the masked spectrogram image
+    masked_spec_image_path = "masked_spectrogram.png"
+    color_masked_spec_image.save(masked_spec_image_path)
+
+    return masked_spec_image_path
+
 
 css="""
 div#col-container{
     margin: 0 auto;
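Note: `prepare_mask_and_masked_image` is defined elsewhere in app.py and is expected to return the binarized mask together with the spectrogram with the masked region zeroed out. A minimal sketch of the usual diffusers-style behavior, as an assumption rather than the repo's exact implementation:

```python
import torch

def prepare_mask_and_masked_image(image: torch.Tensor, mask: torch.Tensor):
    # Treat mask values >= 0.5 as the region to inpaint.
    mask = (mask >= 0.5).to(image.dtype)
    # Zero out the masked columns so only the kept audio remains visible.
    masked_image = image * (mask < 0.5)
    return mask, masked_image
```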
@@ -283,9 +328,15 @@ with gr.Blocks(css=css) as demo:
         with gr.Tab("Audio InPainting"):
             prompt_inp = gr.Textbox(label="Prompt")
             audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
+
+            audio_in_spec = gr.Image(label="Audio IN spectrogram")
             mask_start_point = gr.Slider(label="Mask Start point", minimum=0, maximum=1024, step=1, value=256)
             mask_end_point = gr.Slider(label="Mask End point", minimum=0, maximum=1024, step=1, value=768)
+            preview_mask_btn = gr.Button("Preview Mask")
+
+            masked_spec_preview = gr.Image(label="Spectrogram Mask Preview")
             submit_btn_inp = gr.Button("Submit")
+
             audio_out_inp = gr.Audio(label="Audio Ressult")
 
             with gr.Accordion("Compare Spectrograms", open=False):
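The slider range 0-1024 matches the `pad_spec(norm_spec, 1024)` width used above, so mask start and end points map one-to-one onto spectrogram frames. A small sketch of the frame-to-time conversion, assuming (hypothetically) a 16 kHz sample rate and a hop length of 160 samples:

```python
def frames_to_seconds(frame: int, sr: int = 16000, hop: int = 160) -> float:
    # One spectrogram frame spans `hop` samples, i.e. hop / sr seconds.
    return frame * hop / sr

print(frames_to_seconds(256))  # 2.56 -> default mask start (s)
print(frames_to_seconds(768))  # 7.68 -> default mask end (s)
```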
@@ -293,10 +344,22 @@ with gr.Blocks(css=css) as demo:
                 input_spectrogram_inp = gr.Image(label="Input Spectrogram")
                 output_spectrogram_inp = gr.Image(label="Output Spectrogram")
 
+    audio_in_inp.upload(
+        fn = load_input_spectrogram,
+        inputs = [audio_in_inp],
+        outputs = [audio_in_spec]
+    )
+
+    preview_mask_btn.click(
+        fn = preview_masked_area,
+        inputs = [audio_in_inp, mask_start_point, mask_end_point],
+        outputs = [masked_spec_preview]
+    )
+
     submit_btn_inp.click(
         fn = infer_inp,
         inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
-        outputs = [audio_out_inp,
+        outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
     )
 
 demo.queue().launch(show_api=False, show_error=True)
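As a quick smoke test, both new callbacks can be called directly outside the Gradio UI; each returns the path of the PNG its `gr.Image` component displays (the input file name here is hypothetical):

```python
spec_path = load_input_spectrogram("example.wav")           # -> "input_spectrogram.png"
mask_path = preview_masked_area("example.wav", 256, 768)    # -> "masked_spectrogram.png"
print(spec_path, mask_path)
```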