fffiloni committed on
Commit
721c606
·
verified ·
1 Parent(s): 2f7657b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -14,6 +14,16 @@ from diffusers import StableDiffusionImg2ImgPipeline
14
  from converter import load_wav, mel_spectrogram, normalize_spectrogram, denormalize_spectrogram, Generator, get_mel_spectrogram_from_audio
15
  from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize
16
 
 
 
 
 
 
 
 
 
 
 
17
  def infer(prompt, progress=gr.Progress(track_tqdm=True)):
18
 
19
  prompt = prompt
@@ -106,8 +116,16 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
106
 
107
  # Save as WAV
108
  sf.write("output.wav", concat_audio, 16000)
 
 
 
 
 
 
 
 
109
 
110
- return "output.wav"
111
 
112
  css="""
113
  div#col-container{
@@ -161,6 +179,10 @@ with gr.Blocks(css=css) as demo:
161
  prompt_strength = gr.Slider(label="Prompt Strength", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
162
  submit_btn_img2img = gr.Button("Submit")
163
  audio_out_img2img = gr.Audio(label="Audio Ressult")
 
 
 
 
164
 
165
  gr.Examples(
166
  examples = [
@@ -174,7 +196,7 @@ with gr.Blocks(css=css) as demo:
174
  submit_btn_img2img.click(
175
  fn = infer_img2img,
176
  inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
177
- outputs = [audio_out_img2img]
178
  )
179
 
180
  demo.queue().launch(show_api=False, show_error=True)
 
14
  from converter import load_wav, mel_spectrogram, normalize_spectrogram, denormalize_spectrogram, Generator, get_mel_spectrogram_from_audio
15
  from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize
16
 
17
+ # ——
18
+
19
+ def save_spectrogram_image(spectrogram, filename):
20
+ """Save a spectrogram as an image."""
21
+ plt.figure(figsize=(10, 4))
22
+ plt.imshow(spectrogram.squeeze(), aspect='auto', origin='lower', cmap='magma')
23
+ plt.axis('off') # Hide axes for a cleaner image
24
+ plt.savefig(filename, bbox_inches='tight', pad_inches=0)
25
+ plt.close()
26
+
27
  def infer(prompt, progress=gr.Progress(track_tqdm=True)):
28
 
29
  prompt = prompt
 
116
 
117
  # Save as WAV
118
  sf.write("output.wav", concat_audio, 16000)
119
+
120
+ # Save input spectrogram image
121
+ input_spec_image_path = "input_spectrogram.png"
122
+ raw_image.save(input_spec_image_path)
123
+
124
+ # Save concatenated spectrogram image
125
+ output_spec_image_path = "output_spectrogram.png"
126
+ concat_image.save(output_spec_image_path)
127
 
128
+ return "output.wav", input_spec_image_path, output_spec_image_path
129
 
130
  css="""
131
  div#col-container{
 
179
  prompt_strength = gr.Slider(label="Prompt Strength", minimum=0.0, maximum=1.0, step=0.1, value=0.7)
180
  submit_btn_img2img = gr.Button("Submit")
181
  audio_out_img2img = gr.Audio(label="Audio Ressult")
182
+
183
+ with gr.Row():
184
+ input_spectrogram = gr.Image(label="Input Spectrogram")
185
+ output_spectrogram = gr.Image(label="Output Spectrogram")
186
 
187
  gr.Examples(
188
  examples = [
 
196
  submit_btn_img2img.click(
197
  fn = infer_img2img,
198
  inputs = [prompt_img2img, audio_in_img2img, prompt_strength],
199
+ outputs = [audio_out_img2img, input_spectrogram, output_spectrogram]
200
  )
201
 
202
  demo.queue().launch(show_api=False, show_error=True)