fffiloni committed
Commit cf62874 (verified)
Parent: 9b94d3b

Update app.py

Files changed (1): app.py (+27 -2)
app.py CHANGED

@@ -17,6 +17,26 @@ from converter import load_wav, mel_spectrogram, normalize_spectrogram, denormal
 from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize, prepare_mask_and_masked_image
 
 # ——
+def resample_audio(input_audio, original_sr, target_sr=16000):
+    """
+    Resample the audio to the target sample rate (16000 Hz by default).
+
+    Args:
+    - input_audio (numpy array): The raw audio data.
+    - original_sr (int): The original sample rate of the input audio.
+    - target_sr (int): The target sample rate (default is 16000 Hz).
+
+    Returns:
+    - numpy array: The resampled audio.
+    """
+    if original_sr != target_sr:
+        # Resample the audio using librosa
+        audio_resampled = librosa.resample(input_audio, orig_sr=original_sr, target_sr=target_sr)
+        return audio_resampled
+    else:
+        # If sample rate is already 16000, no resampling is needed
+        return input_audio
+
 def convert_wav_to_16khz(input_path, output_path):
     with wave.open(input_path, "rb") as wav_in:
         params = wav_in.getparams()
@@ -102,8 +122,13 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):
 
 def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):
 
-    audio_path = convert_wav_to_16khz(audio_path, "output_16khz.wav")
-
+    # Load your audio file
+    input_audio, original_sr = librosa.load(audio_path, sr=None)  # Load with original sampling rate
+    resampled_audio = resample_audio(input_audio, original_sr, target_sr=16000)
+    # Save the resampled audio to a new file
+    sf.write('resampled_audio.wav', resampled_audio, 16000)
+    audio_path = 'resampled_audio.wav'
+
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
     device = "cuda"
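
For context, the sketch below is a minimal, standalone version of the resampling path this commit introduces, not the commit itself: it assumes librosa and soundfile are importable (the diff does not show app.py's import section) and uses "input.wav" as a placeholder input path.

# Minimal sketch of the 16 kHz resampling flow added in this commit.
# Assumes librosa and soundfile are installed; "input.wav" is a placeholder path.
import librosa
import soundfile as sf

def resample_audio(input_audio, original_sr, target_sr=16000):
    # Return the audio resampled to target_sr; pass it through unchanged
    # if the sample rates already match.
    if original_sr != target_sr:
        return librosa.resample(input_audio, orig_sr=original_sr, target_sr=target_sr)
    return input_audio

# Load at the file's native rate (sr=None), resample to 16 kHz, and write the
# result so the rest of the pipeline always receives 16 kHz audio.
audio, sr = librosa.load("input.wav", sr=None)
audio_16k = resample_audio(audio, sr, target_sr=16000)
sf.write("resampled_audio.wav", audio_16k, 16000)

Compared with the wave-based convert_wav_to_16khz helper, librosa.load plus librosa.resample decodes a wider range of formats and sample rates (and mixes down to mono by default), which is presumably why the commit routes infer_img2img through this path instead.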