Update app.py
app.py
CHANGED
@@ -17,6 +17,26 @@ from converter import load_wav, mel_spectrogram, normalize_spectrogram, denormal
 from utils import pad_spec, image_add_color, torch_to_pil, normalize, denormalize, prepare_mask_and_masked_image

 # ——
+def resample_audio(input_audio, original_sr, target_sr=16000):
+    """
+    Resample the audio to the target sample rate (16000 Hz by default).
+
+    Args:
+    - input_audio (numpy array): The raw audio data.
+    - original_sr (int): The original sample rate of the input audio.
+    - target_sr (int): The target sample rate (default is 16000 Hz).
+
+    Returns:
+    - numpy array: The resampled audio.
+    """
+    if original_sr != target_sr:
+        # Resample the audio using librosa
+        audio_resampled = librosa.resample(input_audio, orig_sr=original_sr, target_sr=target_sr)
+        return audio_resampled
+    else:
+        # If sample rate is already 16000, no resampling is needed
+        return input_audio
+
 def convert_wav_to_16khz(input_path, output_path):
     with wave.open(input_path, "rb") as wav_in:
         params = wav_in.getparams()
@@ -102,8 +122,13 @@ def infer(prompt, progress=gr.Progress(track_tqdm=True)):

 def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(track_tqdm=True)):

-
-
+    # Load your audio file
+    input_audio, original_sr = librosa.load(audio_path, sr=None)  # Load with original sampling rate
+    resampled_audio = resample_audio(input_audio, original_sr, target_sr=16000)
+    # Save the resampled audio to a new file
+    sf.write('resampled_audio.wav', resampled_audio, 16000)
+    audio_path = 'resampled_audio.wav'
+
     pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
     dtype = torch.float16
     device = "cuda"