Spaces: kgout · Running on Zero

kgout committed · verified
Commit b515b62 · 1 parent: cb6ae20

Update main.py

Files changed (1): main.py (+17 −14)
main.py CHANGED
@@ -8,7 +8,6 @@ import random
 import numpy as np
 from scipy.signal.windows import hann
 import soundfile as sf
-import torch
 import librosa
 from audiosr import build_model, super_resolution
 from scipy import signal
@@ -16,25 +15,27 @@ import pyloudnorm as pyln
 import tempfile
 import spaces
 
+
 class AudioUpscaler:
     """
     Upscales audio using the AudioSR model.
     """
 
-    def __init__(self, model_name="basic", device="auto"):
+    def __init__(self, model_name="basic", device="cuda"):
         """
         Initializes the AudioUpscaler.
 
         Args:
             model_name (str, optional): Name of the AudioSR model to use. Defaults to "basic".
-            device (str, optional): Device to use for inference. Defaults to "auto".
+            device (str, optional): Device to use for inference. Defaults to "cuda".
         """
 
         self.model_name = model_name
         self.device = device
-        self.sr = 48000
+        self.sr = 44100
         self.audiosr = None # Model will be loaded in setup()
-
+
+    @spaces.GPU(duration=120)
     def setup(self):
         """
         Loads the AudioSR model.
@@ -107,12 +108,12 @@ class AudioUpscaler:
         self,
         input_file,
         chunk_size=5.12,
-        overlap=0.1,
+        overlap=0.16,
         seed=None,
         guidance_scale=3.5,
         ddim_steps=50,
         multiband_ensemble=True,
-        input_cutoff=14000,
+        input_cutoff=8000,
     ):
         """
         Processes the audio in chunks and performs upsampling.
@@ -130,7 +131,7 @@ class AudioUpscaler:
         Returns:
             np.ndarray: Upsampled audio data.
         """
-
+        chunk_size = random.randint(a=0, b=10)*0.08
         audio, sr = librosa.load(input_file, sr=input_cutoff * 2, mono=False)
         audio = audio.T
         sr = input_cutoff * 2
@@ -141,12 +142,13 @@
         else:
             audio_ch1 = audio
 
-        chunk_samples = int(chunk_size * sr)
+        chunk_samples = int(chunk_size * sr)
         overlap_samples = int(overlap * chunk_samples)
 
-        output_chunk_samples = int(chunk_size * self.sr)
-        output_overlap_samples = int(overlap * output_chunk_samples)
-        enable_overlap = True if overlap > 0 else False
+
+        output_chunk_samples = int(chunk_size * self.sr)
+        output_overlap_samples = int(overlap * output_chunk_samples)
+        enable_overlap = True if overlap > 0 else False
 
         def process_chunks(audio):
             chunks = []
@@ -320,7 +322,7 @@ class AudioUpscaler:
         chunk_size=10.24,
         seed=None,
         multiband_ensemble=True,
-        input_cutoff=14000,
+        input_cutoff=8000,
     ):
         """
         Upscales the audio and saves the result.
@@ -338,6 +340,7 @@ class AudioUpscaler:
         """
         if seed == 0:
            seed = random.randint(0, 2**32 - 1)
+        chunk_size = random.randint(0, 10) * 0.08
 
        os.makedirs(output_folder, exist_ok=True)
        waveform = self._process_audio(
@@ -385,7 +388,6 @@ def inference(audio_file, model_name, guidance_scale, ddim_steps, seed):
 
     return (48000, waveform)
 
-@spaces.GPU(duration=300)
 def upscale_audio(
     input_file,
     output_folder,
@@ -415,6 +417,7 @@ def upscale_audio(
        tuple: Upscaled audio data and sample rate.
     """
     torch.cuda.empty_cache()
+    chunk_size = random.randint(a=0, b=10)*0.08
 
     gc.collect()
     upscaler = AudioUpscaler()
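Two of the changes above concern the Hugging Face ZeroGPU decorator: the 300-second @spaces.GPU reservation is removed from the top-level upscale_audio(), and a 120-second one is added to AudioUpscaler.setup(), which loads the model. On a Space that is "Running on Zero", a GPU is attached only while a @spaces.GPU-decorated function executes, and duration caps that reservation in seconds. A minimal sketch of the pattern follows; the function, model, and batch names are illustrative and not taken from main.py.

import spaces
import torch

@spaces.GPU(duration=120)  # GPU is attached only for this call, for at most 120 s
def gpu_work(model, batch):
    # Illustrative only: move the model and input to CUDA while the ZeroGPU
    # slot is held, run one forward pass, and return the result on the CPU.
    model = model.to("cuda")
    with torch.no_grad():
        return model(batch.to("cuda")).cpu()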
 
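Both _process_audio() and the upscale entry point now default input_cutoff to 8000 Hz instead of 14000 Hz. The input is loaded with sr=input_cutoff * 2, which resamples the file so its Nyquist frequency sits exactly at the cutoff; everything above that is discarded and left for AudioSR to reconstruct. A quick illustration of the relationship, with a placeholder file path:

import librosa

input_cutoff = 8000  # new default in this commit (was 14000)
# Resampling to 2 * cutoff puts the Nyquist frequency at the cutoff itself,
# so the loaded audio contains no content above input_cutoff.
audio, sr = librosa.load("example.wav", sr=input_cutoff * 2, mono=False)
assert sr == 16000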
 
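The chunking block in _process_audio() sizes its windows at two rates: chunk_samples / overlap_samples at the load rate sr = input_cutoff * 2, and output_chunk_samples / output_overlap_samples at the model output rate self.sr, which this commit changes from 48000 to 44100 Hz. A small standalone sketch of that bookkeeping with the new defaults (overlap=0.16, input_cutoff=8000); the helper function is illustrative, not part of main.py:

def chunk_plan(chunk_size, overlap, input_cutoff, model_sr=44100):
    # Window and overlap lengths in samples, at the input rate and at the
    # model output rate, mirroring the variables computed in _process_audio().
    sr = input_cutoff * 2
    chunk_samples = int(chunk_size * sr)
    overlap_samples = int(overlap * chunk_samples)
    output_chunk_samples = int(chunk_size * model_sr)
    output_overlap_samples = int(overlap * output_chunk_samples)
    enable_overlap = overlap > 0
    return (chunk_samples, overlap_samples,
            output_chunk_samples, output_overlap_samples, enable_overlap)

# With a 5.12 s chunk: 81920 / 13107 samples at 16 kHz in,
# 225792 / 36126 samples at 44.1 kHz out, overlap enabled.
print(chunk_plan(5.12, 0.16, 8000))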
 
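Finally, the commit inserts chunk_size = random.randint(a=0, b=10) * 0.08 (and the positional form random.randint(0, 10) * 0.08) at three points, overriding whatever chunk_size the caller passed. random.randint is inclusive on both ends, so the effective chunk length becomes one of eleven multiples of 0.08 s between 0.0 and 0.8 s, as the snippet below shows:

import random

# randint(a, b) draws an integer from the closed interval [a, b], so this
# override yields one of 11 values: 0.0, 0.08, 0.16, ..., 0.8 seconds.
chunk_size = random.randint(a=0, b=10) * 0.08
possible = [k * 0.08 for k in range(0, 11)]
print(chunk_size, chunk_size in possible)  # membership test is always True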