xushengyuan committed on
Commit
1005670
·
verified ·
1 Parent(s): 7ee61ea

change audio format from flac to 320kbps mp3 (#3)

Browse files

- change audio format from flac to 320kbps mp3 (2ed96609921a553f9d175ad30437e9da5923dd15)

Files changed (1) hide show
  1. pipeline_ace_step.py +5 -4
pipeline_ace_step.py CHANGED
@@ -24,6 +24,7 @@ from models.ace_step_transformer import ACEStepTransformer2DModel
24
  from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
25
  from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
26
  import torchaudio
 
27
 
28
 
29
  torch.backends.cudnn.benchmark = False
@@ -917,7 +918,7 @@ class ACEStepPipeline:
917
  target_latents = torch.cate([to_right_pad_gt_latents, target_latents], dim=0)
918
  return target_latents
919
 
920
- def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="flac"):
921
  output_audio_paths = []
922
  bs = latents.shape[0]
923
  audio_lengths = [target_wav_duration_second * sample_rate] * bs
@@ -930,7 +931,7 @@ class ACEStepPipeline:
930
  output_audio_paths.append(output_audio_path)
931
  return output_audio_paths
932
 
933
- def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="flac"):
934
  if save_path is None:
935
  logger.warning("save_path is None, using default path ./outputs/")
936
  base_path = f"./outputs"
@@ -941,7 +942,7 @@ class ACEStepPipeline:
941
 
942
  output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
943
  target_wav = target_wav.float()
944
- torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format)
945
  return output_path_flac
946
 
947
  def infer_latents(self, input_audio_path):
@@ -986,7 +987,7 @@ class ACEStepPipeline:
986
  edit_n_max: float = 1.0,
987
  edit_n_avg: int = 1,
988
  save_path: str = None,
989
- format: str = "flac",
990
  batch_size: int = 1,
991
  debug: bool = False,
992
  ):
 
24
  from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
25
  from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
26
  import torchaudio
27
+ import torio
28
 
29
 
30
  torch.backends.cudnn.benchmark = False
 
918
  target_latents = torch.cate([to_right_pad_gt_latents, target_latents], dim=0)
919
  return target_latents
920
 
921
+ def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="mp3"):
922
  output_audio_paths = []
923
  bs = latents.shape[0]
924
  audio_lengths = [target_wav_duration_second * sample_rate] * bs
 
931
  output_audio_paths.append(output_audio_path)
932
  return output_audio_paths
933
 
934
+ def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="mp3"):
935
  if save_path is None:
936
  logger.warning("save_path is None, using default path ./outputs/")
937
  base_path = f"./outputs"
 
942
 
943
  output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
944
  target_wav = target_wav.float()
945
+ torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format, compression=torio.io.CodecConfig(bit_rate=320000))
946
  return output_path_flac
947
 
948
  def infer_latents(self, input_audio_path):
 
987
  edit_n_max: float = 1.0,
988
  edit_n_avg: int = 1,
989
  save_path: str = None,
990
+ format: str = "mp3",
991
  batch_size: int = 1,
992
  debug: bool = False,
993
  ):