Spaces:
Running
Running
change audio format from flac to 320kbps mp3 (#3)
Browse files
- change audio format from flac to 320kbps mp3 (2ed96609921a553f9d175ad30437e9da5923dd15)
- pipeline_ace_step.py +5 -4
pipeline_ace_step.py
CHANGED
@@ -24,6 +24,7 @@ from models.ace_step_transformer import ACEStepTransformer2DModel
|
|
24 |
from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
|
25 |
from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
|
26 |
import torchaudio
|
|
|
27 |
|
28 |
|
29 |
torch.backends.cudnn.benchmark = False
|
@@ -917,7 +918,7 @@ class ACEStepPipeline:
|
|
917 |
target_latents = torch.cate([to_right_pad_gt_latents, target_latents], dim=0)
|
918 |
return target_latents
|
919 |
|
920 |
-
def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="flac"):
|
921 |
output_audio_paths = []
|
922 |
bs = latents.shape[0]
|
923 |
audio_lengths = [target_wav_duration_second * sample_rate] * bs
|
@@ -930,7 +931,7 @@ class ACEStepPipeline:
|
|
930 |
output_audio_paths.append(output_audio_path)
|
931 |
return output_audio_paths
|
932 |
|
933 |
-
def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="flac"):
|
934 |
if save_path is None:
|
935 |
logger.warning("save_path is None, using default path ./outputs/")
|
936 |
base_path = f"./outputs"
|
@@ -941,7 +942,7 @@ class ACEStepPipeline:
|
|
941 |
|
942 |
output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
943 |
target_wav = target_wav.float()
|
944 |
-
torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format)
|
945 |
return output_path_flac
|
946 |
|
947 |
def infer_latents(self, input_audio_path):
|
@@ -986,7 +987,7 @@ class ACEStepPipeline:
|
|
986 |
edit_n_max: float = 1.0,
|
987 |
edit_n_avg: int = 1,
|
988 |
save_path: str = None,
|
989 |
-
format: str = "flac",
|
990 |
batch_size: int = 1,
|
991 |
debug: bool = False,
|
992 |
):
|
|
|
24 |
from models.lyrics_utils.lyric_tokenizer import VoiceBpeTokenizer
|
25 |
from apg_guidance import apg_forward, MomentumBuffer, cfg_forward, cfg_zero_star, cfg_double_condition_forward
|
26 |
import torchaudio
|
27 |
+
import torio
|
28 |
|
29 |
|
30 |
torch.backends.cudnn.benchmark = False
|
|
|
918 |
target_latents = torch.cate([to_right_pad_gt_latents, target_latents], dim=0)
|
919 |
return target_latents
|
920 |
|
921 |
+
def latents2audio(self, latents, target_wav_duration_second=30, sample_rate=48000, save_path=None, format="mp3"):
|
922 |
output_audio_paths = []
|
923 |
bs = latents.shape[0]
|
924 |
audio_lengths = [target_wav_duration_second * sample_rate] * bs
|
|
|
931 |
output_audio_paths.append(output_audio_path)
|
932 |
return output_audio_paths
|
933 |
|
934 |
+
def save_wav_file(self, target_wav, idx, save_path=None, sample_rate=48000, format="mp3"):
|
935 |
if save_path is None:
|
936 |
logger.warning("save_path is None, using default path ./outputs/")
|
937 |
base_path = f"./outputs"
|
|
|
942 |
|
943 |
output_path_flac = f"{base_path}/output_{time.strftime('%Y%m%d%H%M%S')}_{idx}.{format}"
|
944 |
target_wav = target_wav.float()
|
945 |
+
torchaudio.save(output_path_flac, target_wav, sample_rate=sample_rate, format=format, compression=torio.io.CodecConfig(bit_rate=320000))
|
946 |
return output_path_flac
|
947 |
|
948 |
def infer_latents(self, input_audio_path):
|
|
|
987 |
edit_n_max: float = 1.0,
|
988 |
edit_n_avg: int = 1,
|
989 |
save_path: str = None,
|
990 |
+
format: str = "mp3",
|
991 |
batch_size: int = 1,
|
992 |
debug: bool = False,
|
993 |
):
|