How to use this?
There is no description of how to use this model to run inference on audio samples.
I figured it out from app.py:
def fn_clearvoice_ss(input_wav):
    """Separate a mixture into two sources and write each one to a WAV file.

    Builds a ``ClearVoice`` object for the ``speech_separation`` task with the
    ``MossFormer2_SS_16K`` model, runs it on ``input_wav`` with
    ``online_write=False``, and writes the first two returned sources to
    ``separated_s1.wav`` and ``separated_s2.wav`` (relative paths).

    ``ClearVoice`` and ``sf`` must be provided by the surrounding module;
    ``sf`` is presumably the ``soundfile`` package -- confirm against app.py.

    Args:
        input_wav: Forwarded to the model as ``input_path``; presumably the
            path of the mixed-speech audio file -- confirm against caller.

    Returns:
        The tuple ``("separated_s1.wav", "separated_s2.wav")``.
    """
    # NOTE: a new ClearVoice instance is created on every call.
    separator = ClearVoice(task='speech_separation', model_names=['MossFormer2_SS_16K'])
    result = separator(input_path=input_wav, online_write=False)

    # The model may hand back either a dict or the source list directly.
    # For a dict, only the entry under its first key is used.
    if isinstance(result, dict):
        sources = result[next(iter(result))]
    else:
        sources = result

    source_1 = sources[0]
    source_2 = sources[1]

    # Only row 0 of each source is written; assumes each source is a 2-D
    # array laid out as (channel, samples) -- TODO confirm.
    sf.write('separated_s1.wav', source_1[0, :], 16000)
    sf.write('separated_s2.wav', source_2[0, :], 16000)
    return "separated_s1.wav", "separated_s2.wav"