Spaces:
Running
on
Zero
Running
on
Zero
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
- app.py +2 -3
- inference-cli.py +2 -3
- model/utils.py +2 -3
app.py
CHANGED
|
@@ -158,9 +158,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, exp_name, remove_silence,
|
|
| 158 |
|
| 159 |
# Calculate duration
|
| 160 |
ref_audio_len = audio.shape[-1] // hop_length
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
| 164 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
| 165 |
|
| 166 |
# inference
|
|
|
|
| 158 |
|
| 159 |
# Calculate duration
|
| 160 |
ref_audio_len = audio.shape[-1] // hop_length
|
| 161 |
+
ref_text_len = len(ref_text.encode('utf-8'))
|
| 162 |
+
gen_text_len = len(gen_text.encode('utf-8'))
|
|
|
|
| 163 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
| 164 |
|
| 165 |
# inference
|
inference-cli.py
CHANGED
|
@@ -250,9 +250,8 @@ def infer_batch(ref_audio, ref_text, gen_text_batches, model,ckpt_file,file_voca
|
|
| 250 |
|
| 251 |
# Calculate duration
|
| 252 |
ref_audio_len = audio.shape[-1] // hop_length
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
gen_text_len = len(gen_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gen_text))
|
| 256 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
| 257 |
|
| 258 |
# inference
|
|
|
|
| 250 |
|
| 251 |
# Calculate duration
|
| 252 |
ref_audio_len = audio.shape[-1] // hop_length
|
| 253 |
+
ref_text_len = len(ref_text.encode('utf-8'))
|
| 254 |
+
gen_text_len = len(gen_text.encode('utf-8'))
|
|
|
|
| 255 |
duration = ref_audio_len + int(ref_audio_len / ref_text_len * gen_text_len / speed)
|
| 256 |
|
| 257 |
# inference
|
model/utils.py
CHANGED
|
@@ -296,9 +296,8 @@ def get_inference_prompt(
|
|
| 296 |
# # test vocoder resynthesis
|
| 297 |
# ref_audio = gt_audio
|
| 298 |
else:
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
gen_text_len = len(gt_text.encode('utf-8')) + 3 * len(re.findall(zh_pause_punc, gt_text))
|
| 302 |
total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
|
| 303 |
|
| 304 |
# to mel spectrogram
|
|
|
|
| 296 |
# # test vocoder resynthesis
|
| 297 |
# ref_audio = gt_audio
|
| 298 |
else:
|
| 299 |
+
ref_text_len = len(prompt_text.encode('utf-8'))
|
| 300 |
+
gen_text_len = len(gt_text.encode('utf-8'))
|
|
|
|
| 301 |
total_mel_len = ref_mel_len + int(ref_mel_len / ref_text_len * gen_text_len / speed)
|
| 302 |
|
| 303 |
# to mel spectrogram
|