Spaces:
Running
Running
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
app.py
CHANGED
|
@@ -112,15 +112,15 @@ def chunk_text(text, max_chars=135):
|
|
| 112 |
chunks = []
|
| 113 |
current_chunk = ""
|
| 114 |
# Split the text into sentences based on punctuation followed by whitespace
|
| 115 |
-
sentences = re.split(r'(?<=[;:,.!?])\s
|
| 116 |
|
| 117 |
for sentence in sentences:
|
| 118 |
-
if len(current_chunk) + len(sentence) <= max_chars:
|
| 119 |
-
current_chunk += sentence + " "
|
| 120 |
else:
|
| 121 |
if current_chunk:
|
| 122 |
chunks.append(current_chunk.strip())
|
| 123 |
-
current_chunk = sentence + " "
|
| 124 |
|
| 125 |
if current_chunk:
|
| 126 |
chunks.append(current_chunk.strip())
|
|
@@ -258,7 +258,7 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
| 258 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
| 259 |
|
| 260 |
non_silent_segs = silence.split_on_silence(
|
| 261 |
-
aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=
|
| 262 |
)
|
| 263 |
non_silent_wave = AudioSegment.silent(duration=0)
|
| 264 |
for non_silent_seg in non_silent_segs:
|
|
@@ -295,7 +295,8 @@ def infer(ref_audio_orig, ref_text, gen_text, exp_name, remove_silence, cross_fa
|
|
| 295 |
audio, sr = torchaudio.load(ref_audio)
|
| 296 |
|
| 297 |
# Use the new chunk_text function to split gen_text
|
| 298 |
-
|
|
|
|
| 299 |
print('ref_text', ref_text)
|
| 300 |
for i, batch_text in enumerate(gen_text_batches):
|
| 301 |
print(f'gen_text {i}', batch_text)
|
|
|
|
| 112 |
chunks = []
|
| 113 |
current_chunk = ""
|
| 114 |
# Split the text into sentences based on punctuation followed by whitespace
|
| 115 |
+
sentences = re.split(r'(?<=[;:,.!?])\s+|(?<=[;:,。!?])', text)
|
| 116 |
|
| 117 |
for sentence in sentences:
|
| 118 |
+
if len(current_chunk.encode('utf-8')) + len(sentence.encode('utf-8')) <= max_chars:
|
| 119 |
+
current_chunk += sentence + " " if sentence and len(sentence[-1].encode('utf-8')) == 1 else sentence
|
| 120 |
else:
|
| 121 |
if current_chunk:
|
| 122 |
chunks.append(current_chunk.strip())
|
| 123 |
+
current_chunk = sentence + " " if sentence and len(sentence[-1].encode('utf-8')) == 1 else sentence
|
| 124 |
|
| 125 |
if current_chunk:
|
| 126 |
chunks.append(current_chunk.strip())
|
|
|
|
| 258 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
| 259 |
|
| 260 |
non_silent_segs = silence.split_on_silence(
|
| 261 |
+
aseg, min_silence_len=1000, silence_thresh=-50, keep_silence=1000
|
| 262 |
)
|
| 263 |
non_silent_wave = AudioSegment.silent(duration=0)
|
| 264 |
for non_silent_seg in non_silent_segs:
|
|
|
|
| 295 |
audio, sr = torchaudio.load(ref_audio)
|
| 296 |
|
| 297 |
# Use the new chunk_text function to split gen_text
|
| 298 |
+
max_chars = int(len(ref_text.encode('utf-8')) / (audio.shape[-1] / sr) * (25 - audio.shape[-1] / sr))
|
| 299 |
+
gen_text_batches = chunk_text(gen_text, max_chars=max_chars)
|
| 300 |
print('ref_text', ref_text)
|
| 301 |
for i, batch_text in enumerate(gen_text_batches):
|
| 302 |
print(f'gen_text {i}', batch_text)
|