Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import pypdf
|
|
10 |
import random
|
11 |
import re
|
12 |
import spaces
|
|
|
13 |
import torch
|
14 |
import yaml
|
15 |
|
@@ -43,6 +44,13 @@ def get_random_text(voice):
|
|
43 |
lang = 'en'
|
44 |
return random.choice(random_texts[lang])
|
45 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
def parens_to_angles(s):
    """Return `s` with '(' swapped for '«' and ')' swapped for '»'."""
    # One translation pass covers both characters; all other text is untouched.
    return s.translate(str.maketrans('()', '«»'))
|
48 |
|
@@ -232,8 +240,9 @@ def clamp_speed(speed):
|
|
232 |
return 2
|
233 |
return speed
|
234 |
|
|
|
235 |
# Must be backwards compatible with https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
|
236 |
-
def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto'
|
237 |
voices = resolve_voices(voice, warn=ps)
|
238 |
ps = ps or phonemize(text, voice)
|
239 |
speed = clamp_speed(speed)
|
@@ -246,6 +255,7 @@ def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto', sk=N
|
|
246 |
tokens = tokens[:510]
|
247 |
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
248 |
use_gpu = len(ps) > 99 if use_gpu == 'auto' else use_gpu
|
|
|
249 |
print('🔥', datetime.now(), text, voices, ps, use_gpu, sk)
|
250 |
try:
|
251 |
if use_gpu:
|
@@ -321,10 +331,8 @@ with gr.Blocks() as basic_tts:
|
|
321 |
btn = gr.Button(list(CHOICES.values())[i*4+j], variant='primary' if i*4+j < 10 else 'secondary')
|
322 |
btn.click(lambda v, b: f'{v}+{b}' if v.startswith(b[:2]) else b, inputs=[voice, btn], outputs=[voice])
|
323 |
voice.change(lambda v, b: gr.Button(b, variant='primary' if v.startswith(b[:2]) else 'secondary'), inputs=[voice, btn], outputs=[btn])
|
324 |
-
sk = gr.State()
|
325 |
text.submit(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
|
326 |
generate_btn.click(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
|
327 |
-
basic_tts.load(lambda r: r.session_hash, None, sk)
|
328 |
|
329 |
@torch.no_grad()
|
330 |
def lf_forward(token_lists, voices, speed, device='cpu'):
|
@@ -500,7 +508,7 @@ with gr.Blocks() as lf_tts:
|
|
500 |
|
501 |
with gr.Blocks() as about:
|
502 |
gr.Markdown('''
|
503 |
-
Kokoro is a frontier TTS model for its size. It has [80 million](https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#
|
504 |
|
505 |
### FAQ
|
506 |
**Will this be open sourced?**<br/>
|
@@ -582,6 +590,7 @@ with gr.Blocks() as app:
|
|
582 |
[basic_tts, lf_tts, about, changelog],
|
583 |
['🔥 Basic TTS', '📖 Long Form', 'ℹ️ About', '📝 Changelog'],
|
584 |
)
|
|
|
585 |
|
586 |
if __name__ == '__main__':
|
587 |
app.queue(api_open=True).launch()
|
|
|
10 |
import random
|
11 |
import re
|
12 |
import spaces
|
13 |
+
import subprocess
|
14 |
import torch
|
15 |
import yaml
|
16 |
|
|
|
44 |
lang = 'en'
|
45 |
return random.choice(random_texts[lang])
|
46 |
|
47 |
+
# Build the pool of sentences from the text files hosted in the
# TTS-Spaces-Arena space. Each file is fetched with wget into the current
# working directory and its lines are merged into `sents` (a set, so
# duplicate lines collapse).
sents = set()
# A tuple (not a set) keeps the download/read order deterministic.
for txt in ('harvard_sentences', 'llama3_command-r_sentences_1st_person', 'llama3_command-r_sentences_excla', 'llama3_command-r_questions'):
    txt += '.txt'
    subprocess.run(['wget', f'https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena/resolve/main/{txt}'])
    # Read the file that was just downloaded. This used to open the
    # hard-coded 'harvard_sentences.txt', so the other three files were
    # fetched but never loaded.
    with open(txt) as f:
        sents.update(f.read().strip().splitlines())
|
53 |
+
|
54 |
def parens_to_angles(s):
    """Return `s` with '(' swapped for '«' and ')' swapped for '»'."""
    # One translation pass covers both characters; all other text is untouched.
    return s.translate(str.maketrans('()', '«»'))
|
56 |
|
|
|
240 |
return 2
|
241 |
return speed
|
242 |
|
243 |
+
sk = gr.State()
|
244 |
# Must be backwards compatible with https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
|
245 |
+
def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto'):
|
246 |
voices = resolve_voices(voice, warn=ps)
|
247 |
ps = ps or phonemize(text, voice)
|
248 |
speed = clamp_speed(speed)
|
|
|
255 |
tokens = tokens[:510]
|
256 |
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
257 |
use_gpu = len(ps) > 99 if use_gpu == 'auto' else use_gpu
|
258 |
+
global sk
|
259 |
print('🔥', datetime.now(), text, voices, ps, use_gpu, sk)
|
260 |
try:
|
261 |
if use_gpu:
|
|
|
331 |
btn = gr.Button(list(CHOICES.values())[i*4+j], variant='primary' if i*4+j < 10 else 'secondary')
|
332 |
btn.click(lambda v, b: f'{v}+{b}' if v.startswith(b[:2]) else b, inputs=[voice, btn], outputs=[voice])
|
333 |
voice.change(lambda v, b: gr.Button(b, variant='primary' if v.startswith(b[:2]) else 'secondary'), inputs=[voice, btn], outputs=[btn])
|
|
|
334 |
text.submit(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
|
335 |
generate_btn.click(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
|
|
|
336 |
|
337 |
@torch.no_grad()
|
338 |
def lf_forward(token_lists, voices, speed, device='cpu'):
|
|
|
508 |
|
509 |
with gr.Blocks() as about:
|
510 |
gr.Markdown('''
|
511 |
+
Kokoro is a frontier TTS model for its size. It has [80 million](https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L33) parameters, uses a lean [StyleTTS 2](https://github.com/yl4579/StyleTTS2) architecture, and was trained on high-quality data. The weights are currently private, but a free public demo is hosted here, at `https://hf.co/spaces/hexgrad/Kokoro-TTS`. The Community tab is open for feature requests, bug reports, etc. For other inquiries, contact `@rzvzn` on Discord.
|
512 |
|
513 |
### FAQ
|
514 |
**Will this be open sourced?**<br/>
|
|
|
590 |
[basic_tts, lf_tts, about, changelog],
|
591 |
['🔥 Basic TTS', '📖 Long Form', 'ℹ️ About', '📝 Changelog'],
|
592 |
)
|
593 |
+
app.load(lambda r: r.session_hash, None, sk)
|
594 |
|
595 |
if __name__ == '__main__':
|
596 |
app.queue(api_open=True).launch()
|