hexgrad committed on
Commit
e1f0af6
·
verified ·
1 Parent(s): 814da15

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -10,6 +10,7 @@ import pypdf
10
  import random
11
  import re
12
  import spaces
 
13
  import torch
14
  import yaml
15
 
@@ -43,6 +44,13 @@ def get_random_text(voice):
43
  lang = 'en'
44
  return random.choice(random_texts[lang])
45
 
 
 
 
 
 
 
 
46
  def parens_to_angles(s):
47
  return s.replace('(', '«').replace(')', '»')
48
 
@@ -232,8 +240,9 @@ def clamp_speed(speed):
232
  return 2
233
  return speed
234
 
 
235
  # Must be backwards compatible with https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
236
- def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto', sk=None):
237
  voices = resolve_voices(voice, warn=ps)
238
  ps = ps or phonemize(text, voice)
239
  speed = clamp_speed(speed)
@@ -246,6 +255,7 @@ def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto', sk=N
246
  tokens = tokens[:510]
247
  ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
248
  use_gpu = len(ps) > 99 if use_gpu == 'auto' else use_gpu
 
249
  print('🔥', datetime.now(), text, voices, ps, use_gpu, sk)
250
  try:
251
  if use_gpu:
@@ -321,10 +331,8 @@ with gr.Blocks() as basic_tts:
321
  btn = gr.Button(list(CHOICES.values())[i*4+j], variant='primary' if i*4+j < 10 else 'secondary')
322
  btn.click(lambda v, b: f'{v}+{b}' if v.startswith(b[:2]) else b, inputs=[voice, btn], outputs=[voice])
323
  voice.change(lambda v, b: gr.Button(b, variant='primary' if v.startswith(b[:2]) else 'secondary'), inputs=[voice, btn], outputs=[btn])
324
- sk = gr.State()
325
  text.submit(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
326
  generate_btn.click(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
327
- basic_tts.load(lambda r: r.session_hash, None, sk)
328
 
329
  @torch.no_grad()
330
  def lf_forward(token_lists, voices, speed, device='cpu'):
@@ -500,7 +508,7 @@ with gr.Blocks() as lf_tts:
500
 
501
  with gr.Blocks() as about:
502
  gr.Markdown('''
503
- Kokoro is a frontier TTS model for its size. It has [80 million](https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L32) parameters, uses a lean [StyleTTS 2](https://github.com/yl4579/StyleTTS2) architecture, and was trained on high-quality data. The weights are currently private, but a free public demo is hosted here, at `https://hf.co/spaces/hexgrad/Kokoro-TTS`. The Community tab is open for feature requests, bug reports, etc. For other inquiries, contact `@rzvzn` on Discord.
504
 
505
  ### FAQ
506
  **Will this be open sourced?**<br/>
@@ -582,6 +590,7 @@ with gr.Blocks() as app:
582
  [basic_tts, lf_tts, about, changelog],
583
  ['🔥 Basic TTS', '📖 Long Form', 'ℹ️ About', '📝 Changelog'],
584
  )
 
585
 
586
  if __name__ == '__main__':
587
  app.queue(api_open=True).launch()
 
10
  import random
11
  import re
12
  import spaces
13
+ import subprocess
14
  import torch
15
  import yaml
16
 
 
44
  lang = 'en'
45
  return random.choice(random_texts[lang])
46
 
47
+ sents = set()
48
+ for txt in {'harvard_sentences', 'llama3_command-r_sentences_1st_person', 'llama3_command-r_sentences_excla', 'llama3_command-r_questions'}:
49
+ txt += '.txt'
50
+ subprocess.run(['wget', f'https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena/resolve/main/{txt}'])
51
+ with open('harvard_sentences.txt') as f:
52
+ sents.update(f.read().strip().splitlines())
53
+
54
  def parens_to_angles(s):
55
  return s.replace('(', '«').replace(')', '»')
56
 
 
240
  return 2
241
  return speed
242
 
243
+ sk = gr.State()
244
  # Must be backwards compatible with https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
245
+ def generate(text, voice='af', ps=None, speed=1, trim=3000, use_gpu='auto'):
246
  voices = resolve_voices(voice, warn=ps)
247
  ps = ps or phonemize(text, voice)
248
  speed = clamp_speed(speed)
 
255
  tokens = tokens[:510]
256
  ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
257
  use_gpu = len(ps) > 99 if use_gpu == 'auto' else use_gpu
258
+ global sk
259
  print('🔥', datetime.now(), text, voices, ps, use_gpu, sk)
260
  try:
261
  if use_gpu:
 
331
  btn = gr.Button(list(CHOICES.values())[i*4+j], variant='primary' if i*4+j < 10 else 'secondary')
332
  btn.click(lambda v, b: f'{v}+{b}' if v.startswith(b[:2]) else b, inputs=[voice, btn], outputs=[voice])
333
  voice.change(lambda v, b: gr.Button(b, variant='primary' if v.startswith(b[:2]) else 'secondary'), inputs=[voice, btn], outputs=[btn])
 
334
  text.submit(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
335
  generate_btn.click(generate, inputs=[text, voice, in_ps, speed, trim, use_gpu, sk], outputs=[audio, out_ps])
 
336
 
337
  @torch.no_grad()
338
  def lf_forward(token_lists, voices, speed, device='cpu'):
 
508
 
509
  with gr.Blocks() as about:
510
  gr.Markdown('''
511
+ Kokoro is a frontier TTS model for its size. It has [80 million](https://hf.co/spaces/hexgrad/Kokoro-TTS/blob/main/app.py#L33) parameters, uses a lean [StyleTTS 2](https://github.com/yl4579/StyleTTS2) architecture, and was trained on high-quality data. The weights are currently private, but a free public demo is hosted here, at `https://hf.co/spaces/hexgrad/Kokoro-TTS`. The Community tab is open for feature requests, bug reports, etc. For other inquiries, contact `@rzvzn` on Discord.
512
 
513
  ### FAQ
514
  **Will this be open sourced?**<br/>
 
590
  [basic_tts, lf_tts, about, changelog],
591
  ['🔥 Basic TTS', '📖 Long Form', 'ℹ️ About', '📝 Changelog'],
592
  )
593
+ app.load(lambda r: r.session_hash, None, sk)
594
 
595
  if __name__ == '__main__':
596
  app.queue(api_open=True).launch()