Pendrokar committed on
Commit
b45c900
·
1 Parent(s): bc6dc80
app/models.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  from gradio_client import handle_file
3
 
4
- # Models to include in the leaderboard, only include models that users can vote on
5
  AVAILABLE_MODELS = {
6
  # 'XTTSv2': 'xtts',
7
  # 'WhisperSpeech': 'whisperspeech',
@@ -52,10 +52,15 @@ AVAILABLE_MODELS = {
52
 
53
  # IMS-Toucan
54
  # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
 
55
  # StyleTTS v2
56
- 'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2', # more votes in OG arena; emotionless
57
- # StyleTTS kokoro
58
- 'hexgrad/kokoro': 'hexgrad/kokoro',
 
 
 
 
59
 
60
  # MaskGCT (by Amphion)
61
  # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
@@ -92,6 +97,7 @@ HF_SPACES = {
92
  'return_audio_index': 1,
93
  'series': 'XTTS',
94
  },
 
95
  # WhisperSpeech
96
  'collabora/WhisperSpeech': {
97
  'name': 'WhisperSpeech',
@@ -101,6 +107,7 @@ HF_SPACES = {
101
  'series': 'WhisperSpeech',
102
  'emoji': '😷', # broken space
103
  },
 
104
  # OpenVoice (MyShell.ai)
105
  'myshell-ai/OpenVoice': {
106
  'name':'OpenVoice',
@@ -117,6 +124,7 @@ HF_SPACES = {
117
  'return_audio_index': 1,
118
  'series': 'OpenVoice',
119
  },
 
120
  # MetaVoice
121
  'mrfakename/MetaVoice-1B-v0.1': {
122
  'name':'MetaVoice',
@@ -126,6 +134,7 @@ HF_SPACES = {
126
  'series': 'MetaVoice-1B',
127
  'emoji': '😷', # broken space
128
  },
 
129
  # xVASynth (CPU)
130
  'Pendrokar/xVASynth-TTS': {
131
  'name': 'xVASynth v3',
@@ -134,6 +143,7 @@ HF_SPACES = {
134
  'return_audio_index': 0,
135
  'series': 'xVASynth',
136
  },
 
137
  # CoquiTTS (CPU)
138
  'coqui/CoquiTTS': {
139
  'name': 'CoquiTTS',
@@ -142,6 +152,7 @@ HF_SPACES = {
142
  'return_audio_index': 0,
143
  'series': 'CoquiTTS',
144
  },
 
145
  # HierSpeech_TTS
146
  'LeeSangHoon/HierSpeech_TTS': {
147
  'name': 'HierSpeech++',
@@ -151,6 +162,7 @@ HF_SPACES = {
151
  'series': 'HierSpeech++',
152
  'emoji': '😒', # unemotional
153
  },
 
154
  # MeloTTS (MyShell.ai)
155
  'mrfakename/MeloTTS': {
156
  'name': 'MeloTTS',
@@ -279,6 +291,17 @@ HF_SPACES = {
279
  'series': 'Kokoro',
280
  },
281
 
 
 
 
 
 
 
 
 
 
 
 
282
  # MaskGCT (by Amphion)
283
  'amphion/maskgct': {
284
  'name': 'MaskGCT',
@@ -287,7 +310,7 @@ HF_SPACES = {
287
  'return_audio_index': 0,
288
  'is_zero_gpu_space': True,
289
  'series': 'MaskGCT',
290
- # 'emoji': '🥵', # 300s minimum ZeroGPU!
291
  },
292
  'Svngoku/maskgct-audio-lab': {
293
  'name': 'MaskGCT',
@@ -296,8 +319,10 @@ HF_SPACES = {
296
  'return_audio_index': 0,
297
  'is_zero_gpu_space': True,
298
  'series': 'MaskGCT',
299
- # 'emoji': '🥵', # 300s minimum ZeroGPU!
300
  },
 
 
301
  'lj1995/GPT-SoVITS-v2': {
302
  'name': 'GPT-SoVITS v2',
303
  'function': '/get_tts_wav',
@@ -306,6 +331,8 @@ HF_SPACES = {
306
  'is_zero_gpu_space': True,
307
  'series': 'GPT-SoVITS',
308
  },
 
 
309
  'ameerazam08/OuteTTS-0.2-500M-Demo': {
310
  'name': 'OuteTTS v2 500M',
311
  'function': '/generate_tts',
@@ -313,7 +340,9 @@ HF_SPACES = {
313
  'return_audio_index': 0,
314
  'is_zero_gpu_space': True,
315
  'series': 'OuteTTS',
 
316
  },
 
317
  'OuteAI/OuteTTS-0.3-1B-Demo': {
318
  'name': 'OuteTTS v3 1B',
319
  'function': '/generate_tts',
@@ -321,14 +350,18 @@ HF_SPACES = {
321
  'return_audio_index': 0,
322
  'is_zero_gpu_space': True,
323
  'series': 'OuteTTS',
 
324
  },
 
 
325
  'srinivasbilla/llasa-3b-tts': {
326
- 'name': 'llasa 3b',
327
  'function': '/infer',
328
  'text_param_index': 'target_text',
329
  'return_audio_index': 0,
330
  'is_zero_gpu_space': True,
331
  'series': 'llasa 3b',
 
332
  },
333
  }
334
 
@@ -487,6 +520,12 @@ OVERRIDE_INPUTS = {
487
  'sk': os.getenv('KOKORO'),
488
  },
489
 
 
 
 
 
 
 
490
  # maskGCT (by amphion)
491
  'amphion/maskgct': {
492
  0: DEFAULT_VOICE_SAMPLE, #prompt_wav
 
1
  import os
2
  from gradio_client import handle_file
3
 
4
+ # Models to enable, only include models that users can vote on
5
  AVAILABLE_MODELS = {
6
  # 'XTTSv2': 'xtts',
7
  # 'WhisperSpeech': 'whisperspeech',
 
52
 
53
  # IMS-Toucan
54
  # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
55
+
56
  # StyleTTS v2
57
+ # 'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2', # more votes in OG arena; emotionless
58
+ # StyleTTS Kokoro v0.19
59
+ # 'hexgrad/kokoro': 'hexgrad/Kokoro-TTS',
60
+ # StyleTTS Kokoro v0.23
61
+ # 'hexgrad/Kokoro-TTS/0.23': 'hexgrad/Kokoro-TTS',
62
+ # StyleTTS Kokoro v1.0
63
+ 'hexgrad/Kokoro-API': 'hexgrad/kokoro-API',
64
 
65
  # MaskGCT (by Amphion)
66
  # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
 
97
  'return_audio_index': 1,
98
  'series': 'XTTS',
99
  },
100
+
101
  # WhisperSpeech
102
  'collabora/WhisperSpeech': {
103
  'name': 'WhisperSpeech',
 
107
  'series': 'WhisperSpeech',
108
  'emoji': '😷', # broken space
109
  },
110
+
111
  # OpenVoice (MyShell.ai)
112
  'myshell-ai/OpenVoice': {
113
  'name':'OpenVoice',
 
124
  'return_audio_index': 1,
125
  'series': 'OpenVoice',
126
  },
127
+
128
  # MetaVoice
129
  'mrfakename/MetaVoice-1B-v0.1': {
130
  'name':'MetaVoice',
 
134
  'series': 'MetaVoice-1B',
135
  'emoji': '😷', # broken space
136
  },
137
+
138
  # xVASynth (CPU)
139
  'Pendrokar/xVASynth-TTS': {
140
  'name': 'xVASynth v3',
 
143
  'return_audio_index': 0,
144
  'series': 'xVASynth',
145
  },
146
+
147
  # CoquiTTS (CPU)
148
  'coqui/CoquiTTS': {
149
  'name': 'CoquiTTS',
 
152
  'return_audio_index': 0,
153
  'series': 'CoquiTTS',
154
  },
155
+
156
  # HierSpeech_TTS
157
  'LeeSangHoon/HierSpeech_TTS': {
158
  'name': 'HierSpeech++',
 
162
  'series': 'HierSpeech++',
163
  'emoji': '😒', # unemotional
164
  },
165
+
166
  # MeloTTS (MyShell.ai)
167
  'mrfakename/MeloTTS': {
168
  'name': 'MeloTTS',
 
291
  'series': 'Kokoro',
292
  },
293
 
294
+ # StyleTTS Kokoro v1.0
295
+ 'hexgrad/Kokoro-API': {
296
+ 'name': 'Kokoro v1.0',
297
+ 'function': '/predict',
298
+ 'text_param_index': 'text',
299
+ 'return_audio_index': 0,
300
+ 'is_zero_gpu_space': False,
301
+ 'series': 'Kokoro',
302
+ 'hf_token': os.getenv('KOKORO'), #special
303
+ },
304
+
305
  # MaskGCT (by Amphion)
306
  'amphion/maskgct': {
307
  'name': 'MaskGCT',
 
310
  'return_audio_index': 0,
311
  'is_zero_gpu_space': True,
312
  'series': 'MaskGCT',
313
+ # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
314
  },
315
  'Svngoku/maskgct-audio-lab': {
316
  'name': 'MaskGCT',
 
319
  'return_audio_index': 0,
320
  'is_zero_gpu_space': True,
321
  'series': 'MaskGCT',
322
+ # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
323
  },
324
+
325
+ # GPT-SoVITS v2
326
  'lj1995/GPT-SoVITS-v2': {
327
  'name': 'GPT-SoVITS v2',
328
  'function': '/get_tts_wav',
 
331
  'is_zero_gpu_space': True,
332
  'series': 'GPT-SoVITS',
333
  },
334
+
335
+ # OuteTTS v0.2 500M
336
  'ameerazam08/OuteTTS-0.2-500M-Demo': {
337
  'name': 'OuteTTS v2 500M',
338
  'function': '/generate_tts',
 
340
  'return_audio_index': 0,
341
  'is_zero_gpu_space': True,
342
  'series': 'OuteTTS',
343
+ 'emoji': '🥵', # requires 300s reserved ZeroGPU!
344
  },
345
+ # OuteTTS v0.3 1B
346
  'OuteAI/OuteTTS-0.3-1B-Demo': {
347
  'name': 'OuteTTS v3 1B',
348
  'function': '/generate_tts',
 
350
  'return_audio_index': 0,
351
  'is_zero_gpu_space': True,
352
  'series': 'OuteTTS',
353
+ 'emoji': '🥵', # requires 300s reserved ZeroGPU!
354
  },
355
+
356
+ # LLaSA 3B
357
  'srinivasbilla/llasa-3b-tts': {
358
+ 'name': 'LLaSA 3B',
359
  'function': '/infer',
360
  'text_param_index': 'target_text',
361
  'return_audio_index': 0,
362
  'is_zero_gpu_space': True,
363
  'series': 'llasa 3b',
364
+ # 'emoji': '🥵', # requires 300s reserved ZeroGPU!
365
  },
366
  }
367
 
 
520
  'sk': os.getenv('KOKORO'),
521
  },
522
 
523
+ # StyleTTS 2 Kokoro v1.0
524
+ 'hexgrad/Kokoro-API': {
525
+ 'voice': "af_heart",
526
+ 'speed': 1,
527
+ },
528
+
529
  # maskGCT (by amphion)
530
  'amphion/maskgct': {
531
  0: DEFAULT_VOICE_SAMPLE, #prompt_wav
app/sample_caching.py CHANGED
@@ -144,7 +144,7 @@ def give_cached_sample(session_hash: str, autoplay: bool, request: gr.Request):
144
 
145
  return (
146
  gr.update(visible=True, value=pair[0].transcript, elem_classes=['blurred-text']),
147
- "Synthesize",
148
  gr.update(visible=True), # r2
149
  pair[0].modelName, # model1
150
  pair[1].modelName, # model2
 
144
 
145
  return (
146
  gr.update(visible=True, value=pair[0].transcript, elem_classes=['blurred-text']),
147
+ "Synthesize 🐢",
148
  gr.update(visible=True), # r2
149
  pair[0].modelName, # model1
150
  pair[1].modelName, # model2
app/synth.py CHANGED
@@ -100,8 +100,16 @@ def synthandreturn(text, autoplay, request: gr.Request):
100
  if '/' in model:
101
  # Use public HF Space
102
  # if (model not in hf_clients):
 
103
  # hf_clients[model] = Client(model, hf_token=hf_token, headers=hf_headers)
104
- mdl_space = Client(AVAILABLE_MODELS[model], hf_token=hf_token, headers=hf_headers)
 
 
 
 
 
 
 
105
 
106
  # print(f"{model}: Fetching endpoints of HF Space")
107
  # assume the index is one of the first 9 return params
 
100
  if '/' in model:
101
  # Use public HF Space
102
  # if (model not in hf_clients):
103
+ # # save client to a local variable; it can time out
104
  # hf_clients[model] = Client(model, hf_token=hf_token, headers=hf_headers)
105
+ try:
106
+ # use TTS host's token
107
+ client_token = HF_SPACES[model]['hf_token']
108
+ except:
109
+ # use arena host's token
110
+ client_token = hf_token
111
+ # even this may cause HTTP 429 Too Many Requests
112
+ mdl_space = Client(AVAILABLE_MODELS[model], hf_token=client_token, headers=hf_headers)
113
 
114
  # print(f"{model}: Fetching endpoints of HF Space")
115
  # assume the index is one of the first 9 return params
app/ui_vote.py CHANGED
@@ -120,7 +120,7 @@ with gr.Blocks() as vote:
120
  ]
121
  """
122
  text,
123
- "Synthesize",
124
  gr.update(visible=True), # r2
125
  mdl1, # model1
126
  mdl2, # model2
 
120
  ]
121
  """
122
  text,
123
+ "Synthesize 🐢",
124
  gr.update(visible=True), # r2
125
  mdl1, # model1
126
  mdl2, # model2
test_tts_styletts_kokoro_v1.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from gradio_client import Client, file
3
+
4
+ client = Client("hexgrad/Kokoro-API", hf_token=os.getenv('KOKORO'))
5
+ # endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
6
+ # print(endpoints)
7
+ result = client.predict(
8
+ text='Hello there, you.',
9
+ voice='af_heart',
10
+ speed=1,
11
+ api_name='/predict'
12
+ )
13
+
14
+ print(result)
15
+
16
+ # text="Oh, hello there!!",
17
+ # voice="af",
18
+ # ps=None,
19
+ # speed=1,
20
+ # trim=3000,
21
+ # use_gpu=False,