Pendrokar committed on
Commit 48cef3d · 1 Parent(s): d6e6719

TTS added: Oute & SoVITS; F5 voice change

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.wav filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
app/models.py CHANGED
@@ -45,7 +45,7 @@ AVAILABLE_MODELS = {
     # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
 
     # # Microsoft Edge TTS
-    # 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
+    # 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
 
     # IMS-Toucan
     # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
@@ -55,10 +55,15 @@ AVAILABLE_MODELS = {
     'hexgrad/kokoro': 'hexgrad/kokoro',
 
     # MaskGCT (by Amphion)
-    # DEMANDS 300 seconds of ZeroGPU
-    # 'amphion/maskgct': 'amphion/maskgct',
-    # default ZeroGPU borrow time
-    'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab',
+    # 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
+    # 'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab', # DEMANDS 300 seconds of ZeroGPU!
+
+    # GPT-SoVITS
+    'lj1995/GPT-SoVITS-v2': 'lj1995/GPT-SoVITS-v2',
+
+    # OuteTTS
+    # 'OuteAI/OuteTTS-0.2-500M-Demo': 'OuteAI/OuteTTS-0.2-500M-Demo',
+    'ameerazam08/OuteTTS-0.2-500M-Demo': 'ameerazam08/OuteTTS-0.2-500M-Demo', # ZeroGPU Space
 
     # HF TTS w issues
     # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
@@ -230,7 +235,7 @@ HF_SPACES = {
     'Pendrokar/style-tts-2': {
         'name': 'StyleTTS v2',
         'function': '/synthesize',
-        'text_param_index': 0,
+        'text_param_index': 'text',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
         'series': 'StyleTTS',
@@ -239,12 +244,12 @@ HF_SPACES = {
 
     # StyleTTS v2 kokoro fine tune
     'hexgrad/kokoro': {
-        'name': 'StyleTTS Kokoro',
+        'name': 'StyleTTS Kokoro v19',
         'function': '/generate',
         'text_param_index': 0,
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
-        'series': 'StyleTTS',
+        'series': 'Kokoro',
     },
 
     # MaskGCT (by Amphion)
@@ -266,10 +271,26 @@ HF_SPACES = {
         'series': 'MaskGCT',
         'emoji': '🥵', # 300s minimum ZeroGPU!
     },
+    'lj1995/GPT-SoVITS-v2': {
+        'name': 'GPT-SoVITS',
+        'function': '/get_tts_wav',
+        'text_param_index': 'text',
+        'return_audio_index': 0,
+        'is_zero_gpu_space': True,
+        'series': 'GPT-SoVITS',
+    },
+    'ameerazam08/OuteTTS-0.2-500M-Demo': {
+        'name': 'OuteTTS 500M',
+        'function': '/generate_tts',
+        'text_param_index': 0,
+        'return_audio_index': 0,
+        'is_zero_gpu_space': True,
+        'series': 'OuteTTS',
+    },
 }
 
 # for zero-shot TTS - voice sample used by XTTS (11 seconds)
-DEFAULT_VOICE_SAMPLE_STR = 'https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'
+DEFAULT_VOICE_SAMPLE_STR = 'voice_samples/xtts_sample.wav'
 DEFAULT_VOICE_SAMPLE = handle_file(DEFAULT_VOICE_SAMPLE_STR)
 DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
@@ -326,9 +347,12 @@ OVERRIDE_INPUTS = {
         4: 'No', # split by newline
     },
     'mrfakename/MeloTTS': {
-        1: 'EN-Default', # speaker; DEFAULT_VOICE_SAMPLE=EN-Default
-        2: 1, # speed
-        3: 'EN', # language
+        # 1: 'EN-Default', # speaker; DEFAULT_VOICE_SAMPLE=EN-Default
+        # 2: 1, # speed
+        # 3: 'EN', # language
+        'speaker': 'EN-Default', # DEFAULT_VOICE_SAMPLE=EN-Default
+        'speed': 1.0,
+        'language': 'EN',
     },
     'mrfakename/MetaVoice-1B-v0.1': {
         1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
@@ -362,13 +386,14 @@ OVERRIDE_INPUTS = {
         10: "never", #use_memory_cache
     },
 
+    # F5
     'mrfakename/E2-F5-TTS': {
-        0: DEFAULT_VOICE_SAMPLE, # voice sample
-        1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
-        3: False, # cleanup silence
-        4: 0.15, #crossfade
-        5: 32, #nfe_slider
-        6: 1, #speed
+        'ref_audio_input': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
+        'ref_text_input': 'Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we\'ll call Kruger Bern.',
+        'remove_silence': False,
+        'cross_fade_duration_slider': 0.15,
+        'nfe_slider': 32,
+        'speed_slider': 1,
     },
 
     # IMS-Toucan
@@ -383,9 +408,9 @@ OVERRIDE_INPUTS = {
 
     # StyleTTS 2
     'Pendrokar/style-tts-2': {
-        1: "f-us-2", #voice
-        2: 'en-us', # lang
-        3: 8, # lngsteps
+        'voice': "f-us-2",
+        'lang': 'en-us',
+        'lngsteps': 8,
     },
 
     # StyleTTS 2 kokoro
@@ -409,6 +434,29 @@ OVERRIDE_INPUTS = {
         2: -1, #target_len
         3: 25, #n_timesteps
     },
+    'lj1995/GPT-SoVITS-v2': {
+        'ref_wav_path': handle_file('voice_samples/EN_B00004_S00051_W000213.wav'),
+        'prompt_text': "Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call",
+        'prompt_language': "English",
+        # text: "Please surprise me and speak in whatever voice you enjoy.",
+        'text_language': "English",
+        'how_to_cut': "No slice",
+        'top_k': 15,
+        'top_p': 1,
+        'temperature': 1,
+        'ref_free': False,
+        'speed': 1,
+        'if_freeze': False,
+        'inp_refs': None,
+    },
+    'ameerazam08/OuteTTS-0.2-500M-Demo': {
+        1: 0.1, # temperature
+        2: 1.1, # repetition_penalty
+        3: "en", # language
+        4: "female_1", # speaker_selection
+        5: None, # reference_audio
+        6: None, # reference_text
+    },
 }
app/synth.py CHANGED
@@ -135,7 +135,19 @@ def synthandreturn(text, autoplay, request: gr.Request):
         space_inputs[HF_SPACES[model]['text_param_index']] = text
 
         print(f"{model}: Sending request to HF Space")
-        results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
+        # results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
+        if(type(space_inputs) == dict):
+            results = mdl_space.predict(
+                **space_inputs,
+                api_name=api_name,
+                fn_index=fn_index
+            )
+        else:
+            results = mdl_space.predict(
+                *space_inputs,
+                api_name=api_name,
+                fn_index=fn_index
+            )
 
         # return path to audio
         result = results
@@ -189,31 +201,64 @@ def synthandreturn(text, autoplay, request: gr.Request):
         result_storage[model] = result
 
     def _get_param_examples(parameters):
-        example_inputs = []
+        # named or unnamed parameters
+        try:
+            param_name = parameters[0]['parameter_name']
+            # success => named params, use dict
+            example_inputs = {}
+        except:
+            # unnamed params, use list
+            example_inputs = []
+            pass
+
         for param_info in parameters:
+            param_name = ''
+            param_default_value = param_info['example_input']
+            try:
+                # named params
+                param_name = param_info['parameter_name']
+                param_default_value = param_info['parameter_default']
+            except:
+                # unnamed params
+                pass
+
+            param_value = None
             if (
                 param_info['component'] == 'Radio'
                 or param_info['component'] == 'Dropdown'
                 or param_info['component'] == 'Audio'
                 or param_info['python_type']['type'] == 'str'
             ):
-                example_inputs.append(str(param_info['example_input']))
-                continue
-            if param_info['python_type']['type'] == 'int':
-                example_inputs.append(int(param_info['example_input']))
-                continue
-            if param_info['python_type']['type'] == 'float':
-                example_inputs.append(float(param_info['example_input']))
-                continue
-            if param_info['python_type']['type'] == 'bool':
-                example_inputs.append(bool(param_info['example_input']))
-                continue
+                param_value = str(param_default_value)
+            elif param_info['python_type']['type'] == 'int':
+                param_value = int(param_default_value)
+            elif param_info['python_type']['type'] == 'float':
+                param_value = float(param_default_value)
+            elif param_info['python_type']['type'] == 'bool':
+                param_value = bool(param_default_value)
+
+            if (param_name != ''):
+                # named param
+                example_inputs[param_info['parameter_name']] = param_value
+            else:
+                # just append unnamed param and hope
+                example_inputs.append(param_value)
 
         return example_inputs
 
     def _override_params(inputs, modelname):
         try:
             for key,value in OVERRIDE_INPUTS[modelname].items():
+                # if override keys are integers, make the dict into a list
+                if (
+                    (type(inputs) is dict)
+                    and (type(key) is int)
+                ):
+                    print(f"{modelname}: Converting unnamed override params to List")
+                    inputs = list(inputs.values())
+
                 inputs[key] = value
             print(f"{modelname}: Default inputs overridden by Arena")
         except:
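Note: the predict() dispatch above branches on whether _get_param_examples returned named endpoint parameters (a dict) or unnamed ones (a list). A minimal local sketch of that pattern, using a stand-in fake_predict instead of a real gradio_client Space so it runs without a network call; the names below are illustrative only:

def fake_predict(*args, **kwargs):
    # stand-in for gradio_client's Client.predict; echoes what it received
    return args if args else kwargs

named_inputs = {'text': '', 'voice': 'f-us-2', 'lang': 'en-us', 'lngsteps': 8}
unnamed_inputs = ['', 'x_ex04', 'en', 1.0]

for space_inputs in (named_inputs, unnamed_inputs):
    if type(space_inputs) == dict:
        space_inputs['text'] = 'This is what my voice sounds like.'
        print(fake_predict(**space_inputs))  # named parameters -> keyword arguments
    else:
        space_inputs[0] = 'This is what my voice sounds like.'
        print(fake_predict(*space_inputs))   # unnamed parameters -> positional arguments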
test_overrides.py ADDED
@@ -0,0 +1,84 @@
+from app.models import *
+
+# # has named endpoint
+# if '/' == HF_SPACES[model]['function'][0]:
+#     # audio sync function name
+#     api_name = HF_SPACES[model]['function']
+
+#     end_parameters = _get_param_examples(
+#         endpoints['named_endpoints'][api_name]['parameters']
+#     )
+# # has unnamed endpoint
+# else:
+#     # endpoint index is the first character
+#     fn_index = int(HF_SPACES[model]['function'])
+
+#     end_parameters = _get_param_examples(
+#         endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
+#     )
+
+def _get_param_examples(parameters):
+    # named or unnamed parameters
+    try:
+        param_name = parameters[0]['parameter_name']
+        # success => named params, use dict
+        example_inputs = {}
+    except:
+        # unnamed params, use list
+        example_inputs = []
+        pass
+
+    for param_info in parameters:
+        param_name = ''
+        param_default_value = param_info['example_input']
+        try:
+            # named params
+            param_name = param_info['parameter_name']
+            param_default_value = param_info['parameter_default']
+        except:
+            # unnamed params
+            pass
+
+        param_value = None
+        if (
+            param_info['component'] == 'Radio'
+            or param_info['component'] == 'Dropdown'
+            or param_info['component'] == 'Audio'
+            or param_info['python_type']['type'] == 'str'
+        ):
+            param_value = str(param_default_value)
+        elif param_info['python_type']['type'] == 'int':
+            param_value = int(param_default_value)
+        elif param_info['python_type']['type'] == 'float':
+            param_value = float(param_default_value)
+        elif param_info['python_type']['type'] == 'bool':
+            param_value = bool(param_default_value)
+
+        if (param_name != ''):
+            # named param
+            example_inputs[param_info['parameter_name']] = param_value
+        else:
+            # just append unnamed param and hope
+            example_inputs.append(param_value)
+
+    return example_inputs
+
+def _override_params(inputs, modelname):
+    try:
+        for key,value in OVERRIDE_INPUTS[modelname].items():
+            # if override keys are integers, make the dict into a list
+            if (
+                (type(inputs) is dict)
+                and (type(key) is int)
+            ):
+                print("Converting unnamed override params to List")
+                inputs = list(inputs.values())
+
+            inputs[key] = value
+        print(f"{modelname}: Default inputs overridden by Arena")
+    except:
+        pass
+
+    return inputs
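For reference, _get_param_examples returns a dict when the endpoint exposes named parameters and a list otherwise. A small self-contained check with fabricated gradio_client-style parameter metadata (not taken from a real Space), assuming test_overrides.py imports cleanly (it pulls in app.models):

from test_overrides import _get_param_examples

# Fabricated parameter metadata in the shape returned by Client.view_api();
# the field values below are illustrative, not from a real endpoint.
named = [
    {'parameter_name': 'text', 'parameter_default': 'Hi', 'component': 'Textbox',
     'python_type': {'type': 'str'}, 'example_input': 'Hi'},
    {'parameter_name': 'speed', 'parameter_default': 1.0, 'component': 'Slider',
     'python_type': {'type': 'float'}, 'example_input': 1.0},
]
unnamed = [{'component': 'Textbox', 'python_type': {'type': 'str'}, 'example_input': 'Hi'}]

print(_get_param_examples(named))    # {'text': 'Hi', 'speed': 1.0}
print(_get_param_examples(unnamed))  # ['Hi']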
test_tts_cosyvoice.py ADDED
@@ -0,0 +1,19 @@
+import os
+from gradio_client import Client, handle_file
+
+client = Client("FunAudioLLM/CosyVoice2-0.5B", hf_token=os.getenv('HF_TOKEN'))
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
+# print(endpoints)
+
+result = client.predict(
+    tts_text="CosyVoice is undergoing a comprehensive upgrade, providing more accurate, stable, faster, and better voice generation capabilities.",
+    mode_checkbox_group="3s Voice Clone",
+    prompt_text='The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.',
+    prompt_wav_upload=handle_file("https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav"),
+    prompt_wav_record=None,
+    instruct_text=None,
+    seed=0,
+    stream="false",
+    api_name="/generate_audio"
+)
+print(result)
test_tts_e2_f5_f5.py CHANGED
@@ -4,7 +4,7 @@ from gradio_client import Client, handle_file
 client = Client("mrfakename/E2-F5-TTS", hf_token=os.getenv('HF_TOKEN'))
 endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
 result = client.predict(
-    ref_audio_input=handle_file('https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav'),
+    ref_audio_input=handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
     ref_text_input="The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",
     gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
     remove_silence=False,
test_tts_melo.py CHANGED
@@ -1,13 +1,42 @@
 import os
+from test_overrides import _get_param_examples, _override_params
 from gradio_client import Client
 
-client = Client("mrfakename/MeloTTS", hf_token=os.getenv('HF_TOKEN'))
+model = "mrfakename/MeloTTS"
+client = Client(model, hf_token=os.getenv('HF_TOKEN'))
 endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
 # print(endpoints)
-result = client.predict(
-    "Please surprise me and speak in whatever voice you enjoy.", # str in 'Text to speak' Textbox component
-    "EN-US", # Literal['EN-US', 'EN-BR', 'EN_INDIA', 'EN-AU', 'EN-Default'] in 'Speaker' Dropdown component
-    1.0, # float (numeric value between 0.1 and 10.0)
-    "EN", # Literal['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'] in 'Language' Radio component
-    api_name="/synthesize"
-)
+
+api_name = '/synthesize'
+fn_index = None
+end_parameters = None
+text = 'This is what my voice sounds like.'
+
+end_parameters = _get_param_examples(
+    endpoints['named_endpoints'][api_name]['parameters']
+)
+print(end_parameters)
+
+# override some or all default parameters
+space_inputs = _override_params(end_parameters, model)
+
+# space_inputs[0] = text
+space_inputs['text'] = text
+print(space_inputs)
+
+if(type(space_inputs) == dict):
+    space_inputs['text'] = text
+    result = client.predict(
+        **space_inputs,
+        api_name=api_name,
+        fn_index=fn_index
+    )
+else:
+    space_inputs[0] = text
+    result = client.predict(
+        *space_inputs,
+        api_name=api_name,
+        fn_index=fn_index
+    )
+print(result)
test_tts_oute.py ADDED
@@ -0,0 +1,22 @@
+import os
+from gradio_client import Client, handle_file
+
+# client = Client("OuteAI/OuteTTS-0.2-500M-Demo", hf_token=os.getenv('HF_TOKEN'))
+client = Client("ameerazam08/OuteTTS-0.2-500M-Demo", hf_token=os.getenv('HF_TOKEN'))
+
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
+# print(endpoints)
+
+result = client.predict(
+    text="Please surprise me and speak in whatever voice you enjoy.",
+    temperature=0.1,
+    repetition_penalty=1.1,
+    language="en",
+    speaker_selection="female_1",
+    reference_audio=None,
+    reference_text=None,
+    # reference_audio=handle_file('EN_B00004_S00051_W000213.wav'),
+    # reference_text="Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call",
+    api_name="/generate_tts"
+)
+print(result)
test_tts_sovits.py ADDED
@@ -0,0 +1,46 @@
+import os
+from test_overrides import _get_param_examples, _override_params
+from gradio_client import Client, handle_file
+
+model = "Pendrokar/GPT-SoVITS-v2"
+# lj1995/GPT-SoVITS-v2
+client = Client(model, hf_token=os.getenv('HF_TOKEN'))
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
+# print(endpoints)
+
+api_name = None
+fn_index = None
+end_parameters = None
+text = 'This is what my voice sounds like.'
+
+# has named endpoint
+# audio sync function name
+api_name = '/get_tts_wav'
+
+end_parameters = _get_param_examples(
+    endpoints['named_endpoints'][api_name]['parameters']
+)
+print(end_parameters)
+
+# override some or all default parameters
+space_inputs = _override_params(end_parameters, model)
+
+print(space_inputs)
+
+if(type(space_inputs) == dict):
+    space_inputs['text'] = text
+    result = client.predict(
+        **space_inputs,
+        api_name=api_name,
+        fn_index=fn_index
+    )
+else:
+    space_inputs[0] = text
+    result = client.predict(
+        *space_inputs,
+        api_name=api_name,
+        fn_index=fn_index
+    )
+
+print(result)
test_tts_styletts.py CHANGED
@@ -1,12 +1,50 @@
 import os
+from test_overrides import _get_param_examples, _override_params
 from gradio_client import Client, file
 
-client = Client("Pendrokar/style-tts-2", hf_token=os.getenv('HF_TOKEN'))
+model = "Pendrokar/style-tts-2"
+client = Client(model, hf_token=os.getenv('HF_TOKEN'))
 endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
 # print(endpoints)
-result = client.predict(
-    text="Hello!!",
-    voice="f-us-1", # voice
-    lngsteps=8, # lngsteps
-    api_name="/synthesize" # api_name
-)
+
+api_name = '/synthesize'
+fn_index = None
+end_parameters = None
+text = 'This is what my voice sounds like.'
+
+end_parameters = _get_param_examples(
+    endpoints['named_endpoints'][api_name]['parameters']
+)
+print(end_parameters)
+
+space_inputs = end_parameters
+# override some or all default parameters
+space_inputs = _override_params(end_parameters, model)
+
+if(type(space_inputs) == dict):
+    space_inputs['text'] = text
+    result = client.predict(
+        **space_inputs,
+        api_name=api_name,
+        fn_index=fn_index
+    )
+else:
+    space_inputs[0] = text
+    result = client.predict(
+        *space_inputs,
+        api_name=api_name,
+        fn_index=fn_index
+    )
+# space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
+
+print(space_inputs)
+# print(*space_inputs)
+# print(**space_inputs)
+
+# result = client.predict(
+#     **space_inputs,
+#     api_name=api_name,
+#     fn_index=fn_index
+# )
+print(result)
test_tts_xva.py CHANGED
@@ -1,23 +1,30 @@
 import os
+from test_overrides import _get_param_examples, _override_params
 from gradio_client import Client, file
 
+model = "Pendrokar/xVASynth-TTS"
 client = Client("Pendrokar/xVASynth-TTS", hf_token=os.getenv('HF_TOKEN'))
 endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
-# print(endpoints)
-result = client.predict(
-    "Well, hello there!!", # str in 'Input Text' Textbox component
-    "x_ex04", # Literal['x_ex04', 'x_ex01', 'cnc_cabal', 'ccby_nvidia_hifi_92_F', 'ccby_nvidia_hifi_6671_M', 'more'] in 'Voice' Radio component
-    "en", # Literal['en', 'de', 'es', 'hi', 'zh', 'more'] in 'Language' Radio component
-    1.0, # float (numeric value between 0.5 and 2.0) in 'Duration' Slider component
 
-    0, # UNUSED; float (numeric value between 0 and 1.0) in 'Pitch' Slider component
-    0.1, # UNUSED; float (numeric value between 0.1 and 1.0) in 'Energy' Slider component
 
-    0, # Overriden by DeepMoji; float (numeric value between 0 and 1.0) in '😠 Anger' Slider component
-    0, # Overriden by DeepMoji; float (numeric value between 0 and 1.0) in '😃 Happiness' Slider component
-    0, # Overriden by DeepMoji; float (numeric value between 0 and 1.0) in '😭 Sadness' Slider component
-    0, # Overriden by DeepMoji; float (numeric value between 0 and 1.0) in '😮 Surprise' Slider component
-    True, # bool in 'Use DeepMoji' Checkbox component
 
-    api_name="/predict"
-)
+api_name = '/predict'
+fn_index = None
+end_parameters = None
+text = 'This is what my voice sounds like.'
+
+end_parameters = _get_param_examples(
+    endpoints['named_endpoints'][api_name]['parameters']
+)
+print(end_parameters)
+
+# override some or all default parameters
+space_inputs = _override_params(end_parameters, model)
+
+space_inputs[0] = text
+
+print(space_inputs)
+result = client.predict(
+    *space_inputs,
+    api_name=api_name
+)
+print(result)
voice_samples/EN_B00004_S00051_W000213.json ADDED
@@ -0,0 +1 @@
+{"id": "EN_B00004_S00051_W000213", "wav": "EN_B00004/EN_B00004_S00051/mp3/EN_B00004_S00051_W000213.mp3", "text": " Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call Kruger Bern.", "duration": 10.1535, "speaker": "EN_B00004_S00051", "language": "en", "dnsmos": 3.3549}