Spaces:
Running
on
Zero
Running
on
Zero
TTS added: Oute & SoVITS; F5 voice change
Browse files- .gitattributes +2 -0
- app/models.py +69 -21
- app/synth.py +58 -13
- test_overrides.py +84 -0
- test_tts_cosyvoice.py +19 -0
- test_tts_e2_f5_f5.py +1 -1
- test_tts_melo.py +37 -8
- test_tts_oute.py +22 -0
- test_tts_sovits.py +46 -0
- test_tts_styletts.py +45 -7
- test_tts_xva.py +22 -15
- voice_samples/EN_B00004_S00051_W000213.json +1 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.mp3 filter=lfs diff=lfs merge=lfs -text
|
app/models.py
CHANGED
@@ -45,7 +45,7 @@ AVAILABLE_MODELS = {
|
|
45 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
46 |
|
47 |
# # Microsoft Edge TTS
|
48 |
-
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', #
|
49 |
|
50 |
# IMS-Toucan
|
51 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
@@ -55,10 +55,15 @@ AVAILABLE_MODELS = {
|
|
55 |
'hexgrad/kokoro': 'hexgrad/kokoro',
|
56 |
|
57 |
# MaskGCT (by Amphion)
|
58 |
-
# DEMANDS 300 seconds of ZeroGPU
|
59 |
-
# '
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
# HF TTS w issues
|
64 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
@@ -230,7 +235,7 @@ HF_SPACES = {
|
|
230 |
'Pendrokar/style-tts-2': {
|
231 |
'name': 'StyleTTS v2',
|
232 |
'function': '/synthesize',
|
233 |
-
'text_param_index':
|
234 |
'return_audio_index': 0,
|
235 |
'is_zero_gpu_space': True,
|
236 |
'series': 'StyleTTS',
|
@@ -239,12 +244,12 @@ HF_SPACES = {
|
|
239 |
|
240 |
# StyleTTS v2 kokoro fine tune
|
241 |
'hexgrad/kokoro': {
|
242 |
-
'name': 'StyleTTS Kokoro',
|
243 |
'function': '/generate',
|
244 |
'text_param_index': 0,
|
245 |
'return_audio_index': 0,
|
246 |
'is_zero_gpu_space': True,
|
247 |
-
'series': '
|
248 |
},
|
249 |
|
250 |
# MaskGCT (by Amphion)
|
@@ -266,10 +271,26 @@ HF_SPACES = {
|
|
266 |
'series': 'MaskGCT',
|
267 |
'emoji': '🥵', # 300s minimum ZeroGPU!
|
268 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
269 |
}
|
270 |
|
271 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
272 |
-
DEFAULT_VOICE_SAMPLE_STR = '
|
273 |
DEFAULT_VOICE_SAMPLE = handle_file(DEFAULT_VOICE_SAMPLE_STR)
|
274 |
DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
|
275 |
|
@@ -326,9 +347,12 @@ OVERRIDE_INPUTS = {
|
|
326 |
4: 'No', # split by newline
|
327 |
},
|
328 |
'mrfakename/MeloTTS': {
|
329 |
-
1: 'EN-Default', # speaker; DEFAULT_VOICE_SAMPLE=EN-Default
|
330 |
-
2: 1, # speed
|
331 |
-
3: 'EN', # language
|
|
|
|
|
|
|
332 |
},
|
333 |
'mrfakename/MetaVoice-1B-v0.1': {
|
334 |
1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
|
@@ -362,13 +386,14 @@ OVERRIDE_INPUTS = {
|
|
362 |
10: "never", #use_memory_cache
|
363 |
},
|
364 |
|
|
|
365 |
'mrfakename/E2-F5-TTS': {
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
},
|
373 |
|
374 |
# IMS-Toucan
|
@@ -383,9 +408,9 @@ OVERRIDE_INPUTS = {
|
|
383 |
|
384 |
# StyleTTS 2
|
385 |
'Pendrokar/style-tts-2': {
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
},
|
390 |
|
391 |
# StyleTTS 2 kokoro
|
@@ -409,6 +434,29 @@ OVERRIDE_INPUTS = {
|
|
409 |
2: -1, #target_len
|
410 |
3: 25, #n_timesteps
|
411 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
}
|
413 |
|
414 |
|
|
|
45 |
# 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0
|
46 |
|
47 |
# # Microsoft Edge TTS
|
48 |
+
# 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # API disabled
|
49 |
|
50 |
# IMS-Toucan
|
51 |
# 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
|
|
|
55 |
'hexgrad/kokoro': 'hexgrad/kokoro',
|
56 |
|
57 |
# MaskGCT (by Amphion)
|
58 |
+
# 'amphion/maskgct': 'amphion/maskgct', # DEMANDS 300 seconds of ZeroGPU!
|
59 |
+
# 'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab', # DEMANDS 300 seconds of ZeroGPU!
|
60 |
+
|
61 |
+
# GPT-SoVITS
|
62 |
+
'lj1995/GPT-SoVITS-v2': 'lj1995/GPT-SoVITS-v2',
|
63 |
+
|
64 |
+
# OuteTTS
|
65 |
+
# 'OuteAI/OuteTTS-0.2-500M-Demo': 'OuteAI/OuteTTS-0.2-500M-Demo',
|
66 |
+
'ameerazam08/OuteTTS-0.2-500M-Demo': 'ameerazam08/OuteTTS-0.2-500M-Demo', # ZeroGPU Space
|
67 |
|
68 |
# HF TTS w issues
|
69 |
# 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
|
|
|
235 |
'Pendrokar/style-tts-2': {
|
236 |
'name': 'StyleTTS v2',
|
237 |
'function': '/synthesize',
|
238 |
+
'text_param_index': 'text',
|
239 |
'return_audio_index': 0,
|
240 |
'is_zero_gpu_space': True,
|
241 |
'series': 'StyleTTS',
|
|
|
244 |
|
245 |
# StyleTTS v2 kokoro fine tune
|
246 |
'hexgrad/kokoro': {
|
247 |
+
'name': 'StyleTTS Kokoro v19',
|
248 |
'function': '/generate',
|
249 |
'text_param_index': 0,
|
250 |
'return_audio_index': 0,
|
251 |
'is_zero_gpu_space': True,
|
252 |
+
'series': 'Kokoro',
|
253 |
},
|
254 |
|
255 |
# MaskGCT (by Amphion)
|
|
|
271 |
'series': 'MaskGCT',
|
272 |
'emoji': '🥵', # 300s minimum ZeroGPU!
|
273 |
},
|
274 |
+
'lj1995/GPT-SoVITS-v2': {
|
275 |
+
'name': 'GPT-SoVITS',
|
276 |
+
'function': '/get_tts_wav',
|
277 |
+
'text_param_index': 'text',
|
278 |
+
'return_audio_index': 0,
|
279 |
+
'is_zero_gpu_space': True,
|
280 |
+
'series': 'GPT-SoVITS',
|
281 |
+
},
|
282 |
+
'ameerazam08/OuteTTS-0.2-500M-Demo': {
|
283 |
+
'name': 'OuteTTS 500M',
|
284 |
+
'function': '/generate_tts',
|
285 |
+
'text_param_index': 0,
|
286 |
+
'return_audio_index': 0,
|
287 |
+
'is_zero_gpu_space': True,
|
288 |
+
'series': 'OuteTTS',
|
289 |
+
},
|
290 |
}
|
291 |
|
292 |
# for zero-shot TTS - voice sample used by XTTS (11 seconds)
|
293 |
+
DEFAULT_VOICE_SAMPLE_STR = 'voice_samples/xtts_sample.wav'
|
294 |
DEFAULT_VOICE_SAMPLE = handle_file(DEFAULT_VOICE_SAMPLE_STR)
|
295 |
DEFAULT_VOICE_TRANSCRIPT = "The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory."
|
296 |
|
|
|
347 |
4: 'No', # split by newline
|
348 |
},
|
349 |
'mrfakename/MeloTTS': {
|
350 |
+
# 1: 'EN-Default', # speaker; DEFAULT_VOICE_SAMPLE=EN-Default
|
351 |
+
# 2: 1, # speed
|
352 |
+
# 3: 'EN', # language
|
353 |
+
'speaker': 'EN-Default', # DEFAULT_VOICE_SAMPLE=EN-Default
|
354 |
+
'speed': 1.0,
|
355 |
+
'language': 'EN',
|
356 |
},
|
357 |
'mrfakename/MetaVoice-1B-v0.1': {
|
358 |
1: 5, # float (numeric value between 0.0 and 10.0) in 'Speech Stability - improves text following for a challenging speaker' Slider component
|
|
|
386 |
10: "never", #use_memory_cache
|
387 |
},
|
388 |
|
389 |
+
# F5
|
390 |
'mrfakename/E2-F5-TTS': {
|
391 |
+
'ref_audio_input': handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
|
392 |
+
'ref_text_input': 'Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we\'ll call Kruger Bern.',
|
393 |
+
'remove_silence': False,
|
394 |
+
'cross_fade_duration_slider': 0.15,
|
395 |
+
'nfe_slider': 32,
|
396 |
+
'speed_slider': 1,
|
397 |
},
|
398 |
|
399 |
# IMS-Toucan
|
|
|
408 |
|
409 |
# StyleTTS 2
|
410 |
'Pendrokar/style-tts-2': {
|
411 |
+
'voice': "f-us-2",
|
412 |
+
'lang': 'en-us',
|
413 |
+
'lngsteps': 8,
|
414 |
},
|
415 |
|
416 |
# StyleTTS 2 kokoro
|
|
|
434 |
2: -1, #target_len
|
435 |
3: 25, #n_timesteps
|
436 |
},
|
437 |
+
'lj1995/GPT-SoVITS-v2': {
|
438 |
+
'ref_wav_path': handle_file('voice_samples/EN_B00004_S00051_W000213.wav'),
|
439 |
+
'prompt_text': "Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call",
|
440 |
+
'prompt_language': "English",
|
441 |
+
# text: "Please surprise me and speak in whatever voice you enjoy.",
|
442 |
+
'text_language': "English",
|
443 |
+
'how_to_cut': "No slice",
|
444 |
+
'top_k': 15,
|
445 |
+
'top_p': 1,
|
446 |
+
'temperature': 1,
|
447 |
+
'ref_free': False,
|
448 |
+
'speed': 1,
|
449 |
+
'if_freeze': False,
|
450 |
+
'inp_refs': None,
|
451 |
+
},
|
452 |
+
'ameerazam08/OuteTTS-0.2-500M-Demo': {
|
453 |
+
1: 0.1, # temperature
|
454 |
+
2: 1.1, # repetition_penalty
|
455 |
+
3: "en", # language
|
456 |
+
4: "female_1", # speaker_selection
|
457 |
+
5: None, # reference_audio
|
458 |
+
6: None, # reference_text
|
459 |
+
},
|
460 |
}
|
461 |
|
462 |
|
app/synth.py
CHANGED
@@ -135,7 +135,19 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
135 |
space_inputs[HF_SPACES[model]['text_param_index']] = text
|
136 |
|
137 |
print(f"{model}: Sending request to HF Space")
|
138 |
-
results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
|
140 |
# return path to audio
|
141 |
result = results
|
@@ -189,31 +201,64 @@ def synthandreturn(text, autoplay, request: gr.Request):
|
|
189 |
result_storage[model] = result
|
190 |
|
191 |
def _get_param_examples(parameters):
|
192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
for param_info in parameters:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
if (
|
195 |
param_info['component'] == 'Radio'
|
196 |
or param_info['component'] == 'Dropdown'
|
197 |
or param_info['component'] == 'Audio'
|
198 |
or param_info['python_type']['type'] == 'str'
|
199 |
):
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
if
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
211 |
|
212 |
return example_inputs
|
213 |
|
214 |
def _override_params(inputs, modelname):
|
215 |
try:
|
216 |
for key,value in OVERRIDE_INPUTS[modelname].items():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
217 |
inputs[key] = value
|
218 |
print(f"{modelname}: Default inputs overridden by Arena")
|
219 |
except:
|
|
|
135 |
space_inputs[HF_SPACES[model]['text_param_index']] = text
|
136 |
|
137 |
print(f"{model}: Sending request to HF Space")
|
138 |
+
# results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
|
139 |
+
if(type(space_inputs) == dict):
|
140 |
+
results = mdl_space.predict(
|
141 |
+
**space_inputs,
|
142 |
+
api_name=api_name,
|
143 |
+
fn_index=fn_index
|
144 |
+
)
|
145 |
+
else:
|
146 |
+
results = mdl_space.predict(
|
147 |
+
*space_inputs,
|
148 |
+
api_name=api_name,
|
149 |
+
fn_index=fn_index
|
150 |
+
)
|
151 |
|
152 |
# return path to audio
|
153 |
result = results
|
|
|
201 |
result_storage[model] = result
|
202 |
|
203 |
def _get_param_examples(parameters):
|
204 |
+
# named or unnamed parameters
|
205 |
+
try:
|
206 |
+
param_name = parameters[0]['parameter_name']
|
207 |
+
# success => named params, use dict
|
208 |
+
example_inputs = {}
|
209 |
+
except:
|
210 |
+
# unnamed params, use list
|
211 |
+
example_inputs = []
|
212 |
+
pass
|
213 |
+
|
214 |
for param_info in parameters:
|
215 |
+
|
216 |
+
|
217 |
+
param_name = ''
|
218 |
+
param_default_value = param_info['example_input']
|
219 |
+
try:
|
220 |
+
# named params
|
221 |
+
param_name = param_info['parameter_name']
|
222 |
+
param_default_value = param_info['parameter_default']
|
223 |
+
except:
|
224 |
+
# unnamed params
|
225 |
+
pass
|
226 |
+
|
227 |
+
param_value = None
|
228 |
if (
|
229 |
param_info['component'] == 'Radio'
|
230 |
or param_info['component'] == 'Dropdown'
|
231 |
or param_info['component'] == 'Audio'
|
232 |
or param_info['python_type']['type'] == 'str'
|
233 |
):
|
234 |
+
param_value = str(param_default_value)
|
235 |
+
elif param_info['python_type']['type'] == 'int':
|
236 |
+
param_value = int(param_default_value)
|
237 |
+
elif param_info['python_type']['type'] == 'float':
|
238 |
+
param_value = float(param_default_value)
|
239 |
+
elif param_info['python_type']['type'] == 'bool':
|
240 |
+
param_value = bool(param_default_value)
|
241 |
+
|
242 |
+
if (param_name != ''):
|
243 |
+
# named param
|
244 |
+
example_inputs[param_info['parameter_name']] = param_value
|
245 |
+
else:
|
246 |
+
# just append unnamed param and hope
|
247 |
+
example_inputs.append(param_value)
|
248 |
|
249 |
return example_inputs
|
250 |
|
251 |
def _override_params(inputs, modelname):
|
252 |
try:
|
253 |
for key,value in OVERRIDE_INPUTS[modelname].items():
|
254 |
+
# if override keys are integers, make the dict into a list
|
255 |
+
if (
|
256 |
+
(type(inputs) is dict)
|
257 |
+
and (type(key) is int)
|
258 |
+
):
|
259 |
+
print(f"{modelname}: Converting unnamed override params to List")
|
260 |
+
inputs = list(inputs.values())
|
261 |
+
|
262 |
inputs[key] = value
|
263 |
print(f"{modelname}: Default inputs overridden by Arena")
|
264 |
except:
|
test_overrides.py
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app.models import *
|
2 |
+
|
3 |
+
# # has named endpoint
|
4 |
+
# if '/' == HF_SPACES[model]['function'][0]:
|
5 |
+
# # audio sync function name
|
6 |
+
# api_name = HF_SPACES[model]['function']
|
7 |
+
|
8 |
+
# end_parameters = _get_param_examples(
|
9 |
+
# endpoints['named_endpoints'][api_name]['parameters']
|
10 |
+
# )
|
11 |
+
# # has unnamed endpoint
|
12 |
+
# else:
|
13 |
+
# # endpoint index is the first character
|
14 |
+
# fn_index = int(HF_SPACES[model]['function'])
|
15 |
+
|
16 |
+
# end_parameters = _get_param_examples(
|
17 |
+
# endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
|
18 |
+
# )
|
19 |
+
|
20 |
+
def _get_param_examples(parameters):
|
21 |
+
# named or unnamed parameters
|
22 |
+
try:
|
23 |
+
param_name = parameters[0]['parameter_name']
|
24 |
+
# success => named params, use dict
|
25 |
+
example_inputs = {}
|
26 |
+
except:
|
27 |
+
# unnamed params, use list
|
28 |
+
example_inputs = []
|
29 |
+
pass
|
30 |
+
|
31 |
+
for param_info in parameters:
|
32 |
+
|
33 |
+
|
34 |
+
param_name = ''
|
35 |
+
param_default_value = param_info['example_input']
|
36 |
+
try:
|
37 |
+
# named params
|
38 |
+
param_name = param_info['parameter_name']
|
39 |
+
param_default_value = param_info['parameter_default']
|
40 |
+
except:
|
41 |
+
# unnamed params
|
42 |
+
pass
|
43 |
+
|
44 |
+
param_value = None
|
45 |
+
if (
|
46 |
+
param_info['component'] == 'Radio'
|
47 |
+
or param_info['component'] == 'Dropdown'
|
48 |
+
or param_info['component'] == 'Audio'
|
49 |
+
or param_info['python_type']['type'] == 'str'
|
50 |
+
):
|
51 |
+
param_value = str(param_default_value)
|
52 |
+
elif param_info['python_type']['type'] == 'int':
|
53 |
+
param_value = int(param_default_value)
|
54 |
+
elif param_info['python_type']['type'] == 'float':
|
55 |
+
param_value = float(param_default_value)
|
56 |
+
elif param_info['python_type']['type'] == 'bool':
|
57 |
+
param_value = bool(param_default_value)
|
58 |
+
|
59 |
+
if (param_name != ''):
|
60 |
+
# named param
|
61 |
+
example_inputs[param_info['parameter_name']] = param_value
|
62 |
+
else:
|
63 |
+
# just append unnamed param and hope
|
64 |
+
example_inputs.append(param_value)
|
65 |
+
|
66 |
+
return example_inputs
|
67 |
+
|
68 |
+
def _override_params(inputs, modelname):
|
69 |
+
try:
|
70 |
+
for key,value in OVERRIDE_INPUTS[modelname].items():
|
71 |
+
# if override keys are integers, make the dict into a list
|
72 |
+
if (
|
73 |
+
(type(inputs) is dict)
|
74 |
+
and (type(key) is int)
|
75 |
+
):
|
76 |
+
print("Converting unnamed override params to List")
|
77 |
+
inputs = list(inputs.values())
|
78 |
+
|
79 |
+
inputs[key] = value
|
80 |
+
print(f"{modelname}: Default inputs overridden by Arena")
|
81 |
+
except:
|
82 |
+
pass
|
83 |
+
|
84 |
+
return inputs
|
test_tts_cosyvoice.py
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
|
4 |
+
client = Client("FunAudioLLM/CosyVoice2-0.5B", hf_token=os.getenv('HF_TOKEN'))
|
5 |
+
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
+
# print(endpoints)
|
7 |
+
|
8 |
+
result = client.predict(
|
9 |
+
tts_text="CosyVoice is undergoing a comprehensive upgrade, providing more accurate, stable, faster, and better voice generation capabilities.",
|
10 |
+
mode_checkbox_group="3s Voice Clone",
|
11 |
+
prompt_text='The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.',
|
12 |
+
prompt_wav_upload=handle_file("https://cdn-uploads.huggingface.co/production/uploads/63d52e0c4e5642795617f668/V6-rMmI-P59DA4leWDIcK.wav"),
|
13 |
+
prompt_wav_record=None,
|
14 |
+
instruct_text=None,
|
15 |
+
seed=0,
|
16 |
+
stream="false",
|
17 |
+
api_name="/generate_audio"
|
18 |
+
)
|
19 |
+
print(result)
|
test_tts_e2_f5_f5.py
CHANGED
@@ -4,7 +4,7 @@ from gradio_client import Client, handle_file
|
|
4 |
client = Client("mrfakename/E2-F5-TTS", hf_token=os.getenv('HF_TOKEN'))
|
5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
result = client.predict(
|
7 |
-
ref_audio_input=handle_file('
|
8 |
ref_text_input="The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",
|
9 |
gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
|
10 |
remove_silence=False,
|
|
|
4 |
client = Client("mrfakename/E2-F5-TTS", hf_token=os.getenv('HF_TOKEN'))
|
5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
result = client.predict(
|
7 |
+
ref_audio_input=handle_file('voice_samples/EN_B00004_S00051_W000213.mp3'),
|
8 |
ref_text_input="The Hispaniola was rolling scuppers under in the ocean swell. The booms were tearing at the blocks, the rudder was banging to and fro, and the whole ship creaking, groaning, and jumping like a manufactory.",
|
9 |
gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
|
10 |
remove_silence=False,
|
test_tts_melo.py
CHANGED
@@ -1,13 +1,42 @@
|
|
1 |
import os
|
|
|
2 |
from gradio_client import Client
|
3 |
|
4 |
-
|
|
|
5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
# print(endpoints)
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from test_overrides import _get_param_examples, _override_params
|
3 |
from gradio_client import Client
|
4 |
|
5 |
+
model = "mrfakename/MeloTTS"
|
6 |
+
client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
7 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
8 |
# print(endpoints)
|
9 |
+
|
10 |
+
api_name = '/synthesize'
|
11 |
+
fn_index = None
|
12 |
+
end_parameters = None
|
13 |
+
text = 'This is what my voice sounds like.'
|
14 |
+
|
15 |
+
end_parameters = _get_param_examples(
|
16 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
17 |
+
)
|
18 |
+
print(end_parameters)
|
19 |
+
|
20 |
+
|
21 |
+
# override some or all default parameters
|
22 |
+
space_inputs = _override_params(end_parameters, model)
|
23 |
+
|
24 |
+
# space_inputs[0] = text
|
25 |
+
space_inputs['text'] = text
|
26 |
+
print(space_inputs)
|
27 |
+
|
28 |
+
if(type(space_inputs) == dict):
|
29 |
+
space_inputs['text'] = text
|
30 |
+
result = client.predict(
|
31 |
+
**space_inputs,
|
32 |
+
api_name=api_name,
|
33 |
+
fn_index=fn_index
|
34 |
+
)
|
35 |
+
else:
|
36 |
+
space_inputs[0] = text
|
37 |
+
result = client.predict(
|
38 |
+
*space_inputs,
|
39 |
+
api_name=api_name,
|
40 |
+
fn_index=fn_index
|
41 |
+
)
|
42 |
+
print(result)
|
test_tts_oute.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from gradio_client import Client, handle_file
|
3 |
+
|
4 |
+
# client = Client("OuteAI/OuteTTS-0.2-500M-Demo", hf_token=os.getenv('HF_TOKEN'))
|
5 |
+
client = Client("ameerazam08/OuteTTS-0.2-500M-Demo", hf_token=os.getenv('HF_TOKEN'))
|
6 |
+
|
7 |
+
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
8 |
+
# print(endpoints)
|
9 |
+
|
10 |
+
result = client.predict(
|
11 |
+
text="Please surprise me and speak in whatever voice you enjoy.",
|
12 |
+
temperature=0.1,
|
13 |
+
repetition_penalty=1.1,
|
14 |
+
language="en",
|
15 |
+
speaker_selection="female_1",
|
16 |
+
reference_audio=None,
|
17 |
+
reference_text=None,
|
18 |
+
# reference_audio=handle_file('EN_B00004_S00051_W000213.wav'),
|
19 |
+
# reference_text="Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call",
|
20 |
+
api_name="/generate_tts"
|
21 |
+
)
|
22 |
+
print(result)
|
test_tts_sovits.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from test_overrides import _get_param_examples, _override_params
|
3 |
+
from gradio_client import Client, handle_file
|
4 |
+
|
5 |
+
model = "Pendrokar/GPT-SoVITS-v2"
|
6 |
+
# lj1995/GPT-SoVITS-v2
|
7 |
+
client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
8 |
+
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
9 |
+
# print(endpoints)
|
10 |
+
|
11 |
+
|
12 |
+
api_name = None
|
13 |
+
fn_index = None
|
14 |
+
end_parameters = None
|
15 |
+
text = 'This is what my voice sounds like.'
|
16 |
+
|
17 |
+
# has named endpoint
|
18 |
+
# audio sync function name
|
19 |
+
api_name = '/get_tts_wav'
|
20 |
+
|
21 |
+
end_parameters = _get_param_examples(
|
22 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
23 |
+
)
|
24 |
+
print(end_parameters)
|
25 |
+
|
26 |
+
# override some or all default parameters
|
27 |
+
space_inputs = _override_params(end_parameters, model)
|
28 |
+
|
29 |
+
print(space_inputs)
|
30 |
+
|
31 |
+
if(type(space_inputs) == dict):
|
32 |
+
space_inputs['text'] = text
|
33 |
+
result = client.predict(
|
34 |
+
**space_inputs,
|
35 |
+
api_name=api_name,
|
36 |
+
fn_index=fn_index
|
37 |
+
)
|
38 |
+
else:
|
39 |
+
space_inputs[0] = text
|
40 |
+
result = client.predict(
|
41 |
+
*space_inputs,
|
42 |
+
api_name=api_name,
|
43 |
+
fn_index=fn_index
|
44 |
+
)
|
45 |
+
|
46 |
+
print(result)
|
test_tts_styletts.py
CHANGED
@@ -1,12 +1,50 @@
|
|
1 |
import os
|
|
|
2 |
from gradio_client import Client, file
|
3 |
|
4 |
-
|
|
|
5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
# print(endpoints)
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from test_overrides import _get_param_examples, _override_params
|
3 |
from gradio_client import Client, file
|
4 |
|
5 |
+
model = "Pendrokar/style-tts-2"
|
6 |
+
client = Client(model, hf_token=os.getenv('HF_TOKEN'))
|
7 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
8 |
# print(endpoints)
|
9 |
+
|
10 |
+
api_name = '/synthesize'
|
11 |
+
fn_index = None
|
12 |
+
end_parameters = None
|
13 |
+
text = 'This is what my voice sounds like.'
|
14 |
+
|
15 |
+
end_parameters = _get_param_examples(
|
16 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
17 |
+
)
|
18 |
+
print(end_parameters)
|
19 |
+
|
20 |
+
|
21 |
+
space_inputs = end_parameters
|
22 |
+
# override some or all default parameters
|
23 |
+
space_inputs = _override_params(end_parameters, model)
|
24 |
+
|
25 |
+
if(type(space_inputs) == dict):
|
26 |
+
space_inputs['text'] = text
|
27 |
+
result = client.predict(
|
28 |
+
**space_inputs,
|
29 |
+
api_name=api_name,
|
30 |
+
fn_index=fn_index
|
31 |
+
)
|
32 |
+
else:
|
33 |
+
space_inputs[0] = text
|
34 |
+
result = client.predict(
|
35 |
+
*space_inputs,
|
36 |
+
api_name=api_name,
|
37 |
+
fn_index=fn_index
|
38 |
+
)
|
39 |
+
# space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
|
40 |
+
|
41 |
+
print(space_inputs)
|
42 |
+
# print(*space_inputs)
|
43 |
+
# print(**space_inputs)
|
44 |
+
|
45 |
+
# result = client.predict(
|
46 |
+
# **space_inputs,
|
47 |
+
# api_name=api_name,
|
48 |
+
# fn_index=fn_index
|
49 |
+
# )
|
50 |
+
print(result)
|
test_tts_xva.py
CHANGED
@@ -1,23 +1,30 @@
|
|
1 |
import os
|
|
|
2 |
from gradio_client import Client, file
|
3 |
|
|
|
4 |
client = Client("Pendrokar/xVASynth-TTS", hf_token=os.getenv('HF_TOKEN'))
|
5 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
6 |
-
# print(endpoints)
|
7 |
-
result = client.predict(
|
8 |
-
"Well, hello there!!", # str in 'Input Text' Textbox component
|
9 |
-
"x_ex04", # Literal['x_ex04', 'x_ex01', 'cnc_cabal', 'ccby_nvidia_hifi_92_F', 'ccby_nvidia_hifi_6671_M', 'more'] in 'Voice' Radio component
|
10 |
-
"en", # Literal['en', 'de', 'es', 'hi', 'zh', 'more'] in 'Language' Radio component
|
11 |
-
1.0, # float (numeric value between 0.5 and 2.0) in 'Duration' Slider component
|
12 |
|
13 |
-
|
14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
0, # Overridden by DeepMoji; float (numeric value between 0 and 1.0) in '😭 Sadness' Slider component
|
19 |
-
0, # Overridden by DeepMoji; float (numeric value between 0 and 1.0) in '😮 Surprise' Slider component
|
20 |
-
True, # bool in 'Use DeepMoji' Checkbox component
|
21 |
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from test_overrides import _get_param_examples, _override_params
|
3 |
from gradio_client import Client, file
|
4 |
|
5 |
+
model = "Pendrokar/xVASynth-TTS"
|
6 |
client = Client("Pendrokar/xVASynth-TTS", hf_token=os.getenv('HF_TOKEN'))
|
7 |
endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
api_name = '/predict'
|
10 |
+
fn_index = None
|
11 |
+
end_parameters = None
|
12 |
+
text = 'This is what my voice sounds like.'
|
13 |
+
|
14 |
+
end_parameters = _get_param_examples(
|
15 |
+
endpoints['named_endpoints'][api_name]['parameters']
|
16 |
+
)
|
17 |
+
print(end_parameters)
|
18 |
+
|
19 |
|
20 |
+
# override some or all default parameters
|
21 |
+
space_inputs = _override_params(end_parameters, model)
|
|
|
|
|
|
|
22 |
|
23 |
+
space_inputs[0] = text
|
24 |
+
|
25 |
+
print(space_inputs)
|
26 |
+
result = client.predict(
|
27 |
+
*space_inputs,
|
28 |
+
api_name=api_name
|
29 |
+
)
|
30 |
+
print(result)
|
voice_samples/EN_B00004_S00051_W000213.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"id": "EN_B00004_S00051_W000213", "wav": "EN_B00004/EN_B00004_S00051/mp3/EN_B00004_S00051_W000213.mp3", "text": " Our model manager is Graham, whom we observed leading a small team of chemical engineers within a multinational European firm we'll call Kruger Bern.", "duration": 10.1535, "speaker": "EN_B00004_S00051", "language": "en", "dnsmos": 3.3549}
|