TTS-Spaces-Arena

Running on Zero

App Files Files Community

Pendrokar committed on Mar 21

Commit

39c25b5

1 Parent(s): ad6af40

new TTS: orpheus; sesame voice sample

Browse files

Files changed (5) hide show

README.md +2 -0
app/models.py +28 -7
test_tts_orpheus.py +50 -0
test_tts_sesame.py +50 -0
voice_samples/read_speech_a.wav +3 -0

README.md CHANGED Viewed

@@ -12,6 +12,7 @@ pinned: true
 short_description: Blind vote on HF TTS models!
 models:
 - amphion/MaskGCT
 - coqui/XTTS-v2
 - fishaudio/fish-speech-1.4
 - fishaudio/fish-speech-1.5
@@ -31,6 +32,7 @@ models:
 - parler-tts/parler-tts-mini-v1
 - parler-tts/parler-tts-mini-expresso
 - Pendrokar/xvapitch_expresso
 - SparkAudio/Spark-TTS-0.5B
 - SWivid/F5-TTS
 - WhisperSpeech/WhisperSpeech

 short_description: Blind vote on HF TTS models!
 models:
 - amphion/MaskGCT
+- canopylabs/orpheus-3b-0.1-ft
 - coqui/XTTS-v2
 - fishaudio/fish-speech-1.4
 - fishaudio/fish-speech-1.5
 - parler-tts/parler-tts-mini-v1
 - parler-tts/parler-tts-mini-expresso
 - Pendrokar/xvapitch_expresso
+- sesame/csm-1b
 - SparkAudio/Spark-TTS-0.5B
 - SWivid/F5-TTS
 - WhisperSpeech/WhisperSpeech

app/models.py CHANGED Viewed

@@ -104,6 +104,9 @@ AVAILABLE_MODELS = {
     # Sesame
     'sesame/csm-1b' : 'sesame/csm-1b',
     # HF TTS w issues
     # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
     # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -486,12 +489,21 @@ HF_SPACES = {
     },
     'sesame/csm-1b' : {
-        'name': 'sesame/csm-1b',
         'function': '/infer',
         'text_param_index': 'gen_conversation_input',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
-        'series': 'Spark-TTS',
     },
 }
@@ -779,13 +791,22 @@ OVERRIDE_INPUTS = {
 		'prompt_wav_record': None,
     },
-    # sesame/csm-1b
     'sesame/csm-1b' : {
-		"text_prompt_speaker_a": "And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.",
-		"text_prompt_speaker_b": "And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.", #second speaker unused
-		"audio_prompt_speaker_a": handle_file('voice_samples/read_speech_a.wav'),
-		"audio_prompt_speaker_b": handle_file('voice_samples/read_speech_a.wav'), #second speaker unused
     },
 }
 # minor mods to model from the same space

     # Sesame
     'sesame/csm-1b' : 'sesame/csm-1b',
+    # Orpheus
+    'MohamedRashad/Orpheus-TTS' : 'MohamedRashad/Orpheus-TTS',
     # HF TTS w issues
     # 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
     # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
     },
     'sesame/csm-1b' : {
+        'name': 'CSM 1B',
         'function': '/infer',
         'text_param_index': 'gen_conversation_input',
         'return_audio_index': 0,
         'is_zero_gpu_space': True,
+        'series': 'CSM-1B',
+    },
+    'MohamedRashad/Orpheus-TTS' : {
+        'name': 'Orpheus 3B 0.1',
+        'function': '/generate_speech',
+        'text_param_index': 'text',
+        'return_audio_index': 0,
+        'is_zero_gpu_space': True,
+        'series': 'Orpheus',
     },
 }
 		'prompt_wav_record': None,
     },
+    # csm-1b
     'sesame/csm-1b' : {
+		'text_prompt_speaker_a': 'And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.',
+		'text_prompt_speaker_b': 'And Lake turned round upon me, a little abruptly, his odd yellowish eyes, a little like those of the sea eagle, and the ghost of his smile that flickered on his singularly pale face, with a stern and insidious look, confronted me.', #second speaker unused
+		'audio_prompt_speaker_a': handle_file('voice_samples/read_speech_a.wav'),
+		'audio_prompt_speaker_b': handle_file('voice_samples/read_speech_a.wav'), #second speaker unused
     },
+    # Orpheus 3B 0.1
+    'MohamedRashad/Orpheus-TTS' : {
+		'voice': 'tara',
+		'temperature': 0.6,
+		'top_p': 0.95,
+		'repetition_penalty': 1.1,
+		'max_new_tokens': 1200,
+    }
 }
 # minor mods to model from the same space

test_tts_orpheus.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+from test_overrides import _get_param_examples, _override_params  # project-local helpers (not shown in this diff)
+from gradio_client import Client, file  # NOTE(review): `file` is not used anywhere in this script
+model = "MohamedRashad/Orpheus-TTS"  # HF Space id; same key as the HF_SPACES / OVERRIDE_INPUTS entries in app/models.py
+client = Client(model, hf_token=os.getenv('HF_TOKEN'))  # os.getenv yields None when HF_TOKEN is unset
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # API description as a dict, nothing printed
+# print(endpoints)
+api_name = '/generate_speech'  # matches 'function' of this Space in HF_SPACES
+fn_index = None  # endpoint is addressed by api_name only
+end_parameters = None  # NOTE(review): redundant, reassigned right below
+text = 'This is what my voice sounds like.'  # sentence sent to the Space for synthesis
+end_parameters = _get_param_examples(
+	endpoints['named_endpoints'][api_name]['parameters']  # parameter descriptions of the chosen named endpoint
+)  # presumably one example/default value per parameter - confirm in test_overrides
+print(end_parameters)
+space_inputs = end_parameters  # NOTE(review): redundant, overwritten two lines below
+# override some or all default parameters (presumably with this model's OVERRIDE_INPUTS entry - confirm in test_overrides)
+space_inputs = _override_params(end_parameters, model)
+if(type(space_inputs) == dict):  # dict -> keyword call; NOTE(review): isinstance(space_inputs, dict) is the idiomatic check
+	space_inputs['text'] = text  # 'text' is this Space's 'text_param_index' in HF_SPACES
+	result = client.predict(
+		**space_inputs,
+		api_name=api_name,
+		fn_index=fn_index
+	)
+else:  # non-dict inputs are passed positionally
+	space_inputs[0] = text  # assumes the text prompt is the first positional parameter - TODO confirm
+	result = client.predict(
+		*space_inputs,
+		api_name=api_name,
+		fn_index=fn_index
+	)
+	# space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
+print(space_inputs)  # inputs as actually sent, after overrides and text injection
+# print(*space_inputs)
+# print(**space_inputs)
+# result = client.predict(
+# 	**space_inputs,
+# 	api_name=api_name,
+#     fn_index=fn_index
+# )
+print(result)  # whatever client.predict returned for this endpoint

test_tts_sesame.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import os
+from test_overrides import _get_param_examples, _override_params  # project-local helpers (not shown in this diff)
+from gradio_client import Client, file  # NOTE(review): `file` is not used anywhere in this script
+model = "sesame/csm-1b"  # HF Space id; same key as the HF_SPACES / OVERRIDE_INPUTS entries in app/models.py
+client = Client(model, hf_token=os.getenv('HF_TOKEN'))  # os.getenv yields None when HF_TOKEN is unset
+endpoints = client.view_api(all_endpoints=True, print_info=False, return_format='dict')  # API description as a dict, nothing printed
+# print(endpoints)
+api_name = '/infer'  # matches 'function' of this Space in HF_SPACES
+fn_index = None  # endpoint is addressed by api_name only
+end_parameters = None  # NOTE(review): redundant, reassigned right below
+text = 'This is what my voice sounds like.'  # sentence sent to the Space for synthesis
+end_parameters = _get_param_examples(
+	endpoints['named_endpoints'][api_name]['parameters']  # parameter descriptions of the chosen named endpoint
+)  # presumably one example/default value per parameter - confirm in test_overrides
+print(end_parameters)
+space_inputs = end_parameters  # NOTE(review): redundant, overwritten two lines below
+# override some or all default parameters (presumably with this model's OVERRIDE_INPUTS entry - confirm in test_overrides)
+space_inputs = _override_params(end_parameters, model)
+if(type(space_inputs) == dict):  # dict -> keyword call; NOTE(review): isinstance(space_inputs, dict) is the idiomatic check
+	space_inputs['gen_conversation_input'] = text  # 'gen_conversation_input' is this Space's 'text_param_index' in HF_SPACES
+	result = client.predict(
+		**space_inputs,
+		api_name=api_name,
+		fn_index=fn_index
+	)
+else:  # non-dict inputs are passed positionally
+	space_inputs[0] = text  # assumes the text prompt is the first positional parameter - TODO confirm
+	result = client.predict(
+		*space_inputs,
+		api_name=api_name,
+		fn_index=fn_index
+	)
+	# space_inputs = {str(i): value for i, value in enumerate(space_inputs)}
+print(space_inputs)  # inputs as actually sent, after overrides and text injection
+# print(*space_inputs)
+# print(**space_inputs)
+# result = client.predict(
+# 	**space_inputs,
+# 	api_name=api_name,
+#     fn_index=fn_index
+# )
+print(result)  # whatever client.predict returned for this endpoint

voice_samples/read_speech_a.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:59480708f84c77ab2967d14d821c2ccade9d7761685d060575121f49a149005b
+size 831412