leolxliu committed on
Commit
4958e9a
·
1 Parent(s): 7d2db7e

add more code

Browse files
Files changed (2) hide show
  1. app.py +187 -4
  2. requirement.txt +6 -0
app.py CHANGED
@@ -1,7 +1,190 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
 
 
3
 
4
+ import os
5
+ import time
6
+ import azure.cognitiveservices.speech as speechsdk
7
+ from pyht import Client
8
+ from pyht.client import TTSOptions
9
+ import requests
10
+
11
# Sample sentence for exercising the TTS services. The same sentence is
# offered as the example input in the Gradio UI. No code visible in this file
# reads this module-level name (each function's `text` parameter shadows it).
text = (
    'Today is Sunday, the weather is sunny. '
    'I am here to test the delay of various TTS services thoroughly'
)
12
+
13
+
14
def azure_tts(text):
    """Measure Azure Speech time-to-first-audio for *text*.

    Reads ``SPEECH_KEY`` and ``SPEECH_REGION`` from the environment, starts an
    asynchronous synthesis whose MP3 output goes to a pull stream, and times
    how long it takes for the first read from that stream to return.

    Args:
        text: The sentence to synthesize.

    Returns:
        Elapsed seconds (float) from just before the synthesis request is
        issued until the first read on the output stream returns.

    Raises:
        SystemExit: With exit code 1 when either environment variable is unset.
    """
    speech_key = os.getenv('SPEECH_KEY')
    speech_region = os.getenv('SPEECH_REGION')
    if speech_key is None or speech_region is None:
        print('Please set the environment variables SPEECH_KEY and SPEECH_REGION')
        # Raise SystemExit directly: the exit() helper is injected by the
        # `site` module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=speech_region)
    speech_config.speech_synthesis_voice_name = 'en-US-JennyNeural'
    speech_config.speech_synthesis_language = "en-US"
    speech_config.set_speech_synthesis_output_format(
        speechsdk.SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3)

    pull_stream = speechsdk.audio.PullAudioOutputStream()
    stream_config = speechsdk.audio.AudioOutputConfig(stream=pull_stream)
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config, audio_config=stream_config)

    # Start timing before the request is issued so the measurement covers the
    # same span as the other services (request sent -> first audio bytes).
    start = time.perf_counter()
    speech_synthesizer.speak_text_async(text)

    # A single 512-byte read is enough: only the arrival of the first audio
    # data matters, not its content.
    audio_buffer = bytes(512)
    pull_stream.read(audio_buffer)

    return time.perf_counter() - start
43
+
44
+
45
+
46
+
47
+
48
def coqui_tts(text):
    """Measure Coqui XTTS streaming time-to-first-chunk for *text*.

    Reads the API token from ``COQUI_TOKEN``, POSTs *text* to the Coqui XTTS
    streaming endpoint and times how long the first chunk of the response
    body takes to arrive.

    Args:
        text: The sentence to synthesize.

    Returns:
        Elapsed seconds (float) from just before the request until the first
        body chunk is received, or ``0`` when the endpoint does not answer
        with status 201 or the stream yields no data.

    Raises:
        SystemExit: With exit code 1 when ``COQUI_TOKEN`` is unset.
    """
    voice_id = 'c791b5b5-0558-42b8-bb0b-602ac5efc0b9'

    # os.getenv is a function; it must be called, not subscripted.
    coqui_api_token = os.getenv("COQUI_TOKEN")
    if coqui_api_token is None:
        print('Please set the environment variable COQUI_TOKEN')
        raise SystemExit(1)

    start = time.perf_counter()
    # The context manager releases the streamed connection even though the
    # function returns after the first chunk.
    with requests.post(
        "https://app.coqui.ai/api/v2/samples/xtts/stream",
        json={
            "text": text,
            "language": 'en',
            "voice_id": voice_id},
        headers={"Authorization": f"Bearer {coqui_api_token}"},
        stream=True,
    ) as res:
        if res.status_code != 201:
            print(f"Endpoint failed with status code {res.status_code}:",
                  res.content.decode("utf-8"))
            return 0

        for _chunk in res.iter_content(chunk_size=512):
            # Only the arrival time of the first chunk is of interest.
            return time.perf_counter() - start

    # The stream ended without producing any data.
    return 0
76
+
77
+
78
+
79
+
80
def elevenlab_tts(text):
    """Measure ElevenLabs streaming time-to-first-chunk for *text*.

    Reads the API key from ``ELEVENLAB_KEY``, POSTs *text* to the ElevenLabs
    text-to-speech streaming endpoint and times how long the first chunk of
    the response body takes to arrive.

    Args:
        text: The sentence to synthesize.

    Returns:
        Elapsed seconds (float) from just before the request until the first
        body chunk is received, or ``0`` when the endpoint answers with an
        error status or the stream yields no data.

    Raises:
        SystemExit: With exit code 1 when ``ELEVENLAB_KEY`` is unset.
    """
    voice_id = '21m00Tcm4TlvDq8ikWAM'
    chunk_size = 512
    url = f'https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream'

    # os.getenv is a function; it must be called, not subscripted.
    xi_api_key = os.getenv('ELEVENLAB_KEY')
    if xi_api_key is None:
        print('Please set the environment variable ELEVENLAB_KEY')
        # Raise SystemExit directly: the exit() helper is injected by the
        # `site` module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": xi_api_key,
    }

    data = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }

    start = time.perf_counter()
    # The context manager releases the streamed connection even though the
    # function returns after the first chunk.
    with requests.post(url, json=data, headers=headers, stream=True) as response:
        if not response.ok:
            # Without this check an error body would be timed as if it were audio.
            print(f"Endpoint failed with status code {response.status_code}:",
                  response.content.decode("utf-8"))
            return 0

        for _chunk in response.iter_content(chunk_size=chunk_size):
            # Only the arrival time of the first chunk is of interest.
            return time.perf_counter() - start

    # The stream ended without producing any data.
    return 0
116
+
117
+
118
+
119
def playht_tts(text):
    """Measure PlayHT streaming time-to-first-chunk for *text*.

    Reads ``PLAY_HT_USER_ID`` and ``PLAY_HT_API_KEY`` from the environment,
    requests synthesis through the ``pyht`` client and times how long the
    first audio chunk takes to arrive.

    Args:
        text: The sentence to synthesize.

    Returns:
        Elapsed seconds (float) from just before the request until the first
        chunk is received, or ``0`` when the stream yields no data.

    Raises:
        SystemExit: With exit code 1 when either environment variable is unset.
    """
    userid = os.getenv("PLAY_HT_USER_ID")
    api_key = os.getenv("PLAY_HT_API_KEY")

    if userid is None or api_key is None:
        print('Please set the environment variables PLAY_HT_USER_ID and PLAY_HT_API_KEY')
        # Raise SystemExit directly: the exit() helper is injected by the
        # `site` module and is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    client = Client(
        user_id=userid,
        api_key=api_key)

    options = TTSOptions(
        voice="s3://voice-cloning-zero-shot/d9ff78ba-d016-47f6-b0ef-dd630f59414e/female-cs/manifest.json",
        speed=5.0)

    start = time.perf_counter()
    for _chunk in client.tts(text, options):
        # Only the arrival time of the first chunk is of interest.
        return time.perf_counter() - start

    # The stream ended without producing any audio; report 0 rather than
    # failing on an unassigned latency variable.
    return 0
144
+
145
+
146
+
147
# HTML heading passed to gr.HTML at the top of the page. The Chinese text
# reads roughly "TRTC documentation bot".
title = '<h1 align="center">🔥TRTC 文档机器人🚀</h1>'
148
+
149
def greet(input):
    """Run every TTS latency probe on *input* and report the results.

    The probes are invoked in the order Azure, Coqui, ElevenLabs, PlayHT. The
    resulting report is printed to stdout and also returned.

    Args:
        input: The text to send to each TTS service.

    Returns:
        A single string with one "time to first chunk" entry per service,
        listed as Elevenlab, Azure, Coqui, Pyht.
    """
    azure_latency = azure_tts(input)
    coqui_latency = coqui_tts(input)
    elevenlab_latency = elevenlab_tts(input)
    playht_latency = playht_tts(input)

    # Report order differs from call order on purpose; it mirrors the
    # original message layout.
    measurements = (
        ('Elevenlab', elevenlab_latency),
        ('Azure', azure_latency),
        ('Coqui', coqui_latency),
        ('Pyht', playht_latency),
    )
    report = ' \n '.join(
        f'{service} TTS Delay, Time to first chunk {latency}s'
        for service, latency in measurements
    )

    print(report)
    return report
162
+
163
+
164
# Gradio page: a heading, an input/output textbox pair, a submit button and
# one example sentence. Pressing Enter in the input box or clicking the
# button both run `greet` on the input and show the result in the output box.
# NOTE(review): the nesting below was reconstructed from text whose
# indentation was lost; confirm that only the two textboxes belong in the Row.
with gr.Blocks(
    theme=gr.themes.Default(
        spacing_size=gr.themes.sizes.spacing_sm,
        radius_size=gr.themes.sizes.radius_sm,
        text_size=gr.themes.sizes.text_sm,
    )
) as demo:

    gr.HTML(title)

    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            lines=1,
            placeholder='input the text to run ',
        )
        outtxt = gr.Textbox(
            show_label=False,
            lines=4,
            placeholder='the output text',
        )

    # Enter key in the input textbox.
    txt.submit(greet, [txt], [outtxt])

    # Explicit button wired to the same handler.
    submit = gr.Button(value="Submmit", variant="secondary").style(
        full_width=False)
    submit.click(greet, [txt], [outtxt])

    gr.Examples(
        label="for example",
        examples=[
            "Today is Sunday, the weather is sunny. I am here to test the delay of various TTS services thoroughly",
        ],
        inputs=txt,
    )

# Start the web server as soon as the module is executed.
demo.launch()
188
+
189
+
190
+
requirement.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ azure-cognitiveservices-speech==1.28.0
2
+ xtts
3
+ pyht==0.0.16
4
+ requests==2.31.0
5
+ gradio==3.32.0
6
+ gradio_client==0.2.5