ysharma HF staff committed on
Commit
e01375e
·
1 Parent(s): e2d0484

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import io
4
+ import os
5
+ from openai import OpenAI
6
+ from pydub import AudioSegment
7
+ from pydub.playback import play
8
+
9
+
10
# The OpenAI client reads its API key from the OPENAI_API_KEY environment
# variable. Fail early with a readable message when it is not configured,
# instead of writing None back into os.environ (which raises a TypeError).
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "The OPENAI_API_KEY environment variable is not set; "
        "it is required to call the OpenAI text-to-speech API."
    )

client = OpenAI()  # picks up OPENAI_API_KEY from the environment
14
+
15
def stream_and_yield_audio(text, model, voice):
    """Synthesize *text* with the OpenAI speech API and yield it for Gradio.

    Args:
        text: The text to convert to speech.
        model: Name of the TTS model, for example "tts-1".
        voice: Name of the voice, for example "alloy".

    Yields:
        A single ``(sample_rate, samples)`` tuple. ``samples`` is an int16
        NumPy array: one-dimensional for mono audio, shaped
        ``(frames, channels)`` for multi-channel audio.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    # Reject empty input up front so the user gets a readable message
    # rather than an API failure.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    response = client.audio.speech.create(
        model=model,  # "tts-1", for example
        voice=voice,  # "alloy", for example
        input=text,
    )

    # Wrap the binary response in a byte stream and decode it as MP3.
    byte_stream = io.BytesIO(response.content)
    audio = AudioSegment.from_file(byte_stream, format="mp3")

    # The samples are emitted as int16, so make sure the segment really is
    # 16-bit before casting; otherwise wider samples would be mangled.
    if audio.sample_width != 2:
        audio = audio.set_sample_width(2)

    audio_data = np.array(audio.get_array_of_samples(), dtype=np.int16)

    # pydub returns multi-channel samples interleaved in one flat array;
    # split them into one column per channel.
    if audio.channels > 1:
        audio_data = audio_data.reshape(-1, audio.channels)

    # Yield the sample rate together with the decoded samples.
    yield audio.frame_rate, audio_data
35
+
36
+
37
# demo using older gradio version (3.50.2)
with gr.Blocks() as demo:
    # Model and voice selectors side by side.
    with gr.Row():
        model = gr.Dropdown(choices=['tts-1','tts-1-hd'], label='Model', value='tts-1')
        voice = gr.Dropdown(choices=['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer'], label='Voice Options', value='alloy')

    text = gr.Textbox(label="Input text")
    # Label describes the actual action (was the hello-world sample's "Greet").
    btn = gr.Button("Generate speech")
    output_audio = gr.Audio(label="Speech Output", streaming=True, autoplay=True)

    # Run the TTS generator on click; also exposed as the "tts-stream" API endpoint.
    btn.click(fn=stream_and_yield_audio, inputs=[text,model, voice], outputs=output_audio, api_name="tts-stream")

# Queuing is enabled before launch; the click handler is a generator function.
demo.queue().launch()