rumaisa1054 committed
Commit 03f470d · verified · 1 Parent(s): 2d45530

Update app.py

Files changed (1)
  1. app.py +60 -40
app.py CHANGED
@@ -5,6 +5,7 @@ import speech_recognition as sr
 from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
 import numpy as np
 from gtts import gTTS
+import asyncio
 
 # Function to convert text to speech and return audio file
 def text_to_speech(text):
@@ -15,11 +16,20 @@ def text_to_speech(text):
     return audio_file
 
 # Function to convert speech to text using SpeechRecognition
-def speech_to_text(audio_file):
+def speech_to_text(audio_data):
     recognizer = sr.Recognizer()
-    with sr.AudioFile(audio_file) as source:
-        audio_data = recognizer.record(source)
-        return recognizer.recognize_google(audio_data)
+    with sr.AudioFile(BytesIO(audio_data)) as source:
+        audio = recognizer.record(source)
+    return recognizer.recognize_google(audio)
+
+class AudioProcessor(AudioProcessorBase):
+    def __init__(self):
+        self.audio_buffer = BytesIO()
+
+    def recv(self, frame):
+        audio_data = frame.to_ndarray().tobytes()
+        self.audio_buffer.write(audio_data)
+        return frame
 
 def main():
     st.title("Real-Time Audio Chat with AI")
@@ -36,42 +46,52 @@ def main():
             st.write("AI:")
             st.audio(message["audio"], format="audio/mp3")
 
-    # Upload audio input from the user
-    user_audio_file = st.file_uploader("Upload your voice message (in WAV format):", type=["wav"])
-
-    if user_audio_file is not None:
-        # Convert speech to text
-        user_text = speech_to_text(user_audio_file)
-
-        # Display the converted text (optional)
-        st.write(f"Recognized Text: {user_text}")
-
-        # Convert user's speech input to audio for playback
-        user_audio = text_to_speech(user_text)
-
-        # Add user message (as audio) to chat history
-        st.session_state.chat_messages.append({
-            "role": "user",
-            "content": user_text,
-            "audio": user_audio.getvalue()
-        })
-
-        # Get AI response using responsr function
-        response = responsr(user_text)
-
-        # Convert AI response to speech
-        response_audio = text_to_speech(response)
-
-        # Add assistant's response (as audio) to chat history
-        st.session_state.chat_messages.append({
-            "role": "assistant",
-            "content": response,
-            "audio": response_audio.getvalue()
-        })
-
-        # Display the audio files for both user input and AI response
-        st.audio(user_audio, format="audio/mp3")
-        st.audio(response_audio, format="audio/mp3")
+    # Initialize WebRTC audio streamer
+    webrtc_ctx = webrtc_streamer(
+        key="speech-to-text",
+        mode=WebRtcMode.SENDRECV,
+        audio_processor_factory=AudioProcessor,
+        rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
+        media_stream_constraints={"audio": True, "video": False},
+        async_processing=True,
+    )
+
+    if webrtc_ctx.state.playing:
+        processor = webrtc_ctx.audio_processor
+        if processor and processor.audio_buffer.getvalue():
+            # Convert audio buffer to speech
+            audio_data = processor.audio_buffer.getvalue()
+            user_text = speech_to_text(audio_data)
+
+            # Convert the user's speech input to audio for playback
+            user_audio = text_to_speech(user_text)
+
+            # Add user message (as audio) to chat history
+            st.session_state.chat_messages.append({
+                "role": "user",
+                "content": user_text,
+                "audio": user_audio.getvalue()
+            })
+
+            # Get AI response using the responsr function
+            response = responsr(user_text)
+
+            # Convert AI response to speech
+            response_audio = text_to_speech(response)
+
+            # Add assistant's response (as audio) to chat history
+            st.session_state.chat_messages.append({
+                "role": "assistant",
+                "content": response,
+                "audio": response_audio.getvalue()
+            })
+
+            # Display the audio files for both user input and AI response
+            st.audio(user_audio, format="audio/mp3")
+            st.audio(response_audio, format="audio/mp3")
+
+            # Clear the audio buffer
+            processor.audio_buffer = BytesIO()
 
 if __name__ == "__main__":
     main()
 
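Two caveats on the new main() flow. First, responsr is called but not defined or imported in any hunk shown here, so it is presumably defined in the part of app.py outside this diff; it is left as-is above. Second, recognize_google raises rather than returning an empty result when the buffer is silent or the Google Web Speech API is unreachable, and an uncaught exception here would abort the whole Streamlit rerun. A hedged sketch of a defensive wrapper around the committed speech_to_text shape (speech_to_text_safe is an illustrative name, not part of the commit):

import speech_recognition as sr
from io import BytesIO

def speech_to_text_safe(audio_data):
    """Like the committed speech_to_text, but returns None instead of raising."""
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(BytesIO(audio_data)) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return None  # nothing intelligible in the buffer
    except sr.RequestError:
        return None  # Web Speech API unreachable or quota exceeded

main() could then skip the chat-history update whenever speech_to_text_safe returns None, instead of crashing mid-rerun.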
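One more caveat, flagged as an assumption about streamlit-webrtc's threading model: recv() runs on a worker thread while main() reruns on the Streamlit script thread, so reading audio_buffer.getvalue() and then reassigning processor.audio_buffer = BytesIO() can drop frames written in between. A minimal thread-safety sketch (LockedAudioProcessor and drain are illustrative names, not part of the commit):

import threading
from io import BytesIO
from streamlit_webrtc import AudioProcessorBase

class LockedAudioProcessor(AudioProcessorBase):
    def __init__(self):
        self.audio_buffer = BytesIO()
        self._lock = threading.Lock()

    def recv(self, frame):
        # Called from streamlit-webrtc's worker thread for each audio frame.
        with self._lock:
            self.audio_buffer.write(frame.to_ndarray().tobytes())
        return frame

    def drain(self):
        """Atomically hand the buffered audio to the caller and reset the buffer."""
        with self._lock:
            data = self.audio_buffer.getvalue()
            self.audio_buffer = BytesIO()
        return data

main() would then call audio_data = processor.drain() once, in place of the separate getvalue() check, getvalue() read, and buffer reset.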