Manojkumarpandi committed on
Commit 6ad8e7e · verified
1 Parent(s): 65ec2b4

Update app.py

Files changed (1)
  1. app.py  +39 -50
app.py CHANGED
@@ -2,14 +2,13 @@ import os
 import streamlit as st
 import speech_recognition as sr
 from gtts import gTTS
-import google.generativeai as genai  # Import Google Generative AI
+import google.generativeai as genai
 import base64
 from transformers import pipeline
 
+# Set up Google AI API key
 genai.configure(api_key=os.getenv("GENAI_API_KEY"))
-
-
-# Initialize recognizer
+# Initialize speech recognizer
 recognizer = sr.Recognizer()
 
 # Emotion Detection Model
@@ -17,98 +16,88 @@ emotion_model = pipeline("text-classification", model="bhadresh-savani/distilber
 
 # Function to detect emotion
 def detect_emotion(text):
-    emotion = emotion_model(text)[0]['label']
-    return emotion
+    return emotion_model(text)[0]['label']
 
-# Function to listen to customer
-def listen_to_customer():
+# Function to listen to the user
+def listen_to_user():
     with sr.Microphone() as source:
         st.write("Listening...")
         audio = recognizer.listen(source)
     try:
-        text = recognizer.recognize_google(audio)
-        st.write(f"Customer said: {text}")
-        return text
+        return recognizer.recognize_google(audio)
     except Exception as e:
         st.error(f"Speech Recognition Error: {str(e)}")
         return None
 
-# Function to process text
-def process_text(customer_input):
-    if customer_input:
+# AI Response Generation
+def generate_ai_response(user_input):
+    if user_input:
         try:
-            model = genai.GenerativeModel('gemini-1.5-flash')  # Updated model
-            response = model.generate_content(customer_input)
+            model = genai.GenerativeModel('gemini-1.5-flash')
+            response = model.generate_content(user_input)
             return response.text
         except Exception as e:
             return f"Error in AI response: {str(e)}"
-    else:
-        return "Sorry, I didn't catch that. Could you please repeat?"
+    return "Sorry, I didn't catch that."
 
-# Function to convert text to speech
-def text_to_speech(text, voice_option, language):
+# Convert Text to Speech
+def text_to_speech(text, language):
     lang_code = {"English": "en", "Spanish": "es", "French": "fr", "Hindi": "hi"}.get(language, "en")
-    tts = gTTS(text=text, lang=lang_code, tld='com' if voice_option == "Male" else 'co.uk')
+    tts = gTTS(text=text, lang=lang_code)
     file_path = "response.mp3"
     tts.save(file_path)
     return file_path
 
-# Function to autoplay audio
+# Autoplay Audio in Streamlit
 def autoplay_audio(file_path):
     with open(file_path, "rb") as f:
-        data = f.read()
-    b64 = base64.b64encode(data).decode()
-    audio_html = f"""
+        b64 = base64.b64encode(f.read()).decode()
+    st.markdown(f"""
         <audio controls autoplay>
         <source src="data:audio/mp3;base64,{b64}" type="audio/mp3">
         </audio>
-    """
-    st.markdown(audio_html, unsafe_allow_html=True)
+    """, unsafe_allow_html=True)
 
-# Main function
+# Streamlit UI
 def main():
-    st.title("Vocacity AI Voice Agent 🎙️")
+    st.title("🎙️ Vocacity AI Voice Assistant")
     st.sidebar.header("Settings")
 
     # User settings
     language = st.sidebar.selectbox("Choose Language:", ["English", "Spanish", "French", "Hindi"])
-    voice_option = st.sidebar.selectbox("Choose AI Voice:", ["Male", "Female"])
     clear_chat = st.sidebar.button("🗑️ Clear Chat")
 
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
-
-    # Text Input
-    user_text_input = st.text_input("Type your query here:", "")
-
-    # Voice Input Button
+
+    # User Input
+    user_input = st.text_input("Type your query here:", "")
+
     if st.button("🎙️ Speak"):
-        customer_input = listen_to_customer()
-    else:
-        customer_input = user_text_input if user_text_input.strip() else None
-
-    if customer_input:
-        emotion = detect_emotion(customer_input)
-        ai_response = process_text(customer_input)
-        st.session_state.chat_history.append((customer_input, ai_response))
+        user_input = listen_to_user()
+
+    if user_input:
+        emotion = detect_emotion(user_input)
+        ai_response = generate_ai_response(user_input)
+        st.session_state.chat_history.append((user_input, ai_response))
 
-        st.write(f"**AI Response:** {ai_response} (Emotion: {emotion})")
+        st.write(f"**AI:** {ai_response} (Emotion: {emotion})")
 
-        # Convert response to speech and autoplay it
-        audio_file = text_to_speech(ai_response, voice_option, language)
+        # Convert AI response to speech
+        audio_file = text_to_speech(ai_response, language)
         autoplay_audio(audio_file)
         os.remove(audio_file)
-
+
     # Display chat history
     st.write("### Chat History")
     for user, ai in st.session_state.chat_history[-5:]:
         st.write(f"👤 {user}")
         st.write(f"🤖 {ai}")
-
-    # Clear chat
+
+    # Clear Chat
    if clear_chat:
        st.session_state.chat_history = []
        st.experimental_rerun()
 
 if __name__ == "__main__":
-    main()
+    main()
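
Below is a minimal sketch (not part of this commit) for exercising the refactored helpers from a plain Python shell, without the Streamlit UI or the microphone. It assumes GENAI_API_KEY is exported in the environment and that the model name truncated in the hunk header above is bhadresh-savani/distilbert-base-uncased-emotion; adjust both if your setup differs.

import os
import google.generativeai as genai
from gtts import gTTS
from transformers import pipeline

# Same configuration calls as app.py
genai.configure(api_key=os.getenv("GENAI_API_KEY"))
# Assumed full name of the emotion model (it is truncated in the diff header)
emotion_model = pipeline("text-classification",
                         model="bhadresh-savani/distilbert-base-uncased-emotion")

user_input = "My order arrived late and I am quite upset."

# Mirrors detect_emotion(): the pipeline returns a list of {label, score} dicts
print("Emotion:", emotion_model(user_input)[0]["label"])

# Mirrors generate_ai_response(): same Gemini model id as in the diff
model = genai.GenerativeModel("gemini-1.5-flash")
print("AI:", model.generate_content(user_input).text)

# Mirrors the simplified text_to_speech(): language only, no tld/voice option
gTTS(text="Sorry to hear that. Let me help.", lang="en").save("response.mp3")

Running the sketch should print one emotion label and a Gemini reply, then leave response.mp3 on disk, which is roughly what main() produces per query before autoplay_audio() streams the file back through st.markdown.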