Ansaribinhyder committed on
Commit
9e41949
·
1 Parent(s): facadcf

Updated to gradio

Browse files
Files changed (5) hide show
  1. Dockerfile +3 -15
  2. Dockerfile_backups +40 -0
  3. app.py +48 -160
  4. app_flask.py +85 -0
  5. templates/index.html +2 -2
Dockerfile CHANGED
@@ -3,12 +3,6 @@
3
 
4
  FROM python:3.12
5
 
6
- # # Install system dependencies
7
- # RUN apt-get update && apt-get install -y \
8
- # portaudio19-dev \
9
- # python3-dev \
10
- # && rm -rf /var/lib/apt/lists/*
11
-
12
  # Install ALSA dependencies and other packages
13
  RUN apt-get update && apt-get install -y \
14
  alsa-utils \
@@ -26,15 +20,9 @@ COPY requirements.txt .
26
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
27
 
28
  COPY . .
29
- # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
30
-
31
- # Set the environment variable for Flask
32
- ENV FLASK_APP=app.py
33
- ENV FLASK_RUN_HOST=0.0.0.0
34
- ENV FLASK_RUN_PORT=7860
35
 
36
- # Expose the port Flask will run on
37
  EXPOSE 7860
38
 
39
- # Command to run the Flask app
40
- CMD ["flask", "run"]
 
3
 
4
  FROM python:3.12
5
 
 
 
 
 
 
 
6
  # Install ALSA dependencies and other packages
7
  RUN apt-get update && apt-get install -y \
8
  alsa-utils \
 
20
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
21
 
22
  COPY . .
 
 
 
 
 
 
23
 
24
+ # Expose the port Gradio will run on
25
  EXPOSE 7860
26
 
27
+ # Command to run the Gradio app
28
+ CMD ["python", "app.py"]
Dockerfile_backups ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
+ # you will also find guides on how best to write your Dockerfile
3
+
4
+ FROM python:3.12
5
+
6
+ # # Install system dependencies
7
+ # RUN apt-get update && apt-get install -y \
8
+ # portaudio19-dev \
9
+ # python3-dev \
10
+ # && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Install ALSA dependencies and other packages
13
+ RUN apt-get update && apt-get install -y \
14
+ alsa-utils \
15
+ libasound2 \
16
+ portaudio19-dev \
17
+ python3-dev \
18
+ && rm -rf /var/lib/apt/lists/*
19
+
20
+ # The two following lines are requirements for the Dev Mode to be functional
21
+ # Learn more about the Dev Mode at https://huggingface.co/dev-mode-explorers
22
+ RUN useradd -m -u 1000 user
23
+ WORKDIR /app
24
+
25
+ COPY requirements.txt .
26
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
27
+
28
+ COPY . .
29
+ # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
30
+
31
+ # Set the environment variable for Flask
32
+ ENV FLASK_APP=app.py
33
+ ENV FLASK_RUN_HOST=0.0.0.0
34
+ ENV FLASK_RUN_PORT=7860
35
+
36
+ # Expose the port Flask will run on
37
+ EXPOSE 7860
38
+
39
+ # Command to run the Flask app
40
+ CMD ["flask", "run"]
app.py CHANGED
@@ -1,177 +1,65 @@
1
- from flask import Flask, render_template, request, jsonify
2
  import speech_recognition as sr
3
  from googletrans import Translator
4
  import requests
5
  import os
6
-
7
- app = Flask(__name__)
8
-
9
  # Initialize recognizer and translator
10
  recognizer = sr.Recognizer()
11
  translator = Translator()
12
-
13
  # Language settings mapping
14
  language_mapping = {
15
- 1: ('ta-IN', 'ta', 'en'), # Tamil
16
- 2: ('en-US', 'en', 'en'), # English
17
- 3: ('hi-IN', 'hi', 'en'), # Hindi
18
- 4: ('ms-MY', 'ms', 'en') # Malay
19
  }
20
-
21
- # Check microphone availability at startup
22
- try:
23
- mic_list = sr.Microphone.list_microphone_names()
24
- print("Available microphones:", mic_list)
25
- if not mic_list:
26
- print("WARNING: No microphones detected - audio input disabled")
27
- except OSError as e:
28
- print(f"Microphone check failed: {str(e)}")
29
-
30
- @app.route('/')
31
- def index():
32
- return render_template("index.html")
33
-
34
- @app.route('/speech-to-text', methods=['POST'])
35
- def speech_to_text():
36
  """Convert speech to text, translate, and query the /ask endpoint"""
37
- try:
38
- language = int(request.form.get("language", 0))
39
- if language not in language_mapping:
40
- return jsonify({"error": "Invalid language selection"}), 400
41
-
42
- recognition_lang, src_lang, dest_lang = language_mapping[language]
43
-
44
- # Get available microphone devices
45
- mic_devices = sr.Microphone.list_microphone_names()
46
- if not mic_devices:
47
- return jsonify({"error": "No audio input devices available"}), 400
48
-
49
- # Use first available microphone explicitly
50
- with sr.Microphone(device_index=0) as source:
51
- print(f"Listening for {recognition_lang}...")
52
- recognizer.adjust_for_ambient_noise(source, duration=1)
53
- audio = recognizer.listen(source, timeout=10)
54
-
55
  text = recognizer.recognize_google(audio, language=recognition_lang)
56
  print(f"Recognized: {text}")
57
-
58
  # Translate text
59
  translated = translator.translate(text, src=src_lang, dest=dest_lang)
60
  print(f"Translated: {translated.text}")
61
-
62
- # API call with error handling
63
  API_KEY = os.getenv("RAG_API_KEY")
64
- if not API_KEY:
65
- return jsonify({"error": "API key not configured"}), 500
66
-
67
- response = requests.post(
68
- url="https://aitestingworkspace-aiagentchatup.hf.space/ask",
69
- headers={"Authorization": f"Bearer {API_KEY}"},
70
- data={"question": translated.text},
71
- timeout=15
72
- )
73
-
74
- return jsonify({
75
- "recognized_text": text,
76
- "translated_text": translated.text,
77
- "server_response": response.json().get('answer', 'No response')
78
- })
79
-
80
- except sr.UnknownValueError:
81
- return jsonify({"error": "Could not understand audio"}), 400
82
- except sr.RequestError as e:
83
- return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
84
- except OSError as e:
85
- return jsonify({"error": f"Audio device error: {str(e)}"}), 500
86
- except Exception as e:
87
- return jsonify({"error": f"Unexpected error: {str(e)}"}), 500
88
-
89
- if __name__ == "__main__":
90
- app.run(host="0.0.0.0", port=5000, debug=False) # Disable debug for production
91
-
92
-
93
- ###################This is the Existing CODE########################################
94
- # from flask import Flask, render_template, request, jsonify
95
- # import speech_recognition as sr
96
- # from googletrans import Translator
97
- # import requests
98
- # import os
99
- # # import pyttsx3
100
-
101
- # app = Flask(__name__)
102
-
103
- # # Initialize recognizer and translator
104
- # recognizer = sr.Recognizer()
105
- # translator = Translator()
106
- # # engine = pyttsx3.init()
107
-
108
- # # Language settings mapping
109
- # language_mapping = {
110
- # 1: ('ta-IN', 'ta', 'en'), # Tamil
111
- # 2: ('en-US', 'en', 'en'), # English
112
- # 3: ('hi-IN', 'hi', 'en'), # Hindi
113
- # 4: ('ms-MY', 'ms', 'en') # Malay
114
- # }
115
-
116
- # @app.route('/')
117
- # def index():
118
- # return render_template("index.html")
119
-
120
- # # def speak(text):
121
- # # engine.say(text)
122
- # # engine.runAndWait()
123
-
124
- # @app.route('/speech-to-text', methods=['POST'])
125
- # def speech_to_text():
126
- # """Convert speech to text, translate, and query the /ask endpoint"""
127
- # language = int(request.form.get("language", 0))
128
-
129
- # if language not in language_mapping:
130
- # return jsonify({"error": "Invalid language selection"}), 400
131
-
132
- # recognition_lang, src_lang, dest_lang = language_mapping[language]
133
-
134
- # with sr.Microphone() as source:
135
- # print(f"Listening for {recognition_lang}...")
136
- # recognizer.adjust_for_ambient_noise(source)
137
- # audio = recognizer.listen(source)
138
-
139
- # try:
140
- # text = recognizer.recognize_google(audio, language=recognition_lang)
141
- # print(f"Recognized: {text}")
142
-
143
- # # Translate text
144
- # translated = translator.translate(text, src=src_lang, dest=dest_lang)
145
- # print(f"Translated: {translated.text}")
146
-
147
- # # Send to /ask API
148
- # API_KEY = os.getenv("RAG_API_KEY")
149
-
150
- # # Hugging Face API endpoint
151
- # url = "https://aitestingworkspace-aiagentchatup.hf.space/ask"
152
-
153
- # # Headers with authentication
154
- # headers = {
155
- # "Authorization": f"Bearer {API_KEY}"
156
- # }
157
-
158
- # response = requests.post(url=url,data={"question": translated.text},headers=headers)
159
-
160
- # # # Get response and speak it
161
- # # server_response = response.json().get('answer', 'No response received.')
162
- # # print(f"Server Response: {server_response}")
163
- # # speak(server_response)
164
-
165
- # return jsonify({
166
- # "recognized_text": text,
167
- # "translated_text": translated.text,
168
- # "server_response": response.json()['answer']
169
- # })
170
-
171
- # except sr.UnknownValueError:
172
- # return jsonify({"error": "Could not understand audio"}), 400
173
- # except Exception as e:
174
- # return jsonify({"error": str(e)}), 500
175
-
176
- # if __name__ == "__main__":
177
- # app.run(host="0.0.0.0", port=5000, debug=True)
 
1
+ import gradio as gr
2
  import speech_recognition as sr
3
  from googletrans import Translator
4
  import requests
5
  import os
6
+
 
 
7
  # Initialize recognizer and translator
8
  recognizer = sr.Recognizer()
9
  translator = Translator()
10
+
11
  # Language settings mapping
12
  language_mapping = {
13
+ "Tamil": ('ta-IN', 'ta', 'en'),
14
+ "English": ('en-US', 'en', 'en'),
15
+ "Hindi": ('hi-IN', 'hi', 'en'),
16
+ "Malay": ('ms-MY', 'ms', 'en')
17
  }
18
+
19
+ def speech_to_text(language):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  """Convert speech to text, translate, and query the /ask endpoint"""
21
+ if language not in language_mapping:
22
+ return "Invalid language selection", "", ""
23
+
24
+ recognition_lang, src_lang, dest_lang = language_mapping[language]
25
+
26
+ with sr.Microphone() as source:
27
+ print(f"Listening for {recognition_lang}...")
28
+ recognizer.adjust_for_ambient_noise(source)
29
+ audio = recognizer.listen(source)
30
+
31
+ try:
 
 
 
 
 
 
 
32
  text = recognizer.recognize_google(audio, language=recognition_lang)
33
  print(f"Recognized: {text}")
34
+
35
  # Translate text
36
  translated = translator.translate(text, src=src_lang, dest=dest_lang)
37
  print(f"Translated: {translated.text}")
38
+
39
+ # Send to /ask API
40
  API_KEY = os.getenv("RAG_API_KEY")
41
+ url = "https://aitestingworkspace-aiagentchatup.hf.space/ask"
42
+ headers = {"Authorization": f"Bearer {API_KEY}"}
43
+ response = requests.post(url=url, data={"question": translated.text}, headers=headers)
44
+
45
+ return text, translated.text, response.json().get("answer", "No response received.")
46
+
47
+ except sr.UnknownValueError:
48
+ return "Could not understand audio", "", ""
49
+ except Exception as e:
50
+ return str(e), "", ""
51
+
52
+ # Gradio Interface
53
+ demo = gr.Interface(
54
+ fn=speech_to_text,
55
+ inputs=gr.Dropdown(choices=list(language_mapping.keys()), label="Select Language"),
56
+ outputs=[
57
+ gr.Textbox(label="Recognized Text"),
58
+ gr.Textbox(label="Translated Text"),
59
+ gr.Textbox(label="Server Response")
60
+ ],
61
+ title="Speech to Text & Translation",
62
+ description="Speak in the selected language, and the system will transcribe, translate, and fetch a response."
63
+ )
64
+
65
+ demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app_flask.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ###################This is the Existing CODE########################################
2
+ from flask import Flask, render_template, request, jsonify
3
+ import speech_recognition as sr
4
+ from googletrans import Translator
5
+ import requests
6
+ import os
7
+ # import pyttsx3
8
+
9
+ app = Flask(__name__)
10
+
11
+ # Initialize recognizer and translator
12
+ recognizer = sr.Recognizer()
13
+ translator = Translator()
14
+ # engine = pyttsx3.init()
15
+
16
+ # Language settings mapping
17
+ language_mapping = {
18
+ 1: ('ta-IN', 'ta', 'en'), # Tamil
19
+ 2: ('en-US', 'en', 'en'), # English
20
+ 3: ('hi-IN', 'hi', 'en'), # Hindi
21
+ 4: ('ms-MY', 'ms', 'en') # Malay
22
+ }
23
+
24
+ @app.route('/')
25
+ def index():
26
+ return render_template("index.html")
27
+
28
+ # def speak(text):
29
+ # engine.say(text)
30
+ # engine.runAndWait()
31
+
32
+ @app.route('/speech-to-text', methods=['POST'])
33
+ def speech_to_text():
34
+ """Convert speech to text, translate, and query the /ask endpoint"""
35
+ language = int(request.form.get("language", 0))
36
+
37
+ if language not in language_mapping:
38
+ return jsonify({"error": "Invalid language selection"}), 400
39
+
40
+ recognition_lang, src_lang, dest_lang = language_mapping[language]
41
+
42
+ with sr.Microphone() as source:
43
+ print(f"Listening for {recognition_lang}...")
44
+ recognizer.adjust_for_ambient_noise(source)
45
+ audio = recognizer.listen(source)
46
+
47
+ try:
48
+ text = recognizer.recognize_google(audio, language=recognition_lang)
49
+ print(f"Recognized: {text}")
50
+
51
+ # Translate text
52
+ translated = translator.translate(text, src=src_lang, dest=dest_lang)
53
+ print(f"Translated: {translated.text}")
54
+
55
+ # Send to /ask API
56
+ API_KEY = os.getenv("RAG_API_KEY")
57
+
58
+ # Hugging Face API endpoint
59
+ url = "https://aitestingworkspace-aiagentchatup.hf.space/ask"
60
+
61
+ # Headers with authentication
62
+ headers = {
63
+ "Authorization": f"Bearer {API_KEY}"
64
+ }
65
+
66
+ response = requests.post(url=url,data={"question": translated.text},headers=headers)
67
+
68
+ # # Get response and speak it
69
+ # server_response = response.json().get('answer', 'No response received.')
70
+ # print(f"Server Response: {server_response}")
71
+ # speak(server_response)
72
+
73
+ return jsonify({
74
+ "recognized_text": text,
75
+ "translated_text": translated.text,
76
+ "server_response": response.json()['answer']
77
+ })
78
+
79
+ except sr.UnknownValueError:
80
+ return jsonify({"error": "Could not understand audio"}), 400
81
+ except Exception as e:
82
+ return jsonify({"error": str(e)}), 500
83
+
84
+ if __name__ == "__main__":
85
+ app.run(host="0.0.0.0", port=5000, debug=True)
templates/index.html CHANGED
@@ -3,12 +3,12 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Speech to Text</title>
7
  <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
8
  </head>
9
  <body>
10
  <div class="container">
11
- <h1>Speech to Text Converter</h1>
12
  <form id="speechForm">
13
  <label for="language">Select Language:</label>
14
  <select id="language" name="language">
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>UST CHATBOT</title>
7
  <link rel="stylesheet" href="{{ url_for('static', filename='styles.css') }}">
8
  </head>
9
  <body>
10
  <div class="container">
11
+ <h1>UST CHATBOT</h1>
12
  <form id="speechForm">
13
  <label for="language">Select Language:</label>
14
  <select id="language" name="language">