Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1 |
import os
|
2 |
from flask import Flask, request, jsonify, render_template
|
3 |
import google.generativeai as genai
|
|
|
|
|
4 |
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
|
5 |
-
from langchain.vectorstores import FAISS
|
6 |
from langchain.chains import ConversationalRetrievalChain
|
7 |
from langchain.memory import ConversationBufferMemory
|
8 |
from dotenv import load_dotenv
|
@@ -75,7 +76,7 @@ def initialize_chatbot():
|
|
75 |
# Initialize the language model
|
76 |
try:
|
77 |
llm = ChatGoogleGenerativeAI(
|
78 |
-
model="gemini-2.0-flash",
|
79 |
temperature=0.2,
|
80 |
top_p=0.85,
|
81 |
google_api_key=GOOGLE_API_KEY
|
@@ -94,7 +95,7 @@ def initialize_chatbot():
|
|
94 |
retriever=retriever,
|
95 |
memory=memory,
|
96 |
verbose=True,
|
97 |
-
return_source_documents=False,
|
98 |
combine_docs_chain_kwargs={"prompt": get_custom_prompt()},
|
99 |
)
|
100 |
logger.info("QA chain created successfully")
|
@@ -108,67 +109,50 @@ def initialize_chatbot():
|
|
108 |
def format_links_as_html(text):
|
109 |
# Detect markdown style links [text](url)
|
110 |
markdown_pattern = r'\[(.*?)\]\((https?://[^\s\)]+)\)'
|
111 |
-
|
112 |
-
|
113 |
-
return text
|
114 |
-
|
115 |
# Handle URLs in square brackets [url]
|
116 |
bracket_pattern = r'\[(https?://[^\s\]]+)\]'
|
117 |
-
|
118 |
-
text = re.sub(bracket_pattern, r'<a href="\1" target="_blank">\1</a>', text)
|
119 |
-
return text
|
120 |
-
|
121 |
-
# Regular URL pattern
|
122 |
-
url_pattern = r'(https?://[^\s\])+)'
|
123 |
|
124 |
-
#
|
125 |
-
|
|
|
126 |
|
127 |
-
#
|
128 |
-
|
129 |
-
for url in urls[1:]:
|
130 |
-
text = text.replace(url, "")
|
131 |
-
|
132 |
-
# Replace the remaining URL with an HTML anchor tag
|
133 |
-
if urls:
|
134 |
-
text = re.sub(url_pattern, r'<a href="\1" target="_blank">\1</a>', text, count=1)
|
135 |
|
136 |
return text
|
137 |
|
138 |
# Function to properly escape asterisks for markdown rendering
|
139 |
def escape_markdown(text):
    """Backslash-escape every lone asterisk in *text*.

    A "lone" asterisk is one that is neither adjacent to another asterisk
    (so ``**bold**`` markers are left alone) nor already preceded by a
    backslash. Skipping already-escaped asterisks makes the function
    idempotent: running it twice yields the same string as running it once.

    Args:
        text: The string to escape.

    Returns:
        The string with each lone ``*`` replaced by a backslash followed
        by ``*``.
    """
    # (?<![\\*])  not preceded by a backslash (already escaped) or by '*'
    # (?!\*)      not followed by another '*'
    # The replacement template r'\\*' emits one literal backslash plus '*'.
    return re.sub(r'(?<![\\*])\*(?!\*)', r'\\*', text)
|
143 |
|
144 |
# Function to format markdown and handle asterisks with proper line breaks
|
145 |
def format_markdown_with_breaks(text):
|
146 |
-
# First remove escaped asterisks (\*) and replace with just asterisks (*)
|
147 |
text = text.replace('\\*', '*')
|
148 |
-
|
149 |
-
# Handle bold text (convert **text** to <strong>text</strong>)
|
150 |
text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)
|
151 |
-
|
152 |
-
# Now split text by lines for processing asterisk line breaks
|
153 |
lines = text.split('\n')
|
154 |
formatted_lines = []
|
155 |
|
156 |
for i, line in enumerate(lines):
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
#
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
formatted_lines.append(f"• {content}")
|
166 |
-
else:
|
167 |
-
formatted_lines.append(f"<br>• {content}")
|
168 |
else:
|
169 |
formatted_lines.append(line)
|
170 |
|
171 |
-
|
|
|
|
|
|
|
|
|
|
|
172 |
|
173 |
@app.route('/')
|
174 |
def home():
|
@@ -180,86 +164,34 @@ def health():
|
|
180 |
try:
|
181 |
current_time = time.time()
|
182 |
uptime_seconds = current_time - app_start_time
|
183 |
-
uptime_hours = uptime_seconds / 3600
|
184 |
|
185 |
-
# Check if critical components are available
|
186 |
health_status = {
|
187 |
"status": "healthy",
|
188 |
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()),
|
189 |
"uptime_seconds": round(uptime_seconds, 2),
|
190 |
-
"uptime_hours": round(uptime_hours, 2),
|
191 |
-
"api_key_configured": bool(GOOGLE_API_KEY and GOOGLE_API_KEY != "your_api_key_here"),
|
192 |
"chatbot_initialized": qa_chain is not None
|
193 |
}
|
194 |
-
|
195 |
-
# Return 200 status for healthy
|
196 |
return jsonify(health_status), 200
|
197 |
|
198 |
except Exception as e:
|
199 |
logger.error(f"Health check failed: {str(e)}")
|
200 |
return jsonify({
|
201 |
-
"status": "unhealthy",
|
202 |
-
"error": str(e),
|
203 |
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime())
|
204 |
}), 500
|
205 |
|
206 |
-
@app.route('/_health')
|
207 |
-
def health_check():
    """Legacy health check endpoint for Docker (kept for compatibility).

    Always answers with the JSON body ``{"status": "healthy"}`` and
    HTTP status 200; it performs no checks of its own.
    """
    payload = {"status": "healthy"}
    return jsonify(payload), 200
|
210 |
-
|
211 |
@app.route('/ping')
|
212 |
def ping():
    """Basic uptime probe: reply with the text "pong" and HTTP status 200."""
    body = "pong"
    status_code = 200
    return body, status_code
|
215 |
|
216 |
-
@app.route('/status')
|
217 |
-
def status():
    """Detailed status endpoint.

    Builds a JSON report with the application name, a UTC timestamp, the
    uptime expressed in seconds/minutes/hours/days, the interpreter version
    and configured port, and flags describing which services are ready.

    Relies on ``app_start_time``, ``GOOGLE_API_KEY``, ``qa_chain``,
    ``memory``, ``logger`` and ``jsonify`` being available at module level.

    Returns:
        ``(response, 200)`` with the report on success, or
        ``(response, 500)`` with the error text if building it raises.
    """
    # Function-scope import: the interpreter version is read from ``sys``
    # directly instead of through the undocumented ``os.sys`` attribute.
    import sys

    try:
        uptime_seconds = time.time() - app_start_time

        status_info = {
            "application": "Flask Chatbot",
            "status": "running",
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()),
            "uptime": {
                "seconds": round(uptime_seconds, 2),
                "minutes": round(uptime_seconds / 60, 2),
                "hours": round(uptime_seconds / 3600, 2),
                "days": round(uptime_seconds / 86400, 2),
            },
            "environment": {
                "python_version": sys.version,
                # A string when PORT is set in the environment, otherwise
                # the integer default 7860.
                "port": os.environ.get('PORT', 7860),
            },
            "services": {
                # The placeholder value counts as "not configured".
                "api_key_configured": bool(GOOGLE_API_KEY and GOOGLE_API_KEY != "your_api_key_here"),
                "chatbot_initialized": qa_chain is not None,
                "memory_initialized": memory is not None,
            },
        }

        return jsonify(status_info), 200

    except Exception as e:
        logger.error(f"Status check failed: {str(e)}")
        return jsonify({
            "status": "error",
            "error": str(e),
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime())
        }), 500
|
253 |
-
|
254 |
@app.route('/api/chat', methods=['POST'])
|
255 |
def chat():
|
256 |
global qa_chain
|
257 |
|
258 |
-
# Initialize on first request if not already done
|
259 |
if qa_chain is None:
|
260 |
-
|
261 |
-
|
262 |
-
return jsonify({"error": "Failed to initialize chatbot. Check server logs for details."}), 500
|
263 |
|
264 |
data = request.json
|
265 |
user_message = data.get('message', '')
|
@@ -270,29 +202,25 @@ def chat():
|
|
270 |
try:
|
271 |
logger.info(f"Processing user query: {user_message}")
|
272 |
|
273 |
-
#
|
274 |
-
result = qa_chain({"question": user_message})
|
275 |
|
276 |
-
# Extract the answer
|
277 |
answer = result.get("answer", "I'm sorry, I couldn't generate a response.")
|
278 |
|
279 |
-
# Format the answer
|
280 |
answer = escape_markdown(answer)
|
281 |
answer = format_links_as_html(answer)
|
282 |
answer = format_markdown_with_breaks(answer)
|
283 |
|
284 |
logger.info("Query processed successfully")
|
285 |
|
286 |
-
return jsonify({
|
287 |
-
"answer": answer,
|
288 |
-
# No sources included in the response
|
289 |
-
})
|
290 |
|
291 |
except Exception as e:
|
292 |
-
|
293 |
-
|
|
|
294 |
|
295 |
if __name__ == '__main__':
    # Docker deployment: listen on every interface; the port is taken from
    # the PORT environment variable and falls back to 7860.
    port = int(os.environ.get('PORT', 7860))
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
    )
|
|
|
1 |
import os
|
2 |
from flask import Flask, request, jsonify, render_template
|
3 |
import google.generativeai as genai
|
4 |
+
# LangChain Community has the updated vector stores
|
5 |
+
from langchain_community.vectorstores import FAISS
|
6 |
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
|
|
|
7 |
from langchain.chains import ConversationalRetrievalChain
|
8 |
from langchain.memory import ConversationBufferMemory
|
9 |
from dotenv import load_dotenv
|
|
|
76 |
# Initialize the language model
|
77 |
try:
|
78 |
llm = ChatGoogleGenerativeAI(
|
79 |
+
model="gemini-2.0-flash", # Updated to a newer recommended model
|
80 |
temperature=0.2,
|
81 |
top_p=0.85,
|
82 |
google_api_key=GOOGLE_API_KEY
|
|
|
95 |
retriever=retriever,
|
96 |
memory=memory,
|
97 |
verbose=True,
|
98 |
+
return_source_documents=False,
|
99 |
combine_docs_chain_kwargs={"prompt": get_custom_prompt()},
|
100 |
)
|
101 |
logger.info("QA chain created successfully")
|
|
|
109 |
# One combined pattern so that every link is rewritten exactly once.
# Alternatives are tried in order at each position, most specific first.
_LINK_PATTERN = re.compile(
    # Anchor tags already present in the text; copied through untouched.
    r'(?P<anchor><a\s[^>]*>.*?</a>)'
    # Markdown link [label](url); the label may hold one nested [..] pair.
    r'|\[(?P<label>(?:[^\[\]\n]|\[[^\[\]\n]*\])*)\]\((?P<md_url>https?://[^\s)]+)\)'
    # URL wrapped in square brackets: [url]
    r'|\[(?P<bracket_url>https?://[^\s\]]+)\]'
    # Bare URL, unless it is the value of an existing href="..." attribute.
    r'|(?<!href=")(?P<bare_url>https?://[^\s<]+)'
)

# Sentence punctuation that often follows a bare URL without being part of it.
_TRAILING_PUNCTUATION = '.,;:!?'


def _split_trailing_punctuation(url):
    """Split *url* into (url, tail), moving trailing punctuation into tail.

    A closing parenthesis is moved only when it has no matching opening
    parenthesis inside the URL, so ``.../wiki/A_(b)`` stays intact.
    """
    end = len(url)
    while end > 0:
        last = url[end - 1]
        if last in _TRAILING_PUNCTUATION:
            end -= 1
        elif last == ')' and url.count('(', 0, end) < url.count(')', 0, end):
            end -= 1
        else:
            break
    return url[:end], url[end:]


def _render_link(match):
    """Return the HTML replacement for one ``_LINK_PATTERN`` match."""
    import html  # function-scope import; only needed for escaping here

    if match.group('anchor') is not None:
        # Already an anchor tag: leave it exactly as it is.
        return match.group('anchor')

    tail = ''
    if match.group('md_url') is not None:
        url = match.group('md_url')
        label = match.group('label')
    elif match.group('bracket_url') is not None:
        url = match.group('bracket_url')
        label = html.escape(url, quote=False)
    else:
        url, tail = _split_trailing_punctuation(match.group('bare_url'))
        label = html.escape(url, quote=False)

    # Escape quotes so a URL cannot break out of the href attribute.
    href = html.escape(url, quote=True)
    return f'<a href="{href}" target="_blank">{label}</a>{tail}'


def format_links_as_html(text):
    """Turn links in *text* into ``<a ... target="_blank">`` anchors.

    Three spellings are recognised:

    * markdown links ``[label](url)`` -> anchor showing ``label``
    * bracketed URLs ``[url]``        -> anchor showing the URL
    * bare ``http://`` / ``https://`` URLs -> anchor showing the URL

    All spellings are handled in a single pass, so the URL that becomes the
    visible text of one anchor is never wrapped in a second anchor. Anchor
    tags already present in the input are left unchanged, which makes the
    function safe to apply more than once. Sentence punctuation directly
    after a bare URL is kept outside the link.

    Args:
        text: The text to process. Empty or ``None`` input is returned as is.

    Returns:
        The text with every recognised link replaced by an HTML anchor.
    """
    if not text:
        return text
    return _LINK_PATTERN.sub(_render_link, text)
|
126 |
|
127 |
# Function to properly escape asterisks for markdown rendering
|
128 |
def escape_markdown(text):
    """Backslash-escape every lone asterisk in *text*.

    A "lone" asterisk is one that is neither adjacent to another asterisk
    (so ``**bold**`` markers are left alone) nor already preceded by a
    backslash. Skipping already-escaped asterisks makes the function
    idempotent: running it twice yields the same string as running it once.

    Args:
        text: The string to escape.

    Returns:
        The string with each lone ``*`` replaced by a backslash followed
        by ``*``.
    """
    # (?<![\\*])  not preceded by a backslash (already escaped) or by '*'
    # (?!\*)      not followed by another '*'
    # The replacement template r'\\*' emits one literal backslash plus '*'.
    return re.sub(r'(?<![\\*])\*(?!\*)', r'\\*', text)
|
|
|
|
|
130 |
|
131 |
# Function to format markdown and handle asterisks with proper line breaks
|
132 |
def format_markdown_with_breaks(text):
    """Convert lightweight markdown in *text* to display-ready HTML.

    Processing order:

    1. Backslash-escaped asterisks are turned back into plain asterisks.
    2. ``**bold**`` spans become ``<strong>bold</strong>``.
    3. Each line that is a bullet item is rewritten as ``<br>• item``.
       A line counts as a bullet when, ignoring surrounding whitespace, it
       starts with ``"* "``, or starts with ``*`` and contains no further
       asterisk. A line that merely opens with emphasis, such as
       ``*note* text``, is therefore left untouched instead of being
       mangled into a bullet.
    4. A ``<br>`` at the very start of the result is dropped so the output
       does not begin with an empty line.

    Args:
        text: The text to format.

    Returns:
        The formatted text, with lines still joined by ``\\n``.
    """
    text = text.replace('\\*', '*')
    text = re.sub(r'\*\*(.*?)\*\*', r'<strong>\1</strong>', text)

    formatted_lines = []
    for line in text.split('\n'):
        stripped_line = line.strip()
        is_bullet = stripped_line.startswith('* ') or (
            stripped_line.startswith('*') and '*' not in stripped_line[1:]
        )
        if is_bullet:
            # Drop the marker, then any whitespace around the item text.
            content = stripped_line[1:].strip()
            formatted_lines.append(f"<br>• {content}")
        else:
            formatted_lines.append(line)

    # Join the lines, but remove the initial <br> if it exists
    result = '\n'.join(formatted_lines)
    if result.startswith('<br>'):
        result = result[4:]

    return result
|
156 |
|
157 |
@app.route('/')
|
158 |
def home():
|
|
|
164 |
try:
|
165 |
current_time = time.time()
|
166 |
uptime_seconds = current_time - app_start_time
|
|
|
167 |
|
|
|
168 |
health_status = {
|
169 |
"status": "healthy",
|
170 |
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime()),
|
171 |
"uptime_seconds": round(uptime_seconds, 2),
|
|
|
|
|
172 |
"chatbot_initialized": qa_chain is not None
|
173 |
}
|
|
|
|
|
174 |
return jsonify(health_status), 200
|
175 |
|
176 |
except Exception as e:
|
177 |
logger.error(f"Health check failed: {str(e)}")
|
178 |
return jsonify({
|
179 |
+
"status": "unhealthy", "error": str(e),
|
|
|
180 |
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S UTC", time.gmtime())
|
181 |
}), 500
|
182 |
|
|
|
|
|
|
|
|
|
|
|
183 |
@app.route('/ping')
|
184 |
def ping():
    """Basic uptime probe: reply with the text "pong" and HTTP status 200."""
    body = "pong"
    status_code = 200
    return body, status_code
|
187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
@app.route('/api/chat', methods=['POST'])
|
189 |
def chat():
|
190 |
global qa_chain
|
191 |
|
|
|
192 |
if qa_chain is None:
|
193 |
+
if not initialize_chatbot():
|
194 |
+
return jsonify({"error": "Failed to initialize chatbot. Check server logs."}), 500
|
|
|
195 |
|
196 |
data = request.json
|
197 |
user_message = data.get('message', '')
|
|
|
202 |
try:
|
203 |
logger.info(f"Processing user query: {user_message}")
|
204 |
|
205 |
+
# Use .invoke() instead of the deprecated __call__ method
|
206 |
+
result = qa_chain.invoke({"question": user_message})
|
207 |
|
|
|
208 |
answer = result.get("answer", "I'm sorry, I couldn't generate a response.")
|
209 |
|
210 |
+
# Format the answer
|
211 |
answer = escape_markdown(answer)
|
212 |
answer = format_links_as_html(answer)
|
213 |
answer = format_markdown_with_breaks(answer)
|
214 |
|
215 |
logger.info("Query processed successfully")
|
216 |
|
217 |
+
return jsonify({"answer": answer})
|
|
|
|
|
|
|
218 |
|
219 |
except Exception as e:
|
220 |
+
# Log the full traceback for better debugging
|
221 |
+
logger.exception(f"Error processing request: {str(e)}")
|
222 |
+
return jsonify({"error": f"An internal error occurred: {str(e)}"}), 500
|
223 |
|
224 |
if __name__ == '__main__':
    # Listen on every interface; the port is taken from the PORT
    # environment variable and falls back to 7860.
    port = int(os.environ.get('PORT', 7860))
    app.run(
        host='0.0.0.0',
        port=port,
        debug=False,
    )
|