david-oplatka commited on
Commit
fb49b3d
1 Parent(s): f775289

Add utils.py

Browse files
Files changed (3) hide show
  1. app.py +15 -203
  2. query.py +1 -135
  3. utils.py +69 -0
app.py CHANGED
@@ -1,15 +1,15 @@
1
  from omegaconf import OmegaConf
2
  from query import VectaraQuery
3
  import os
4
- import requests
5
- import json
6
  import uuid
7
 
8
  import streamlit as st
9
  from streamlit_pills import pills
10
  from streamlit_feedback import streamlit_feedback
11
 
12
- from PIL import Image
 
13
 
14
  max_examples = 6
15
  languages = {'English': 'eng', 'Spanish': 'spa', 'French': 'frs', 'Chinese': 'zho', 'German': 'deu', 'Hindi': 'hin', 'Arabic': 'ara',
@@ -21,36 +21,6 @@ languages = {'English': 'eng', 'Spanish': 'spa', 'French': 'frs', 'Chinese': 'zh
21
  if 'device_id' not in st.session_state:
22
  st.session_state.device_id = str(uuid.uuid4())
23
 
24
- headers = {
25
- 'Content-Type': 'application/json',
26
- 'Accept': '*/*'
27
- }
28
- amp_api_key = os.getenv('AMPLITUDE_TOKEN')
29
-
30
- def thumbs_feedback(feedback, **kwargs):
31
- """
32
- Sends feedback to Amplitude Analytics
33
- """
34
- data = {
35
- "api_key": amp_api_key,
36
- "events": [{
37
- "device_id": st.session_state.device_id,
38
- "event_type": "provided_feedback",
39
- "event_properties": {
40
- "Space Name": kwargs.get("title", "Unknown Space Name"),
41
- "Demo Type": "Chat bot",
42
- "query": kwargs.get("prompt", "No user input"),
43
- "response": kwargs.get("response", "No chat response"),
44
- "feedback": feedback["score"],
45
- "Response Language": st.session_state.language
46
- }
47
- }]
48
- }
49
- response = requests.post('https://api2.amplitude.com/2/httpapi', headers=headers, data=json.dumps(data))
50
- if response.status_code != 200:
51
- print(f"Request failed with status code {response.status_code}. Response Text: {response.text}")
52
-
53
- st.session_state.feedback_key += 1
54
 
55
  if "feedback_key" not in st.session_state:
56
  st.session_state.feedback_key = 0
@@ -157,7 +127,7 @@ def launch_bot():
157
  if st.session_state.messages[-1]["role"] != "assistant":
158
  with st.chat_message("assistant"):
159
  if cfg.streaming:
160
- stream = generate_streaming_response(prompt)
161
  response = st.write_stream(stream)
162
  else:
163
  with st.spinner("Thinking..."):
@@ -167,178 +137,20 @@ def launch_bot():
167
  st.session_state.messages.append(message)
168
 
169
  # Send query and response to Amplitude Analytics
170
- data = {
171
- "api_key": amp_api_key,
172
- "events": [{
173
- "device_id": st.session_state.device_id,
174
- "event_type": "submitted_query",
175
- "event_properties": {
176
- "Space Name": cfg["title"],
177
- "Demo Type": "Chat bot",
178
- "query": st.session_state.messages[-2]["content"],
179
- "response": st.session_state.messages[-1]["content"],
180
- "Response Language": st.session_state.language
181
- }
182
- }]
183
- }
184
- response = requests.post('https://api2.amplitude.com/2/httpapi', headers=headers, data=json.dumps(data))
185
- if response.status_code != 200:
186
- print(f"Request failed with status code {response.status_code}. Response Text: {response.text}")
187
  st.rerun()
188
 
189
  if (st.session_state.messages[-1]["role"] == "assistant") & (st.session_state.messages[-1]["content"] != "How may I help you?"):
190
  streamlit_feedback(feedback_type="thumbs", on_submit = thumbs_feedback, key = st.session_state.feedback_key,
191
- kwargs = {"prompt": st.session_state.messages[-2]["content"],
192
- "response": st.session_state.messages[-1]["content"],
193
- "title": cfg["title"]})
 
194
 
195
  if __name__ == "__main__":
196
- launch_bot()
197
-
198
-
199
-
200
- # from omegaconf import OmegaConf
201
- # from query import VectaraQuery
202
- # import os
203
-
204
- # import streamlit as st
205
- # from streamlit_pills import pills
206
- # from streamlit_feedback import streamlit_feedback
207
-
208
- # from PIL import Image
209
-
210
- # max_examples = 4
211
- # languages = {'English': 'eng', 'Spanish': 'spa', 'French': 'frs', 'Chinese': 'zho', 'German': 'deu', 'Hindi': 'hin', 'Arabic': 'ara',
212
- # 'Portuguese': 'por', 'Italian': 'ita', 'Japanese': 'jpn', 'Korean': 'kor', 'Russian': 'rus', 'Turkish': 'tur', 'Persian (Farsi)': 'fas',
213
- # 'Vietnamese': 'vie', 'Thai': 'tha', 'Hebrew': 'heb', 'Dutch': 'nld', 'Indonesian': 'ind', 'Polish': 'pol', 'Ukrainian': 'ukr',
214
- # 'Romanian': 'ron', 'Swedish': 'swe', 'Czech': 'ces', 'Greek': 'ell', 'Bengali': 'ben', 'Malay (or Malaysian)': 'msa', 'Urdu': 'urd'}
215
-
216
- # def isTrue(x) -> bool:
217
- # if isinstance(x, bool):
218
- # return x
219
- # return x.strip().lower() == 'true'
220
-
221
- # def thumbs_feedback(feedback, **kwargs):
222
- # print(f'Debug: Feedback Received {feedback["score"]} FROM user question {kwargs.get("prompt", "No user input")} AND chat response {kwargs.get("response", "No chat response")}. Detected response language {kwargs.get("language", "unknown")}')
223
- # st.session_state.feedback_key += 1
224
-
225
- # if "feedback_key" not in st.session_state:
226
- # st.session_state.feedback_key = 0
227
-
228
- # def launch_bot():
229
- # def generate_response(question):
230
- # response = vq.submit_query(question, languages[st.session_state.language])
231
- # return response
232
-
233
- # def generate_streaming_response(question):
234
- # response = vq.submit_query_streaming(question, languages[st.session_state.language])
235
- # return response
236
-
237
- # def show_example_questions():
238
- # if len(st.session_state.example_messages) > 0 and st.session_state.first_turn:
239
- # selected_example = pills("Queries to Try:", st.session_state.example_messages, index=None)
240
- # if selected_example:
241
- # st.session_state.ex_prompt = selected_example
242
- # st.session_state.first_turn = False
243
- # return True
244
- # return False
245
-
246
- # if 'cfg' not in st.session_state:
247
- # corpus_keys = str(os.environ['corpus_keys']).split(',')
248
- # cfg = OmegaConf.create({
249
- # 'corpus_keys': corpus_keys,
250
- # 'api_key': str(os.environ['api_key']),
251
- # 'title': os.environ['title'],
252
- # 'source_data_desc': os.environ['source_data_desc'],
253
- # 'streaming': isTrue(os.environ.get('streaming', False)),
254
- # 'prompt_name': os.environ.get('prompt_name', None),
255
- # 'examples': os.environ.get('examples', None),
256
- # 'language': 'English'
257
- # })
258
- # st.session_state.cfg = cfg
259
- # st.session_state.ex_prompt = None
260
- # st.session_state.first_turn = True
261
- # st.session_state.language = cfg.language
262
- # example_messages = [example.strip() for example in cfg.examples.split(",")]
263
- # st.session_state.example_messages = [em for em in example_messages if len(em)>0][:max_examples]
264
-
265
- # st.session_state.vq = VectaraQuery(cfg.api_key, cfg.corpus_keys, cfg.prompt_name)
266
-
267
- # cfg = st.session_state.cfg
268
- # vq = st.session_state.vq
269
- # st.set_page_config(page_title=cfg.title, layout="wide")
270
-
271
- # # left side content
272
- # with st.sidebar:
273
- # image = Image.open('Vectara-logo.png')
274
- # st.image(image, width=175)
275
- # st.markdown(f"## About\n\n"
276
- # f"This demo uses Retrieval Augmented Generation to ask questions about {cfg.source_data_desc}\n")
277
-
278
- # cfg.language = st.selectbox('Language:', languages.keys())
279
- # if st.session_state.language != cfg.language:
280
- # st.session_state.language = cfg.language
281
- # print(f"DEBUG: Language changed to {st.session_state.language}")
282
- # st.rerun()
283
-
284
- # st.markdown("---")
285
- # st.markdown(
286
- # "## How this works?\n"
287
- # "This app was built with [Vectara](https://vectara.com).\n"
288
- # "Vectara's [Indexing API](https://docs.vectara.com/docs/api-reference/indexing-apis/indexing) was used to ingest the data into a Vectara corpus (or index).\n\n"
289
- # "This app uses Vectara [Chat API](https://docs.vectara.com/docs/console-ui/vectara-chat-overview) to query the corpus and present the results to you, answering your question.\n\n"
290
- # )
291
- # st.markdown("---")
292
-
293
-
294
- # st.markdown(f"<center> <h2> Vectara AI Assistant: {cfg.title} </h2> </center>", unsafe_allow_html=True)
295
-
296
- # if "messages" not in st.session_state.keys():
297
- # st.session_state.messages = [{"role": "assistant", "content": "How may I help you?"}]
298
-
299
-
300
-
301
- # # Display chat messages
302
- # for message in st.session_state.messages:
303
- # with st.chat_message(message["role"]):
304
- # st.write(message["content"])
305
-
306
- # example_container = st.empty()
307
- # with example_container:
308
- # if show_example_questions():
309
- # example_container.empty()
310
- # st.rerun()
311
-
312
- # # select prompt from example question or user provided input
313
- # if st.session_state.ex_prompt:
314
- # prompt = st.session_state.ex_prompt
315
- # else:
316
- # prompt = st.chat_input()
317
- # if prompt:
318
- # st.session_state.messages.append({"role": "user", "content": prompt})
319
- # with st.chat_message("user"):
320
- # st.write(prompt)
321
- # st.session_state.ex_prompt = None
322
-
323
- # # Generate a new response if last message is not from assistant
324
- # if st.session_state.messages[-1]["role"] != "assistant":
325
- # with st.chat_message("assistant"):
326
- # if cfg.streaming:
327
- # stream = generate_streaming_response(prompt)
328
- # response = st.write_stream(stream)
329
- # else:
330
- # with st.spinner("Thinking..."):
331
- # response = generate_response(prompt)
332
- # st.write(response)
333
- # message = {"role": "assistant", "content": response}
334
- # st.session_state.messages.append(message)
335
- # st.rerun()
336
-
337
- # if (st.session_state.messages[-1]["role"] == "assistant") & (st.session_state.messages[-1]["content"] != "How may I help you?"):
338
- # streamlit_feedback(feedback_type="thumbs", on_submit = thumbs_feedback, key = st.session_state.feedback_key,
339
- # kwargs = {"prompt": st.session_state.messages[-2]["content"],
340
- # "response": st.session_state.messages[-1]["content"],
341
- # "language": st.session_state.language})
342
-
343
- # if __name__ == "__main__":
344
- # launch_bot()
 
1
  from omegaconf import OmegaConf
2
  from query import VectaraQuery
3
  import os
4
+ from PIL import Image
 
5
  import uuid
6
 
7
  import streamlit as st
8
  from streamlit_pills import pills
9
  from streamlit_feedback import streamlit_feedback
10
 
11
+ from utils import thumbs_feedback, send_amplitude_data
12
+
13
 
14
  max_examples = 6
15
  languages = {'English': 'eng', 'Spanish': 'spa', 'French': 'frs', 'Chinese': 'zho', 'German': 'deu', 'Hindi': 'hin', 'Arabic': 'ara',
 
21
  if 'device_id' not in st.session_state:
22
  st.session_state.device_id = str(uuid.uuid4())
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  if "feedback_key" not in st.session_state:
26
  st.session_state.feedback_key = 0
 
127
  if st.session_state.messages[-1]["role"] != "assistant":
128
  with st.chat_message("assistant"):
129
  if cfg.streaming:
130
+ stream = generate_streaming_response(prompt)
131
  response = st.write_stream(stream)
132
  else:
133
  with st.spinner("Thinking..."):
 
137
  st.session_state.messages.append(message)
138
 
139
  # Send query and response to Amplitude Analytics
140
+ send_amplitude_data(
141
+ user_query=st.session_state.messages[-2]["content"],
142
+ chat_response=st.session_state.messages[-1]["content"],
143
+ demo_name=cfg["title"],
144
+ language=st.session_state.language
145
+ )
 
 
 
 
 
 
 
 
 
 
 
146
  st.rerun()
147
 
148
  if (st.session_state.messages[-1]["role"] == "assistant") & (st.session_state.messages[-1]["content"] != "How may I help you?"):
149
  streamlit_feedback(feedback_type="thumbs", on_submit = thumbs_feedback, key = st.session_state.feedback_key,
150
+ kwargs = {"user_query": st.session_state.messages[-2]["content"],
151
+ "chat_response": st.session_state.messages[-1]["content"],
152
+ "demo_name": cfg["title"],
153
+ "response_language": st.session_state.language})
154
 
155
  if __name__ == "__main__":
156
+ launch_bot()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
query.py CHANGED
@@ -125,138 +125,4 @@ class VectaraQuery():
125
  chunks.append(chunk)
126
  yield chunk
127
 
128
- return ''.join(chunks)
129
-
130
-
131
- # import requests
132
- # import json
133
-
134
-
135
- # class VectaraQuery():
136
- # def __init__(self, api_key: str, corpus_keys: list[str], prompt_name: str = None):
137
- # self.corpus_keys = corpus_keys
138
- # self.api_key = api_key
139
- # self.prompt_name = prompt_name if prompt_name else "vectara-experimental-summary-ext-2023-12-11-sml"
140
- # self.conv_id = None
141
-
142
-
143
- # def get_body(self, query_str: str, response_lang: str, stream: False):
144
- # corpora_list = [{
145
- # 'corpus_key': corpus_key, 'lexical_interpolation': 0.005
146
- # } for corpus_key in self.corpus_keys
147
- # ]
148
-
149
- # return {
150
- # 'query': query_str,
151
- # 'search':
152
- # {
153
- # 'corpora': corpora_list,
154
- # 'offset': 0,
155
- # 'limit': 50,
156
- # 'context_configuration':
157
- # {
158
- # 'sentences_before': 2,
159
- # 'sentences_after': 2,
160
- # 'start_tag': "%START_SNIPPET%",
161
- # 'end_tag': "%END_SNIPPET%",
162
- # },
163
- # 'reranker':
164
- # {
165
- # 'type': 'mmr'
166
- # },
167
- # },
168
- # 'generation':
169
- # {
170
- # 'prompt_name': self.prompt_name,
171
- # 'max_used_search_results': 10,
172
- # 'response_language': response_lang,
173
- # 'citations':
174
- # {
175
- # 'style': 'none'
176
- # },
177
- # 'enable_factual_consistency_score': False
178
- # },
179
- # 'chat':
180
- # {
181
- # 'store': True
182
- # },
183
- # 'stream_response': stream
184
- # }
185
-
186
-
187
- # def get_headers(self):
188
- # return {
189
- # "Content-Type": "application/json",
190
- # "Accept": "application/json",
191
- # "x-api-key": self.api_key,
192
- # "grpc-timeout": "60S"
193
- # }
194
-
195
- # def get_stream_headers(self):
196
- # return {
197
- # "Content-Type": "application/json",
198
- # "Accept": "text/event-stream",
199
- # "x-api-key": self.api_key,
200
- # "grpc-timeout": "60S"
201
- # }
202
-
203
- # def submit_query(self, query_str: str, language: str):
204
-
205
- # if self.conv_id:
206
- # endpoint = f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
207
- # else:
208
- # endpoint = "https://api.vectara.io/v2/chats"
209
-
210
- # body = self.get_body(query_str, language, stream=False)
211
-
212
- # response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_headers())
213
- # if response.status_code != 200:
214
- # print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
215
- # return "Sorry, something went wrong in my brain. Please try again later."
216
-
217
- # res = response.json()
218
-
219
- # if self.conv_id is None:
220
- # self.conv_id = res['chat_id']
221
-
222
- # summary = res['answer']
223
-
224
- # # FIGURE OUT HOW TO IMPLEMENT THIS IN APIV2
225
- # # if chat and chat['status'] is not None:
226
- # # st_code = chat['status']
227
- # # print(f"Chat query failed with code {st_code}")
228
- # # if st_code == 'RESOURCE_EXHAUSTED':
229
- # # self.conv_id = None
230
- # # return 'Sorry, Vectara chat turns exceeds plan limit.'
231
- # # return 'Sorry, something went wrong in my brain. Please try again later.'
232
-
233
- # return summary
234
-
235
- # def submit_query_streaming(self, query_str: str, language: str):
236
-
237
- # if self.conv_id:
238
- # endpoint = f"https://api.vectara.io/v2/chats/{self.conv_id}/turns"
239
- # else:
240
- # endpoint = "https://api.vectara.io/v2/chats"
241
-
242
- # body = self.get_body(query_str, language, stream=True)
243
-
244
- # response = requests.post(endpoint, data=json.dumps(body), verify=True, headers=self.get_stream_headers(), stream=True)
245
-
246
- # if response.status_code != 200:
247
- # print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
248
- # return "Sorry, something went wrong in my brain. Please try again later."
249
-
250
- # chunks = []
251
- # for line in response.iter_lines():
252
- # line = line.decode('utf-8')
253
- # if line: # filter out keep-alive new lines
254
- # key, value = line.split(':', 1)
255
- # if key == 'data':
256
- # line = json.loads(value)
257
- # if line['type'] == 'generation_chunk':
258
- # chunk = line['generation_chunk']
259
- # chunks.append(chunk)
260
- # yield chunk
261
-
262
- # return ''.join(chunks)
 
125
  chunks.append(chunk)
126
  yield chunk
127
 
128
+ return ''.join(chunks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import json
4
+ # import re
5
+
6
+ import streamlit as st
7
+
8
+ headers = {
9
+ 'Content-Type': 'application/json',
10
+ 'Accept': '*/*'
11
+ }
12
+
13
def thumbs_feedback(feedback, **kwargs):
    """Forward a thumbs up/down rating to Amplitude Analytics.

    Registered as the ``on_submit`` callback of ``streamlit_feedback``:
    ``feedback`` carries the user's rating under the "score" key, while
    the query, response, demo name and language arrive through ``kwargs``.
    """
    defaults = {
        "user_query": "No user input",
        "chat_response": "No bot response",
        "demo_name": "Unknown",
        "response_language": "Unknown",
    }
    fields = {name: kwargs.get(name, fallback) for name, fallback in defaults.items()}
    send_amplitude_data(
        user_query=fields["user_query"],
        chat_response=fields["chat_response"],
        demo_name=fields["demo_name"],
        language=fields["response_language"],
        feedback=feedback["score"],
    )
    # Bump the widget key so streamlit_feedback renders a fresh widget on the next turn.
    st.session_state.feedback_key += 1
26
+
27
def send_amplitude_data(user_query, chat_response, demo_name, language, feedback=None):
    """Send a single analytics event to Amplitude's HTTP V2 API.

    Args:
        user_query: The user's chat prompt.
        chat_response: The assistant's reply.
        demo_name: Space/demo title, logged as the "Space Name" property.
        language: Response language selected in the UI.
        feedback: Optional thumbs score. When provided, the event is logged
            as a feedback event rather than a plain query event.

    Failures are printed and otherwise ignored — analytics must never
    break the chat flow.
    """
    # Distinguish feedback events from plain query events. The pre-refactor
    # app.py code sent "provided_feedback" for thumbs feedback; collapsing
    # both into "submitted_query" would make the two indistinguishable.
    # Use an explicit None check so a falsy-but-present score is still kept.
    has_feedback = feedback is not None
    event_properties = {
        "Space Name": demo_name,
        "Demo Type": "chatbot",
        "query": user_query,
        "response": chat_response,
        "Response Language": language,
    }
    if has_feedback:
        event_properties["feedback"] = feedback

    data = {
        "api_key": os.getenv('AMPLITUDE_TOKEN'),
        "events": [{
            "device_id": st.session_state.device_id,
            "event_type": "provided_feedback" if has_feedback else "submitted_query",
            "event_properties": event_properties,
        }]
    }
    # Explicit timeout: requests.post blocks forever by default, which would
    # hang the Streamlit script if the analytics endpoint is unresponsive.
    response = requests.post('https://api2.amplitude.com/2/httpapi',
                             headers=headers, data=json.dumps(data), timeout=10)
    if response.status_code != 200:
        print(f"Amplitude request failed with status code {response.status_code}. Response Text: {response.text}")
49
+
50
+ # def escape_dollars_outside_latex(text):
51
+ # # Define a regex pattern to find LaTeX equations (either single $ or double $$)
52
+ # pattern = re.compile(r'(\$\$.*?\$\$|\$.*?\$)')
53
+ # latex_matches = pattern.findall(text)
54
+
55
+ # # Placeholder to temporarily store LaTeX equations
56
+ # placeholders = {}
57
+ # for i, match in enumerate(latex_matches):
58
+ # placeholder = f'__LATEX_PLACEHOLDER_{i}__'
59
+ # placeholders[placeholder] = match
60
+ # text = text.replace(match, placeholder)
61
+
62
+ # # Escape dollar signs in the rest of the text
63
+ # text = text.replace('$', '\\$')
64
+
65
+ # # Replace placeholders with the original LaTeX equations
66
+ # for placeholder, original in placeholders.items():
67
+ # text = text.replace(placeholder, original)
68
+ # return text
69
+