epalvarez committed on
Commit f8e9437 · verified · 1 Parent(s): 637e2cd

Latest update of app.py, a newer version of the langchain package in requirements.txt, and the tesla_db vector database with 448 embedding dimensions and an overlap of 24.
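For context, a minimal sketch of how a Chroma store like tesla_db could be rebuilt with these settings. It assumes the 448/24 figures refer to the text splitter's chunk size and overlap; the input folder, PyPDFLoader usage, and pypdf dependency are illustrative assumptions, and the actual build script is not part of this commit.

# Hypothetical rebuild of the tesla_db vector store (not part of this commit).
# chunk_size/chunk_overlap come from the commit message; paths and loader are assumptions.
from pathlib import Path
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
splitter = RecursiveCharacterTextSplitter(chunk_size=448, chunk_overlap=24)

docs = []
for pdf_path in Path('tesla_10k_reports').glob('*.pdf'):  # placeholder input folder
    docs.extend(splitter.split_documents(PyPDFLoader(str(pdf_path)).load()))

vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    collection_name='tesla-10k-2019-to-2023',
    persist_directory='./tesla_db',  # the directory committed here
)
vectorstore.persist()  # write the collection to disk so app.py can reload it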

app.py CHANGED
@@ -1,10 +1,17 @@
 import os
 import uuid
 import json
 from pathlib import Path

- # ATTENTION: some versions in the requirements.txt file are more current than the ones used in the notebook
-
 # GUI components
 import gradio as gr

@@ -14,6 +21,7 @@ import gradio as gr
 from openai import OpenAI, OpenAIError

 # Embedding operations & Vector DB creation
 from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma

@@ -23,13 +31,17 @@ from huggingface_hub import CommitScheduler
 # Set working directory (HuggingFace space)
 hf_space_dir = os.getcwd()

 # Anyscale model hosting platform NOT USED in this notebook. OpenAI model hosting platform used instead.
 # client = OpenAI(
 #     base_url="https://api.endpoints.anyscale.com/v1",
 #     api_key=os.environ['ANYSCALE_API_KEY']
 # )

- #----------------------------------------------------------------------
 # OpenAI API key stored as a "secret" in the HuggingFace Space
 # OPENAI_API_KEY

@@ -63,8 +75,11 @@ client = OpenAI(
     # api_key=os.environ.get("OPENAI_API_KEY"),
     api_key=openai_api_key,
 )
- #---------------------------------------------------------------------

 # embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
 # The gte-small model, part of the GTE family of models designed for retrieval tasks, uses 384 embedding dimensions.
 # This dimensionality allows the model to capture semantic information effectively while maintaining a relatively small model size for efficiency in retrieval tasks.
@@ -77,27 +92,46 @@ embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
 tesla_10k_collection = 'tesla-10k-2019-to-2023'

 # Example: Creating a collection with the correct dimensionality
- # tesla_10k_collection = Chroma.create_collection("tesla-10k-2019-to-2023", embedding_dim=384)


 # vector database constructor Chroma()
 vectorstore_persisted = Chroma(
-     collection_name=tesla_10k_collection,
-     persist_directory='./tesla_db',
-     embedding_function=embedding_model
 )

 retriever = vectorstore_persisted.as_retriever(
-     search_type='similarity',
-     search_kwargs={'k': 5}
 )

- # Prepare the logging functionality

 log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent

 scheduler = CommitScheduler(
     repo_id="document-qna-chroma-openai-logs", # name of the log folder containing json elements --> HuggingFace dataset # OLD name: "document-qna-chroma-anyscale-logs",
     repo_type="dataset",
@@ -106,60 +140,115 @@ scheduler = CommitScheduler(
     every=2 # execute every two API calls
 )

 qna_system_message = """
 You are an assistant to a financial services firm who answers user queries on annual reports.
- Users will ask questions delimited by triple backticks, that is, ```.
 User input will have the context required by you to answer user questions.
 This context will begin with the token: ###Context.
 The context contains references to specific portions of a document relevant to the user query.
- Please answer only using the context provided in the input. However, do not mention anything about the context in your answer.
 If the answer is not found in the context, respond "I don't know".
 """

 qna_user_message_template = """
 ###Context
- Here are some documents that are relevant to the question.
 {context}
 ```
 {question}
 ```
 """

- # Define the predict function that runs when 'Submit' is clicked or when a API request is made
 def predict(user_input):

-     relevant_document_chunks = retriever.invoke(user_input)
-     # relevant_document_chunks = retriever.get_relevant_documents(query = user_input)
-     context_list = [doc_chunk.page_content for doc_chunk in relevant_document_chunks]
-     context_for_query = ". ".join(context_list)

     prompt = [
         {'role':'system', 'content': qna_system_message},
-         {'role':'user', 'content': qna_user_message_template.format(
-             context=context_for_query,
-             question=user_input
             )
         }
     ]

     try:
         response = client.chat.completions.create(
-             model=model_name, # previous model used: 'mlabonne/NeuralHermes-2.5-Mistral-7B',
             messages=prompt,
-             temperature=0, # Temperature > 0 to encourage creative answer... Temperature = 0.7: A common setting that provides a balance between creativity and coherence.
-             # max_tokens=200 # Limit the number of tokens in the response
         )
-
         prediction = response.choices[0].message.content.strip() # Access response attributes directly
-
     except Exception as e:
-         prediction = e

     # While the prediction is made, log both the inputs and outputs to a local log file (i.e., HuggingFace dataset)
     # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
-     # access (i.e., put a lock on the state of the log_file in case user are entering queries while the log operation is in progress.)
     # Note: the log_file is a json file.
-
     with scheduler.lock:
         with log_file.open("a") as f:
             # json.dumps turns the dictionary into a json string containing 'user_input', 'context_for_query', and 'prediction'
@@ -171,19 +260,31 @@ def predict(user_input):
                 }
             ))
             f.write("\n") # write a new line to prepare for the next observation to be logged
-
-     return prediction


- textbox = gr.Textbox(placeholder="Enter your query here", lines=6)

 # Create the interface
 demo = gr.Interface(
-     inputs=textbox, fn=predict, outputs="text",
-     title="Ask Me Anything (AMA) on Tesla 10-K statements",
-     description="This web API presents an interface to ask questions on contents of the Tesla 10-K reports for the period 2019 - 2023.",
-     article="Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
-     concurrency_limit=16
 )


@@ -200,5 +301,7 @@ demo = gr.Interface(
 #     concurrency_limit=16
 # )

 demo.queue()
- demo.launch()
+ # +++
+ # Import the libraries
+ #---------------------------------------------------------------------------------------------------------
+ # Import libraries for issuing OS commands, in addition to the built-in format using the '!' escape character prefix
 import os
+ # Tokenizing and data formatting
 import uuid
 import json
+ # Data management
+ import numpy as np
+ import pandas as pd
+ # For file path operations
 from pathlib import Path

 # GUI components
 import gradio as gr

 
 from openai import OpenAI, OpenAIError

 # Embedding operations & Vector DB creation
+ # from langchain_core.documents import Document
 from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma

 # Set working directory (HuggingFace space)
 hf_space_dir = os.getcwd()

+ # Obtain current directory and data file path
+ hf_space_app_dir_path = Path.cwd()
+ print(f"HuggingFace Space application directory: {hf_space_app_dir_path}\n")
+
 # Anyscale model hosting platform NOT USED in this notebook. OpenAI model hosting platform used instead.
 # client = OpenAI(
 #     base_url="https://api.endpoints.anyscale.com/v1",
 #     api_key=os.environ['ANYSCALE_API_KEY']
 # )

+ #--------------------------------------------------------------------------------------------------------
 # OpenAI API key stored as a "secret" in the HuggingFace Space
 # OPENAI_API_KEY

     # api_key=os.environ.get("OPENAI_API_KEY"),
     api_key=openai_api_key,
 )

+ print(f"OpenAI client created and authenticated with API key.\nUsing OpenAI model: {model_name}\n")
+ #--------------------------------------------------------------------------------------------------------
+
+ print(f"Loading Vector DB from HuggingFace Space file space...\n")
 # embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
 # The gte-small model, part of the GTE family of models designed for retrieval tasks, uses 384 embedding dimensions.
 # This dimensionality allows the model to capture semantic information effectively while maintaining a relatively small model size for efficiency in retrieval tasks.
 
 tesla_10k_collection = 'tesla-10k-2019-to-2023'

 # Example: Creating a collection with the correct dimensionality
+ # tesla_10k_collection = Chroma.create_collection("tesla-10k-2019-to-2023", embedding_dim=1024)
+
+ persisted_vectordb_path = Path.joinpath(hf_space_app_dir_path, 'tesla_db') # this is a pathlib object
+
+ # persisted_vectordb_location = persisted_vectordb_path # this is a pathlib object ... it raises an error in the Chroma parameter "persist_directory", which expects a string object, not a pathlib object.
+ persisted_vectordb_location = str(persisted_vectordb_path) # convert path to string
+
+ print(f"Vector database location:\n{persisted_vectordb_location}\n")

 # vector database constructor Chroma()
 vectorstore_persisted = Chroma(
+     collection_name = tesla_10k_collection,
+     persist_directory = persisted_vectordb_location, # previously './tesla_db'
+     embedding_function = embedding_model
 )

+ # Return a VectorStoreRetriever initialized from this VectorStore.
 retriever = vectorstore_persisted.as_retriever(
+     search_type = 'similarity',
+     search_kwargs = {'k': 5}
 )
+ # Args:
+ #     search_type (Optional[str]): Defines the type of search that the Retriever should perform.
+ #         Can be "similarity" (default), "mmr", or "similarity_score_threshold".
+ #     search_kwargs (Optional[Dict]): Keyword arguments to pass to the search function. Can include:
+ #         k: Number of documents to return (default: 4)
+ #         score_threshold: Minimum relevance threshold for similarity_score_threshold
+
+ print(f"Successfully obtained a VectorStoreRetriever initialized from the vector database.\n")

 
+ # Prepare the logging functionality
 log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent
+ print(f"\nLogging dataset information:\n\tlog_file: {log_file}\n\tlog_folder: {log_folder}\n")
+

+ # Scheduler will log every 2 API calls:
 scheduler = CommitScheduler(
     repo_id="document-qna-chroma-openai-logs", # name of the log folder containing json elements --> HuggingFace dataset # OLD name: "document-qna-chroma-anyscale-logs",
     repo_type="dataset",

     every=2 # execute every two API calls
 )

+ print(f"Retrieval Augmented Generation (RAG) Q&A\nLLM Prompt initialization... (System prompt and user_input template)\n")
+
+ # LLM System Prompt
 qna_system_message = """
 You are an assistant to a financial services firm who answers user queries on annual reports.
 User input will have the context required by you to answer user questions.
 This context will begin with the token: ###Context.
 The context contains references to specific portions of a document relevant to the user query.
+
+ User questions will begin with the token: ###Question, and the question text will be delimited by triple backticks, that is, ```.
+
+ Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.
+
 If the answer is not found in the context, respond "I don't know".
 """

+ # LLM user_input template
 qna_user_message_template = """
 ###Context
+ Here are some documents that are relevant to the question mentioned below.
 {context}
+
+ ###Question
 ```
 {question}
 ```
 """

+ # ANOTHER VERSION:
+ # # LLM System Prompt
+ # qna_system_message = """
+ # You are an assistant to a financial services firm who answers user queries on annual reports.
+ # Users will ask questions delimited by triple backticks, that is, ```.
+ # User input will have the context required by you to answer user questions.
+ # This context will begin with the token: ###Context.
+ # The context contains references to specific portions of a document relevant to the user query.
+ # Please answer only using the context provided in the input. However, do not mention anything about the context in your answer.
+ # If the answer is not found in the context, respond "I don't know".
+ # """
+
+ # # LLM user_input template
+ # qna_user_message_template = """
+ # ###Context
+ # Here are some documents that are relevant to the question.
+ # {context}
+ # ```
+ # {question}
+ # ```
+ # """
+
+ # Define the "predict function" which will take the user_input, obtain the relevant context to answer the user question more accurately, and pass
+ # both to the OpenAI client to make predictions using the OpenAI LLM model.
+ # The function runs when 'Submit' is clicked or when an API request is made.
+ #-------------------------------------------------------------------------------------------------------------------------------------------------------------
 def predict(user_input):

+     # COMPOSING THE RESPONSE
+
+     # Retrieving relevant documents
+     relevant_document_chunks = retriever.get_relevant_documents(query = user_input) # relevant_document_chunks = retriever.invoke(user_input)
+     print(f"Relevant document chunks = {len(relevant_document_chunks)}")
+     print(f"RELEVANT DOCUMENT CHUNKS TO BE USED AS CONTEXT TO ANSWER THE USER QUESTION:\n")
+     print("-"*80)
+     i = 0
+     for document in relevant_document_chunks:
+         print(f"\nDocument chunk {i+1}:")
+         i += 1
+         print(f"Metadata:\nSource: {document.metadata['source']}\nPage: {document.metadata['page']}\n")
+         print(f"Page content:\n-------------")
+         print(document.page_content.replace('\t', ' ')) # replace all tabs used as separators by default with a single space
+         print("-"*80)

+     context_list = [doc_chunk.page_content for doc_chunk in relevant_document_chunks] # doc_chunk.page_content.replace('\t', ' ') # replace all tabs used as separators by default with a single space
+
+     context_for_query = ". ".join(context_list)
+     # (method)
+     # join(__iterable: Iterable[LiteralString], /) -> LiteralString
+     # join(__iterable: Iterable[str], /) -> str
+
     prompt = [
         {'role':'system', 'content': qna_system_message},
+         {'role': 'user', 'content': qna_user_message_template.format(
+             context = context_for_query,
+             question = user_input
             )
         }
     ]

     try:
         response = client.chat.completions.create(
+             model=model_name, # previous model used: 'mlabonne/NeuralHermes-2.5-Mistral-7B',
             messages=prompt,
+             temperature=0, # Temperature > 0 encourages more creative answers; 0.7 is a common setting that balances creativity and coherence.
+             # max_tokens=400 # Limit the number of tokens in the response
         )
         prediction = response.choices[0].message.content.strip() # Access response attributes directly
+     # Handle API errors (OpenAIError is imported from the openai package above)
+     except OpenAIError as e:
+         prediction = f'Sorry, I encountered the following OpenAI error: \n {e}'
     except Exception as e:
+         prediction = f'Sorry, I encountered the following error: \n {e}'
+

     # While the prediction is made, log both the inputs and outputs to a local log file (i.e., HuggingFace dataset)
     # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
+     # access (i.e., put a lock on the state of the log_file in case users are entering queries while the log operation is in progress.)
+     # Write user_input, context and prediction to a HuggingFace dataset repo for logging
+     # Each time we get a prediction we will determine if we should log it to a HuggingFace dataset according to the scheduler definition outside this function
     # Note: the log_file is a json file.
 
     with scheduler.lock:
         with log_file.open("a") as f:
             # json.dumps turns the dictionary into a json string containing 'user_input', 'context_for_query', and 'prediction'

                 }
             ))
             f.write("\n") # write a new line to prepare for the next observation to be logged
+
+     prediction_result = prediction
+     print(f"\nPrediction result: {prediction_result} - {type(prediction_result)}\n")
+
+     return prediction_result
+ #-------------------------------------------------------------------------------------------------------------------------------------------------------------


+ # Set up UI components for input and output
+ # Input components
+ user_question_textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
+ # Output components
+ model_prediction = gr.Label(label="Model prediction")
+ # model_prediction = "text"

 # Create the interface
 demo = gr.Interface(
+     fn = predict,
+     inputs = user_question_textbox,
+     outputs = model_prediction, # "text"
+     title = "Ask Me Anything (AMA) on Tesla 10-K statements",
+     description = "This web API presents an interface to ask questions about the contents of the Tesla 10-K reports for the period 2019 - 2023.",
+     article = "Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
+     allow_flagging = "auto", # automatically push to the HuggingFace Dataset
+     concurrency_limit = 16
 )


 #     concurrency_limit=16
 # )

+ # Launch container hosted by HuggingFace with a load balancer
 demo.queue()
+ demo.launch(share=False)
+ # To create a public link, set "share=True" in launch() ... but if this app.py is executed locally, the computer must stay on for public users to reach the browser interface
requirements.txt CHANGED
@@ -1,5 +1,8 @@
 openai==1.23.2
 chromadb==0.4.22
- langchain==0.1.1
- langchain-community==0.0.13
- sentence-transformers==2.3.1

 openai==1.23.2
 chromadb==0.4.22
+ langchain==0.1.9
+ langchain-community==0.0.32
+ sentence-transformers==2.3.1
+ pathlib==1.0.1
+ pandas==2.1.4
+ numpy==1.26.4
tesla_db/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:06ed87d12e6ee1b0b1f1f81fdfa4576e32f095c86fb7a2a66d0200a7640da76a
- size 46223360

 version https://git-lfs.github.com/spec/v1
+ oid sha256:16fbd2946c0267a17248a40998bc5a3446a65dee1b8b5b356b6018da86cabbf0
+ size 47091712
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:250d230dee83b006de1787c35948d598ddcffe236f81eafabbe31805e7469749
+ size 12708000
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e07096989a6d97db01f79643d6aa43690488b6a066eb20e594135825e0e34a70
+ size 100
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbc8823be5a3c3b78663ff93c9d05602df74d072b371471e88126f8df52eaabd
+ size 172004
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe6a12fdf9e9d1787634155c2f404ffea281dfd121ddd597deb71e17317f2576
+ size 12000
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6322a3a45d75cc9f31cc0eafac6458a325d4c8e51525be50cc57030505294c41
+ size 25736