epalvarez committed on
Commit f8e9437 · verified · 1 Parent(s): 637e2cd

Latest update of app.py, a newer version of the langchain package in requirements.txt, and the tesla_db vector database with 448 embedding dimensions and an overlap of 24.
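For context, a minimal sketch of how a Chroma store like tesla_db could be rebuilt with these settings. It assumes the 448/24 figures refer to the text splitter's chunk size and overlap; the input folder, PyPDFLoader usage, and pypdf dependency are illustrative assumptions, and the actual build script is not part of this commit.

# Hypothetical rebuild of the tesla_db vector store (not part of this commit).
# chunk_size/chunk_overlap come from the commit message; paths and loader are assumptions.
from pathlib import Path
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
splitter = RecursiveCharacterTextSplitter(chunk_size=448, chunk_overlap=24)

docs = []
for pdf_path in Path('tesla_10k_reports').glob('*.pdf'):  # placeholder input folder
    docs.extend(splitter.split_documents(PyPDFLoader(str(pdf_path)).load()))

vectorstore = Chroma.from_documents(
    documents=docs,
    embedding=embedding_model,
    collection_name='tesla-10k-2019-to-2023',
    persist_directory='./tesla_db',  # the directory committed here
)
vectorstore.persist()  # write the collection to disk so app.py can reload it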

app.py CHANGED
@@ -1,10 +1,17 @@
 import os
 import uuid
 import json
 from pathlib import Path

- # ATTENTION: some versions in the requirements.txt file are more current than the ones used in the notebook
-
 # GUI components
 import gradio as gr

@@ -14,6 +21,7 @@ import gradio as gr
 from openai import OpenAI, OpenAIError

 # Embedding operations & Vector DB creation
 from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma

@@ -23,13 +31,17 @@ from huggingface_hub import CommitScheduler
 # Set working directory (HuggingFace space)
 hf_space_dir = os.getcwd()

 # Anyscale model hosting platform NOT USED in this notebook. OpenAI model hosting platform used instead.
 # client = OpenAI(
 #     base_url="https://api.endpoints.anyscale.com/v1",
 #     api_key=os.environ['ANYSCALE_API_KEY']
 # )

- #----------------------------------------------------------------------
 # OpenAI API key stored as a "secret" in the HuggingFace Space
 # OPENAI_API_KEY

@@ -63,8 +75,11 @@ client = OpenAI(
     # api_key=os.environ.get("OPENAI_API_KEY"),
     api_key=openai_api_key,
 )
- #---------------------------------------------------------------------

 # embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
 # The gte-small model, part of the GTE family of models designed for retrieval tasks, uses 384 embedding dimensions.
 # This dimensionality allows the model to capture semantic information effectively while maintaining a relatively small model size for efficiency in retrieval tasks.
@@ -77,27 +92,46 @@ embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
 tesla_10k_collection = 'tesla-10k-2019-to-2023'

 # Example: Creating a collection with the correct dimensionality
- # tesla_10k_collection = Chroma.create_collection("tesla-10k-2019-to-2023", embedding_dim=384)


 # vector database constructor Chroma()
 vectorstore_persisted = Chroma(
-     collection_name=tesla_10k_collection,
-     persist_directory='./tesla_db',
-     embedding_function=embedding_model
 )

 retriever = vectorstore_persisted.as_retriever(
-     search_type='similarity',
-     search_kwargs={'k': 5}
 )

- # Prepare the logging functionality

 log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent

 scheduler = CommitScheduler(
     repo_id="document-qna-chroma-openai-logs", # name of the log folder containing json elements --> HuggingFace dataset # OLD name: "document-qna-chroma-anyscale-logs",
     repo_type="dataset",
@@ -106,60 +140,115 @@ scheduler = CommitScheduler(
     every=2 # execute every two API calls
 )

 qna_system_message = """
 You are an assistant to a financial services firm who answers user queries on annual reports.
- Users will ask questions delimited by triple backticks, that is, ```.
 User input will have the context required by you to answer user questions.
 This context will begin with the token: ###Context.
 The context contains references to specific portions of a document relevant to the user query.
- Please answer only using the context provided in the input. However, do not mention anything about the context in your answer.
 If the answer is not found in the context, respond "I don't know".
 """

 qna_user_message_template = """
 ###Context
- Here are some documents that are relevant to the question.
 {context}
 ```
 {question}
 ```
 """

- # Define the predict function that runs when 'Submit' is clicked or when a API request is made
 def predict(user_input):

-     relevant_document_chunks = retriever.invoke(user_input)
-     # relevant_document_chunks = retriever.get_relevant_documents(query = user_input)
-     context_list = [doc_chunk.page_content for doc_chunk in relevant_document_chunks]
-     context_for_query = ". ".join(context_list)

     prompt = [
         {'role':'system', 'content': qna_system_message},
-         {'role':'user', 'content': qna_user_message_template.format(
-             context=context_for_query,
-             question=user_input
             )
         }
     ]

     try:
         response = client.chat.completions.create(
-             model=model_name, # previous model used: 'mlabonne/NeuralHermes-2.5-Mistral-7B',
             messages=prompt,
-             temperature=0, # Temperature > 0 to encourage creative answer... Temperature = 0.7: A common setting that provides a balance between creativity and coherence.
-             # max_tokens=200 # Limit the number of tokens in the response
         )
-
         prediction = response.choices[0].message.content.strip() # Access response attributes directly
-
     except Exception as e:
-         prediction = e

     # While the prediction is made, log both the inputs and outputs to a local log file (i.e., HuggingFace dataset)
     # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
-     # access (i.e., put a lock on the state of the log_file in case user are entering queries while the log operation is in progress.)
     # Note: the log_file is a json file.
-
     with scheduler.lock:
         with log_file.open("a") as f:
             # json.dumps turns the dictionary into a json string containing 'user_input', 'context_for_query', and 'prediction'
@@ -171,19 +260,31 @@ def predict(user_input):
                 }
             ))
             f.write("\n") # write a new line to prepare for the next observation to be logged
-
-     return prediction


- textbox = gr.Textbox(placeholder="Enter your query here", lines=6)

 # Create the interface
 demo = gr.Interface(
-     inputs=textbox, fn=predict, outputs="text",
-     title="Ask Me Anything (AMA) on Tesla 10-K statements",
-     description="This web API presents an interface to ask questions on contents of the Tesla 10-K reports for the period 2019 - 2023.",
-     article="Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
-     concurrency_limit=16
 )


@@ -200,5 +301,7 @@ demo = gr.Interface(
 #     concurrency_limit=16
 # )

 demo.queue()
- demo.launch()
+ # +++
+ # Import the libraries
+ #---------------------------------------------------------------------------------------------------------
+ # Import libraries for issuing OS commands, in addition to the built-in format using the '!' escape character prefix
 import os
+ # Tokenizing and data formatting
 import uuid
 import json
+ # Data management
+ import numpy as np
+ import pandas as pd
+ # For file path operations
 from pathlib import Path

 # GUI components
 import gradio as gr

 
 from openai import OpenAI, OpenAIError

 # Embedding operations & Vector DB creation
+ # from langchain_core.documents import Document
 from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
 from langchain_community.vectorstores import Chroma

 # Set working directory (HuggingFace space)
 hf_space_dir = os.getcwd()

+ # Obtain current directory and data file path
+ hf_space_app_dir_path = Path.cwd()
+ print(f"HuggingFace Space application directory: {hf_space_app_dir_path}\n")
+
 # Anyscale model hosting platform NOT USED in this notebook. OpenAI model hosting platform used instead.
 # client = OpenAI(
 #     base_url="https://api.endpoints.anyscale.com/v1",
 #     api_key=os.environ['ANYSCALE_API_KEY']
 # )

+ #--------------------------------------------------------------------------------------------------------
 # OpenAI API key stored as a "secret" in the HuggingFace Space
 # OPENAI_API_KEY

     # api_key=os.environ.get("OPENAI_API_KEY"),
     api_key=openai_api_key,
 )

+ print(f"OpenAI client created and authenticated with API key.\nUsing OpenAI model: {model_name}\n")
+ #--------------------------------------------------------------------------------------------------------
+
+ print(f"Loading Vector DB from HuggingFace Space file space...\n")
 # embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
 # The gte-small model, part of the GTE family of models designed for retrieval tasks, uses 384 embedding dimensions.
 # This dimensionality allows the model to capture semantic information effectively while maintaining a relatively small model size for efficiency in retrieval tasks.
 
 tesla_10k_collection = 'tesla-10k-2019-to-2023'

 # Example: Creating a collection with the correct dimensionality
+ # tesla_10k_collection = Chroma.create_collection("tesla-10k-2019-to-2023", embedding_dim=1024)
+
+ persisted_vectordb_path = Path.joinpath(hf_space_app_dir_path, 'tesla_db') # this is a pathlib object
+
+ # persisted_vectordb_location = persisted_vectordb_path # this is a pathlib object ... it raises an error in the Chroma parameter "persist_directory", which expects a string object, not a pathlib object.
+ persisted_vectordb_location = str(persisted_vectordb_path) # convert path to string
+
+ print(f"Vector database location:\n{persisted_vectordb_location}\n")

 # vector database constructor Chroma()
 vectorstore_persisted = Chroma(
+     collection_name = tesla_10k_collection,
+     persist_directory = persisted_vectordb_location, # previously './tesla_db'
+     embedding_function = embedding_model
 )

+ # Return a VectorStoreRetriever initialized from this VectorStore.
 retriever = vectorstore_persisted.as_retriever(
+     search_type = 'similarity',
+     search_kwargs = {'k': 5}
 )
+ # Args:
+ #     search_type (Optional[str]): Defines the type of search that the Retriever should perform.
+ #         Can be "similarity" (default), "mmr", or "similarity_score_threshold".
+ #     search_kwargs (Optional[Dict]): Keyword arguments to pass to the search function. Can include:
+ #         k: Number of documents to return (default: 4)
+ #         score_threshold: Minimum relevance threshold for similarity_score_threshold
+
+ print(f"Successfully obtained a VectorStoreRetriever initialized from the vector database.\n")

 
+ # Prepare the logging functionality
 log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
 log_folder = log_file.parent
+ print(f"\nLogging dataset information:\n\tlog_file: {log_file}\n\tlog_folder: {log_folder}\n")
+

+ # Scheduler will log every 2 API calls:
 scheduler = CommitScheduler(
     repo_id="document-qna-chroma-openai-logs", # name of the log folder containing json elements --> HuggingFace dataset # OLD name: "document-qna-chroma-anyscale-logs",
     repo_type="dataset",

     every=2 # execute every two API calls
 )

+ print(f"Retrieval Augmented Generation (RAG) Q&A\nLLM Prompt initialization... (System prompt and user_input template)\n")
+
+ # LLM System Prompt
 qna_system_message = """
 You are an assistant to a financial services firm who answers user queries on annual reports.
 User input will have the context required by you to answer user questions.
 This context will begin with the token: ###Context.
 The context contains references to specific portions of a document relevant to the user query.
+
+ User questions will begin with the token: ###Question, and the question text will be delimited by triple backticks, that is, ```.
+
+ Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.
+
 If the answer is not found in the context, respond "I don't know".
 """

+ # LLM user_input template
 qna_user_message_template = """
 ###Context
+ Here are some documents that are relevant to the question mentioned below.
 {context}
+
+ ###Question
 ```
 {question}
 ```
 """

+ # ANOTHER VERSION:
+ # # LLM System Prompt
+ # qna_system_message = """
+ # You are an assistant to a financial services firm who answers user queries on annual reports.
+ # Users will ask questions delimited by triple backticks, that is, ```.
+ # User input will have the context required by you to answer user questions.
+ # This context will begin with the token: ###Context.
+ # The context contains references to specific portions of a document relevant to the user query.
+ # Please answer only using the context provided in the input. However, do not mention anything about the context in your answer.
+ # If the answer is not found in the context, respond "I don't know".
+ # """
+
+ # # LLM user_input template
+ # qna_user_message_template = """
+ # ###Context
+ # Here are some documents that are relevant to the question.
+ # {context}
+ # ```
+ # {question}
+ # ```
+ # """
+
+ # Define the "predict function" which will take the user_input, obtain the relevant context to answer the user question more accurately, and pass
+ # both to the OpenAI client to make predictions using the OpenAI LLM model.
+ # The function runs when 'Submit' is clicked or when an API request is made.
+ #-------------------------------------------------------------------------------------------------------------------------------------------------------------
 def predict(user_input):

+     # COMPOSING THE RESPONSE
+
+     # Retrieving relevant documents
+     relevant_document_chunks = retriever.get_relevant_documents(query = user_input) # relevant_document_chunks = retriever.invoke(user_input)
+     print(f"Relevant document chunks = {len(relevant_document_chunks)}")
+     print(f"RELEVANT DOCUMENT CHUNKS TO BE USED AS CONTEXT TO ANSWER THE USER QUESTION:\n")
+     print("-"*80)
+     i = 0
+     for document in relevant_document_chunks:
+         print(f"\nDocument chunk {i+1}:")
+         i += 1
+         print(f"Metadata:\nSource: {document.metadata['source']}\nPage: {document.metadata['page']}\n")
+         print(f"Page content:\n-------------")
+         print(document.page_content.replace('\t', ' ')) # replace all tabs used as separators by default with a single space
+         print("-"*80)

+     context_list = [doc_chunk.page_content for doc_chunk in relevant_document_chunks] # doc_chunk.page_content.replace('\t', ' ') # replace all tabs used as separators by default with a single space
+
+     context_for_query = ". ".join(context_list)
+     # (method)
+     # join(__iterable: Iterable[LiteralString], /) -> LiteralString
+     # join(__iterable: Iterable[str], /) -> str
+
     prompt = [
         {'role':'system', 'content': qna_system_message},
+         {'role': 'user', 'content': qna_user_message_template.format(
+             context = context_for_query,
+             question = user_input
             )
         }
     ]

     try:
         response = client.chat.completions.create(
+             model=model_name, # previous model used: 'mlabonne/NeuralHermes-2.5-Mistral-7B',
             messages=prompt,
+             temperature=0, # Temperature > 0 encourages more creative answers; 0.7 is a common setting that balances creativity and coherence.
+             # max_tokens=400 # Limit the number of tokens in the response
         )
         prediction = response.choices[0].message.content.strip() # Access response attributes directly
+     # Handle API errors (OpenAIError is imported from the openai package above)
+     except OpenAIError as e:
+         prediction = f'Sorry, I encountered the following OpenAI error: \n {e}'
     except Exception as e:
+         prediction = f'Sorry, I encountered the following error: \n {e}'
+

     # While the prediction is made, log both the inputs and outputs to a local log file (i.e., HuggingFace dataset)
     # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
+     # access (i.e., put a lock on the state of the log_file in case users are entering queries while the log operation is in progress.)
+     # Write user_input, context and prediction to a HuggingFace dataset repo for logging
+     # Each time we get a prediction we will determine if we should log it to a HuggingFace dataset according to the scheduler definition outside this function
     # Note: the log_file is a json file.
 
     with scheduler.lock:
         with log_file.open("a") as f:
             # json.dumps turns the dictionary into a json string containing 'user_input', 'context_for_query', and 'prediction'

                 }
             ))
             f.write("\n") # write a new line to prepare for the next observation to be logged
+
+     prediction_result = prediction
+     print(f"\nPrediction result: {prediction_result} - {type(prediction_result)}\n")
+
+     return prediction_result
+ #-------------------------------------------------------------------------------------------------------------------------------------------------------------


+ # Set up UI components for input and output
+ # Input components
+ user_question_textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
+ # Output components
+ model_prediction = gr.Label(label="Model prediction")
+ # model_prediction = "text"

 # Create the interface
 demo = gr.Interface(
+     fn = predict,
+     inputs = user_question_textbox,
+     outputs = model_prediction, # "text"
+     title = "Ask Me Anything (AMA) on Tesla 10-K statements",
+     description = "This web API presents an interface to ask questions about the contents of the Tesla 10-K reports for the period 2019 - 2023.",
+     article = "Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
+     allow_flagging = "auto", # automatically push to the HuggingFace Dataset
+     concurrency_limit = 16
 )


 #     concurrency_limit=16
 # )

+ # Launch container hosted by HuggingFace with a load balancer
 demo.queue()
+ demo.launch(share=False)
+ # To create a public link, set "share=True" in launch() ... but if this app.py is executed locally, the computer must stay on for public users to reach the browser interface
requirements.txt CHANGED
@@ -1,5 +1,8 @@
 openai==1.23.2
 chromadb==0.4.22
- langchain==0.1.1
- langchain-community==0.0.13
- sentence-transformers==2.3.1

 openai==1.23.2
 chromadb==0.4.22
+ langchain==0.1.9
+ langchain-community==0.0.32
+ sentence-transformers==2.3.1
+ pathlib==1.0.1
+ pandas==2.1.4
+ numpy==1.26.4
tesla_db/chroma.sqlite3 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:06ed87d12e6ee1b0b1f1f81fdfa4576e32f095c86fb7a2a66d0200a7640da76a
- size 46223360

 version https://git-lfs.github.com/spec/v1
+ oid sha256:16fbd2946c0267a17248a40998bc5a3446a65dee1b8b5b356b6018da86cabbf0
+ size 47091712
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:250d230dee83b006de1787c35948d598ddcffe236f81eafabbe31805e7469749
+ size 12708000
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/header.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e07096989a6d97db01f79643d6aa43690488b6a066eb20e594135825e0e34a70
+ size 100
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dbc8823be5a3c3b78663ff93c9d05602df74d072b371471e88126f8df52eaabd
+ size 172004
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/length.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fe6a12fdf9e9d1787634155c2f404ffea281dfd121ddd597deb71e17317f2576
+ size 12000
tesla_db/e8d4536e-a48d-4048-ad1d-9aaa0bb6ec69/link_lists.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6322a3a45d75cc9f31cc0eafac6458a325d4c8e51525be50cc57030505294c41
+ size 25736