# +++
# Import the libraries
#---------------------------------------------------------------------------------------------------------
# Import libraries for issuing OS commands (in addition to the built-in format using the '!' escape character prefix)
import os
# Tokenizing and data formatting
import uuid
import json
# Data management
import numpy as np
import pandas as pd
# For File path operations
from pathlib import Path

# GUI components
import gradio as gr

# Accessing external environment with endpoint and secret
# Using the openai client as a wrapper to connect to an endpoint and send HTTP requests (note: one could also use the Python "requests" package)
# Accessing OpenAI Model hosting platform
from openai import OpenAI, OpenAIError

# Embedding operations & Vector DB creation
# from langchain_core.documents import Document
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma

# HuggingFace platform
from huggingface_hub import CommitScheduler

# Set working directory (HuggingFace space)
hf_space_dir = os.getcwd()

# Obtain current directory and data file path
hf_space_app_dir_path = Path.cwd()
print(f"HuggingFace Space application directory: {hf_space_app_dir_path}\n")

# Anyscale model hosting platform NOT USED in this app.  The OpenAI model hosting platform is used instead.
# client = OpenAI(
#     base_url="https://api.endpoints.anyscale.com/v1",
#     api_key=os.environ['ANYSCALE_API_KEY']
# )

#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# OpenAI API key stored as a "secret" in the HuggingFace Space
# OPENAI_API_KEY 

#import os
# Set your API key from an environment variable: os.getenv("OPENAI_API_KEY")
# os.environ is a dictionary with all the environment variables, so to access a value, provide its key
# app.py accesses the OpenAI key, stored as a HuggingFace secret in the HuggingFace Space where app.py is uploaded, through the environment variables
openai_api_key = os.getenv("OpenAI_API_key_GL_Adv_Python_Project")  
# or:
# openai_api_key = os.environ.get("OpenAI_API_key_GL_Adv_Python_Project")
# or:
# openai_api_key = os.environ("OpenAI_API_key_GL_Adv_Python_Project")  # this produces error while "starting" app.py in HuggingFace:  TypeError: '_Environ' object is not callable

# Set Up Your API Key: You'll need an API key from OpenAI. You can obtain one by signing up on the OpenAI website and navigating to your API keys in the dashboard.
# This approach ensures that your API key remains secure and is not exposed in your code.
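
# Optional sanity check (a minimal sketch): fail fast with a clear message if the secret is
# missing, instead of letting the OpenAI client fail later with a less obvious error.
if openai_api_key is None:
    raise RuntimeError(
        "OpenAI API key not found. Add it as a secret named "
        "'OpenAI_API_key_GL_Adv_Python_Project' in the HuggingFace Space settings."
    )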

# OpenAI models:

# model_name = "gpt-4o"           # Cost:  Input: $5 / 1M tokens     ;  Output: $15 / 1M tokens
                                # GPT-4o is OpenAI's most advanced multimodal model that’s faster and cheaper than GPT-4 Turbo with stronger vision capabilities.
                                # The model has 128K context and an October 2023 knowledge cutoff.

model_name = "gpt-4o-mini"      # Cost:  Input: $0.15 / 1M tokens  ;  Output: $0.60 / 1M tokens
                                # GPT-4o mini is OpenAI's most cost-efficient small model that’s smarter and cheaper than GPT-3.5 Turbo, and has vision capabilities.
                                # The model has 128K context and an October 2023 knowledge cutoff.

# Create an OpenAI Client: setting up the client with the new version of the OpenAI Python library (version 1.0.0 and above)

client = OpenAI(
    # This is the default and can be omitted
    # api_key=os.environ.get("OPENAI_API_KEY"),
    api_key=openai_api_key,
)

print(f"OpenAI client created and authenticated with API key.\nUsing OpenAI model: {model_name}\n")
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

print(f"Loading Vector DB from HuggingFace Space file space...\n")
# embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')  
# The gte-small model, from the GTE family of embedding models designed for retrieval tasks (published on HuggingFace under 'thenlper'), produces 384-dimensional embeddings.
# This dimensionality allows the model to capture semantic information effectively while maintaining a relatively small model size for efficiency in retrieval tasks.

# However, the vector database was encoded with the 'gte-large' model, which has 1024 embedding dimensions, so we need to use the gte-large model here to match the embedding dimensions of the tesla_db vector database;
# otherwise we get the following runtime error: "chromadb.errors.InvalidDimensionException: Embedding dimension 384 does not match collection dimensionality 1024"
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')


tesla_10k_collection = 'tesla-10k-2019-to-2023'

# Note: the collection dimensionality is determined by the embedding function used when the documents were indexed;
# it is not passed as a parameter when loading an existing collection.


persisted_vectordb_path = Path.joinpath(hf_space_app_dir_path, 'tesla_db')   # this is a pathlib object

# persisted_vectordb_location = persisted_vectordb_path   # this is a pathlib object ... it produces an error because the Chroma parameter "persist_directory" expects a string, not a pathlib object.
persisted_vectordb_location = str(persisted_vectordb_path)   # convert path to string

print(f"Vector database location:\n{persisted_vectordb_location}\n")

# vector database constructor Chroma()
vectorstore_persisted = Chroma(
    collection_name = tesla_10k_collection,
    persist_directory = persisted_vectordb_location,     # './tesla_db',
    embedding_function = embedding_model
)
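
# Optional check (a sketch, not required for the app to run): confirm the persisted collection
# actually contains vectors; a count of 0 usually means the path or collection name is wrong.
# Note: `_collection` is a private attribute of the LangChain Chroma wrapper (the underlying chromadb collection).
try:
    print(f"Collection '{tesla_10k_collection}' holds {vectorstore_persisted._collection.count()} embeddings.\n")
except Exception as check_err:
    print(f"Could not inspect the collection: {check_err}\n")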

# Return VectorStoreRetriever initialized from this VectorStore.
retriever = vectorstore_persisted.as_retriever(
    search_type = 'similarity',
    search_kwargs = {'k': 5}
)
# Args:
#     search_type (Optional[str]): Defines the type of search that the Retriever should perform.
# Can be "similarity" (default), "mmr", or "similarity_score_threshold".
#     search_kwargs (Optional[Dict]): Keyword arguments to pass to the
#         search function. Can include things like:
#             k: Amount of documents to return (Default: 4)
#             score_threshold: Minimum relevance threshold for similarity_score_threshold

print(f"Successfully obtained VectorStoreRetriever initialized from the Vector database.\n")


# Prepare the logging functionality
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
print(f"\nLogging dataset information:\n\tlog_file: {log_file}\n\tlog_folder: {log_folder}\n")


# Scheduler will log every 2 API calls:
scheduler = CommitScheduler(
    repo_id="document-qna-chroma-openai-logs",      # name of the log folder containing json elements -->  HuggingFace dataset       # OLD name: "document-qna-chroma-anyscale-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2                                         # execute every two API calls
)
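
# For reference, each log entry is one JSON object per line (JSON Lines). A quick way to inspect
# the local log file (a sketch, assuming the file already has entries) is:
# pd.read_json(log_file, lines=True)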

print(f"Retrieval Augmented Generation (RAG) Q&A\nLLM Prompt initialization... (System prompt and user_input template)\n")

# LLM System Prompt
qna_system_message = """
You are an assistant to a financial services firm who answers user queries on annual reports.

User input will have the context required by you to answer user questions.
This context will begin with the token: ###Context.
The context contains references to specific portions of a document relevant to the user query.

User questions will begin with the token: ###Question, and the question text will be delimited by triple backticks, that is, ```.

Please answer only using the context provided in the input. Do not mention anything about the context in your final answer.

If the answer is not found in the context, respond "I don't know".
"""

# LLM user_input template
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question mentioned below.
{context}

###Question
```
{question}
```
"""

# ANOTHER VERSION:
# # LLM System Prompt
# qna_system_message = """
# You are an assistant to a financial services firm who answers user queries on annual reports.
# Users will ask questions delimited by triple backticks, that is, ```.
# User input will have the context required by you to answer user questions.
# This context will begin with the token: ###Context.
# The context contains references to specific portions of a document relevant to the user query.
# Please answer only using the context provided in the input. However, do not mention anything about the context in your answer. 
# If the answer is not found in the context, respond "I don't know".
# """

# # LLM user_input template
# qna_user_message_template = """
# ###Context
# Here are some documents that are relevant to the question.
# {context}
# ```
# {question}
# ```
# """

# Define the "predict function" which will take the user_input, obtain the relevant context to answer the user question more accurately, and pass
# both to the OpenAI client to make predictions using the OpenAI LLM model
# The function runs when 'Submit' is clicked or when a API request is made
#-------------------------------------------------------------------------------------------------------------------------------------------------------------
def predict(user_input):

    # COMPOSING THE RESPONSE

    # Retrieving relevant documents
    relevant_document_chunks = retriever.get_relevant_documents(query = user_input)      # relevant_document_chunks = retriever.invoke(user_input)
    print(f"Relevant document chunks = {len(relevant_document_chunks)}")
    print(f"RELEVANT DOCUMENT CHUNKS TO BE USED AS CONTEXT TO ANSWER THE USER QUESTION:\n")
    print("-"*80)
    for i, document in enumerate(relevant_document_chunks, start=1):
        print(f"\nDocument chunk {i}:")
        print(f"Metadata:\nSource: {document.metadata['source']}\nPage: {document.metadata['page']}\n")
        print(f"Page content:\n-------------")
        print(document.page_content.replace('\t', ' '))   # replace all tabs used as separators by default with a single space
        print("-"*80)
    
    context_list = [doc_chunk.page_content for doc_chunk in relevant_document_chunks]   # doc_chunk.page_content.replace('\t', ' ')   # replace all tabs used as separators by default with a single space

    context_for_query = ". ".join(context_list)

    prompt = [
        {'role':'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context = context_for_query,
            question = user_input
            )
        }
    ]

    try:
        response = client.chat.completions.create(
            model=model_name,               # previous model used: 'mlabonne/NeuralHermes-2.5-Mistral-7B',
            messages=prompt,
            temperature=0,                  # temperature=0 for deterministic, factual answers; higher values (e.g., 0.7) trade determinism for more creative responses
            # max_tokens=400                # Limit the number of tokens in the response
        )
        prediction = response.choices[0].message.content.strip()   # Access response attributes directly
    # Handle API errors
    except OpenAIError as e:
        prediction = f'Sorry, I encountered the following OpenAI error: \n {e}'
    except Exception as e:
        prediction = f'Sorry, I encountered the following error: \n {e}'


    # Once the prediction is made, log both the inputs and outputs to a local log file (i.e., a HuggingFace dataset)
    # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
    # access (i.e., put a lock on the log_file in case users are entering queries while the log operation is in progress)
    # Write user_input, context and prediction to a HuggingFace dataset repo for logging
    # Each time we get a prediction we will determine if we should log it to a hugging_face dataset according to the scheduler definition outside this function
    # Note: the log_file is a json file.
    with scheduler.lock:
        with log_file.open("a") as f:
            # json.dumps turns the dictionary into a json string containing 'user_input', 'context_for_query', and 'prediction'
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")     # write a new line to prepare for the next observation to be logged

    prediction_result = prediction
    print(f"\nPrediction result: {prediction_result} - {type(prediction_result)}\n")

    return (prediction_result)
#-------------------------------------------------------------------------------------------------------------------------------------------------------------
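
# Quick local check (a sketch; uncomment to try the full pipeline outside the Gradio UI):
# print(predict("What was the total revenue of the company in 2022?"))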


# Set up UI components for input and output
# Input components
user_question_textbox = gr.Textbox(label="User question", placeholder="Enter your query here", lines=6)
# Output components
# model_prediction = "text"   # OK, works.
model_prediction = gr.Textbox(label="Model prediction", placeholder="Model prediction will show here", lines=6)
# model_prediction = gr.Label(label="Model prediction")  # This produces large font (not appropriate UI component)


# Create the interface
demo = gr.Interface(
    fn = predict, 
    inputs = user_question_textbox, 
    outputs = model_prediction,    # "text",
    title = "Ask Me Anything (AMA) on Tesla 10-K statements",
    description= " This web API presents an interface to ask questions about the contents of the Tesla 10-K reports for the period 2019 - 2023.",
    article = "Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
    examples=[["What was the total revenue of the company in 2022?", ""],
            ["Summarize the Management Discussion and Analysis section of the 2021 report in 50 words.", ""],
            ["What was the company's debt level in 2021?", ""],
            ["What are the risks related to the company's ability to grow its business in 2023? Respond with bullet point summaries.", ""]
    ],    
    allow_flagging="auto",    # automatically push to the HuggingFace Dataset
    concurrency_limit = 16
)
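
# For reference, once the Space is running, the same predict function can be called programmatically
# (a sketch using the gradio_client package; "user-name/space-name" is a placeholder for the actual Space id):
# from gradio_client import Client
# api_client = Client("user-name/space-name")
# print(api_client.predict("What was the total revenue of the company in 2022?", api_name="/predict"))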


# demo = gr.Interface(
#     inputs=textbox, fn=predict, outputs="text",
#     title="Ask Me Anything (AMA) on Tesla 10-K statements",
#     description="This web API presents an interface to ask questions on contents of the Tesla 10-K reports for the period 2019 - 2023.",
#     article="Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
#     examples=[["What was the total revenue of the company in 2022?", "$ 81.46 Billion"],
#             ["Summarize the Management Discussion and Analysis section of the 2021 report in 50 words.", ""],
#             ["What was the company's debt level in 2020?", ""],
#             ["Identify 5 key risks identified in the 2019 10k report? Respond with bullet point summaries.", ""]
#     ],
#     concurrency_limit=16
# )

# Enable request queuing and launch the app (hosted by HuggingFace with a load balancer when run in a Space)
demo.queue()
demo.launch(share=False)
# To create a public link, set "share=True" in launch().  Note: if app.py is executed locally, the computer must stay on for public users to access the browser interface.

# +++