# +++
# Import the libraries
#---------------------------------------------------------------------------------------------------------
# Import libraries for issuing OS commands, in addition to the built-in format using the '!' escape character prefix
import os
# Tokenizing and data formatting
import uuid
import json
# Data management
import numpy as np
import pandas as pd
# File path operations
from pathlib import Path
# GUI components
import gradio as gr
# Accessing an external endpoint with an API key ("secret")
# The openai client is used to connect to an endpoint and send HTTP requests (note: one could also use the Python "requests" package)
# Accessing the OpenAI model hosting platform
from openai import OpenAI, OpenAIError
# Embedding operations & vector DB creation
# from langchain_core.documents import Document
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
# HuggingFace platform
from huggingface_hub import CommitScheduler
# Set working directory (HuggingFace Space)
hf_space_dir = os.getcwd()
# Obtain the current application directory as a Path object
hf_space_app_dir_path = Path.cwd()
print(f"HuggingFace Space application directory: {hf_space_app_dir_path}\n")
# Anyscale model hosting platform NOT USED in this app; the OpenAI model hosting platform is used instead.
# client = OpenAI(
#     base_url="https://api.endpoints.anyscale.com/v1",
#     api_key=os.environ['ANYSCALE_API_KEY']
# )
#--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
# OpenAI API key stored as a "secret" in the HuggingFace Space
# OPENAI_API_KEY
# Read the API key from an environment variable: os.environ is a dictionary of all the environment variables, so the value is accessed by providing the key.
# app.py has access to the OpenAI key stored as a HuggingFace secret in the HuggingFace Space where app.py is uploaded, by reading it from the environment variables.
openai_api_key = os.getenv("OpenAI_API_key_GL_Adv_Python_Project")
# or:
# openai_api_key = os.environ.get("OpenAI_API_key_GL_Adv_Python_Project")
# Note: os.environ("OpenAI_API_key_GL_Adv_Python_Project") produces an error while "starting" app.py in HuggingFace: TypeError: '_Environ' object is not callable
# Set up your API key: you'll need an API key from OpenAI. You can obtain one by signing up on the OpenAI website and navigating to your API keys in the dashboard.
# This approach ensures that your API key remains secure and is not exposed in your code.
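# Optional (hedged) sketch, not part of the original app: fail fast with a clear message if the secret is missing,
# instead of letting the OpenAI client raise a less obvious authentication error later.
# if openai_api_key is None:
#     raise RuntimeError(
#         "OpenAI API key not found. Add the 'OpenAI_API_key_GL_Adv_Python_Project' secret in the HuggingFace Space settings."
#     )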
# OpenAI models:
# model_name = "gpt-4o"  # Cost: Input: $5 / 1M tokens; Output: $15 / 1M tokens
# GPT-4o is OpenAI's most advanced multimodal model; it is faster and cheaper than GPT-4 Turbo and has stronger vision capabilities.
# The model has 128K context and an October 2023 knowledge cutoff.
model_name = "gpt-4o-mini"  # Cost: Input: $0.15 / 1M tokens; Output: $0.60 / 1M tokens
# GPT-4o mini is OpenAI's most cost-efficient small model; it is smarter and cheaper than GPT-3.5 Turbo and has vision capabilities.
# The model has 128K context and an October 2023 knowledge cutoff.
# Create an OpenAI client: setting up the client with the new version of the OpenAI Python library (version 1.0.0 and above)
client = OpenAI(
    # api_key defaults to os.environ.get("OPENAI_API_KEY") and can be omitted if that variable is set
    api_key=openai_api_key,
)
print(f"OpenAI client created and authenticated with API key.\nUsing OpenAI model: {model_name}\n")
#-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
print("Loading Vector DB from the HuggingFace Space file system...\n")
# embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-small')
# The gte-small model, from the GTE family of embedding models designed for retrieval tasks, produces 384-dimensional embeddings.
# This dimensionality allows the model to capture semantic information effectively while maintaining a relatively small model size for efficiency in retrieval tasks.
# However, the vector database was encoded with the 'gte-large' model, which has 1024 embedding dimensions, so the gte-large model must be used here to match the embedding dimensions of the tesla_db vector database;
# otherwise we get the following runtime error: "chromadb.errors.InvalidDimensionException: Embedding dimension 384 does not match collection dimensionality 1024"
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
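# Optional (hedged) sketch: verify the embedding dimensionality before opening the collection, to catch the
# InvalidDimensionException mentioned above early. Assumes the sentence-transformer model loads successfully here.
# embedding_dim = len(embedding_model.embed_query("dimension check"))
# assert embedding_dim == 1024, f"Expected 1024-dimensional embeddings for tesla_db, got {embedding_dim}"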
tesla_10k_collection = 'tesla-10k-2019-to-2023'
# Example: creating a collection with the correct dimensionality
# tesla_10k_collection = Chroma.create_collection("tesla-10k-2019-to-2023", embedding_dim=1024)
persisted_vectordb_path = Path.joinpath(hf_space_app_dir_path, 'tesla_db')  # this is a pathlib object
# persisted_vectordb_location = persisted_vectordb_path  # passing the pathlib object directly produces an error: the Chroma "persist_directory" parameter expects a string, not a pathlib object
persisted_vectordb_location = str(persisted_vectordb_path)  # convert the path to a string
print(f"Vector database location:\n{persisted_vectordb_location}\n")
# Vector database constructor Chroma()
vectorstore_persisted = Chroma(
    collection_name=tesla_10k_collection,
    persist_directory=persisted_vectordb_location,  # './tesla_db'
    embedding_function=embedding_model
)
# Return a VectorStoreRetriever initialized from this VectorStore.
retriever = vectorstore_persisted.as_retriever(
    search_type='similarity',
    search_kwargs={'k': 5}
)
# Args:
#     search_type (Optional[str]): Defines the type of search that the Retriever should perform.
#         Can be "similarity" (default), "mmr", or "similarity_score_threshold".
#     search_kwargs (Optional[Dict]): Keyword arguments to pass to the search function. Can include things like:
#         k: Amount of documents to return (Default: 4)
#         score_threshold: Minimum relevance threshold for similarity_score_threshold
print("Successfully obtained VectorStoreRetriever initialized from the Vector database.\n")
# Prepare the logging functionality
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
print(f"\nLogging dataset information:\n\tlog_file: {log_file}\n\tlog_folder: {log_folder}\n")
# The scheduler pushes the accumulated logs to the dataset repo every 2 minutes:
scheduler = CommitScheduler(
    repo_id="document-qna-chroma-openai-logs",  # HuggingFace dataset repo holding the JSON log records  # OLD name: "document-qna-chroma-anyscale-logs"
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2  # commit every 2 minutes (the "every" parameter is in minutes)
)
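# For reference, each record appended to log_file by predict() below is one JSON object per line (JSON Lines), e.g. (illustrative values only):
# {"user_input": "What was the total revenue of the company in 2022?", "retrieved_context": "...", "model_response": "..."}
# CommitScheduler then pushes the logs/ folder to the "document-qna-chroma-openai-logs" dataset repo on that schedule.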
print(f"Retrieval Augmented Generation (RAG) Q&A\nLLM Prompt initialization... (System prompt and user_input template)\n") | |
# LLM System Prompt | |
qna_system_message = """ | |
You are an assistant to a financial services firm who answers user queries on annual reports. | |
User input will have the context required by you to answer user questions. | |
This context will begin with the token: ###Context. | |
The context contains references to specific portions of a document relevant to the user query. | |
User questions will begin with the token: ###Question, and the question text will be delimited by triple backticks, that is, ```. | |
Please answer only using the context provided in the input. Do not mention anything about the context in your final answer. | |
If the answer is not found in the context, respond "I don't know". | |
""" | |
# LLM user_input template | |
qna_user_message_template = """ | |
###Context | |
Here are some documents that are relevant to the question mentioned below. | |
{context} | |
###Question | |
``` | |
{question} | |
``` | |
""" | |
# ANOTHER VERSION:
# # LLM System Prompt
# qna_system_message = """
# You are an assistant to a financial services firm who answers user queries on annual reports.
# Users will ask questions delimited by triple backticks, that is, ```.
# User input will have the context required by you to answer user questions.
# This context will begin with the token: ###Context.
# The context contains references to specific portions of a document relevant to the user query.
# Please answer only using the context provided in the input. However, do not mention anything about the context in your answer.
# If the answer is not found in the context, respond "I don't know".
# """
# # LLM user_input template
# qna_user_message_template = """
# ###Context
# Here are some documents that are relevant to the question.
# {context}
# ```
# {question}
# ```
# """
# Define the "predict" function, which takes the user_input, obtains the relevant context to answer the user question more accurately,
# and passes both to the OpenAI client to make predictions using the OpenAI LLM model.
# The function runs when 'Submit' is clicked or when an API request is made.
#-------------------------------------------------------------------------------------------------------------------------------------------------------------
def predict(user_input):
    # COMPOSING THE RESPONSE
    # Retrieve the relevant document chunks
    relevant_document_chunks = retriever.get_relevant_documents(query=user_input)  # equivalently: relevant_document_chunks = retriever.invoke(user_input)
    print(f"Relevant document chunks = {len(relevant_document_chunks)}")
    print("RELEVANT DOCUMENT CHUNKS TO BE USED AS CONTEXT TO ANSWER THE USER QUESTION:\n")
    print("-" * 80)
    for i, document in enumerate(relevant_document_chunks, start=1):
        print(f"\nDocument chunk {i}:")
        print(f"Metadata:\nSource: {document.metadata['source']}\nPage: {document.metadata['page']}\n")
        print("Page content:\n-------------")
        print(document.page_content.replace('\t', ' '))  # replace all tabs used as separators by default with a single space
        print("-" * 80)
    context_list = [doc_chunk.page_content for doc_chunk in relevant_document_chunks]  # optionally strip tabs here too: doc_chunk.page_content.replace('\t', ' ')
    context_for_query = ". ".join(context_list)
    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input
        )}
    ]
    try:
        response = client.chat.completions.create(
            model=model_name,  # previous model used: 'mlabonne/NeuralHermes-2.5-Mistral-7B'
            messages=prompt,
            temperature=0,  # temperature=0 gives deterministic, fact-based answers; higher values (e.g. 0.7) trade coherence for creativity
            # max_tokens=400  # limit the number of tokens in the response
        )
        prediction = response.choices[0].message.content.strip()  # access response attributes directly
    # Handle API errors
    except OpenAIError as e:  # OpenAIError is imported from openai at the top of the file
        prediction = f'Sorry, I encountered the following OpenAI error: \n {e}'
    except Exception as e:
        prediction = f'Sorry, I encountered the following error: \n {e}'
    # After the prediction is made, log both the inputs and outputs to a local log file (backing a HuggingFace dataset).
    # While writing to the log file, lock the commit scheduler to avoid parallel access
    # (i.e., lock the state of the log_file in case users are entering queries while the log operation is in progress).
    # Write user_input, context and prediction to a HuggingFace dataset repo for logging.
    # The scheduler defined outside this function decides when the accumulated log records are committed to the HuggingFace dataset.
    # Note: the log_file is a JSON Lines file.
    with scheduler.lock:
        with log_file.open("a") as f:
            # json.dumps turns the dictionary into a JSON string containing 'user_input', 'retrieved_context', and 'model_response'
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction
                }
            ))
            f.write("\n")  # write a newline to prepare for the next observation to be logged
    prediction_result = prediction
    print(f"\nPrediction result: {prediction_result} - {type(prediction_result)}\n")
    return prediction_result
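# Optional (hedged) smoke test, not part of the deployed app: call predict() directly before wiring up the UI.
# if __name__ == "__main__":
#     print(predict("What was the total revenue of the company in 2022?"))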
#-------------------------------------------------------------------------------------------------------------------------------------------------------------
# Set up UI components for input and output
# Input components
user_question_textbox = gr.Textbox(label="User question", placeholder="Enter your query here", lines=6)
# Output components
# model_prediction = "text"  # OK, works.
model_prediction = gr.Textbox(label="Model prediction", placeholder="Model prediction will show here", lines=6)
# model_prediction = gr.Label(label="Model prediction")  # This produces a large font (not an appropriate UI component)
# Create the interface
demo = gr.Interface(
    fn=predict,
    inputs=user_question_textbox,
    outputs=model_prediction,  # "text",
    title="Ask Me Anything (AMA) on Tesla 10-K statements",
    description="This web API presents an interface to ask questions about the contents of the Tesla 10-K reports for the period 2019 - 2023.",
    article="Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
    examples=[  # one value per example for the single input component
        ["What was the total revenue of the company in 2022?"],
        ["Summarize the Management Discussion and Analysis section of the 2021 report in 50 words."],
        ["What was the company's debt level in 2021?"],
        ["What are the risks related to the company's ability to grow its business in 2023? Respond with bullet point summaries."]
    ],
    allow_flagging="auto",  # automatically flag every submission for logging
    concurrency_limit=16
)
# demo = gr.Interface(
#     inputs=textbox, fn=predict, outputs="text",
#     title="Ask Me Anything (AMA) on Tesla 10-K statements",
#     description="This web API presents an interface to ask questions on contents of the Tesla 10-K reports for the period 2019 - 2023.",
#     article="Note that questions that are not relevant to the Tesla 10-K report will not be answered.",
#     examples=[["What was the total revenue of the company in 2022?", "$ 81.46 Billion"],
#               ["Summarize the Management Discussion and Analysis section of the 2021 report in 50 words.", ""],
#               ["What was the company's debt level in 2020?", ""],
#               ["Identify 5 key risks identified in the 2019 10k report? Respond with bullet point summaries.", ""]
#              ],
#     concurrency_limit=16
# )
# Launch the app in a container hosted by HuggingFace with a load balancer
demo.queue()
demo.launch(share=False)
# To create a public link, set share=True in launch(). However, if app.py is executed locally, the computer must stay on for public users to reach the browser interface.
# +++