Upload 21 files
- app.py +97 -0
- app_config.py +78 -0
- data/Annexure-(I).pdf +0 -0
- data/Annexure-(II).pdf +0 -0
- data/Annexure-(III).pdf +0 -0
- data/Annexure-(IV).pdf +0 -0
- data/Annexure-I-(1).pdf +0 -0
- data/Annexure-II-(1).pdf +0 -0
- data/Claims Settlement Requirements.pdf +0 -0
- data/Customer Service.pdf +0 -0
- data/Form-for-contact-details-of-PH.pdf +0 -0
- data/Guide-to-Policyholders.pdf +0 -0
- data/INCOME-TAX-BENEFIT.pdf +0 -0
- data/NRI Centre.pdf +0 -0
- data/Phone Help Line.pdf +0 -0
- data/Policy Guidelines & Helpline.pdf +0 -0
- data/Policy Status.pdf +0 -0
- data/Spurious-calls.pdf +0 -0
- data/Term of Insurance.pdf +0 -0
- data/final-LIC-self-certfication-individuals-23-April-2023.pdf +0 -0
- requirements.txt +12 -0
app.py
ADDED
import os
from pathlib import Path

import streamlit as st
from dotenv import load_dotenv
from langchain.memory import ConversationSummaryBufferMemory
from langchain_core.messages import SystemMessage, HumanMessage
from langchain_groq import ChatGroq

from app_config import (
    SYSTEM_PROMPT,
    NLP_MODEL_NAME,
    NLP_MODEL_TEMPERATURE,
    NUMBER_OF_VECTORS_FOR_RAG,
    my_vector_store,
    tiktoken_len,
)

# Load the Groq API key from a local .env file.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)


def response_generator(prompt: str) -> str:
    """Answer a user question about LIC policies with retrieval-augmented generation.

    Args:
        prompt (str): The user's query.

    Returns:
        str: The answer to the query.
    """
    try:
        # Fetch the most relevant document chunks for this query.
        retriever = st.session_state.retriever
        docs = retriever.invoke(prompt)
        my_context = '\n\n'.join(doc.page_content for doc in docs)

        # Assemble the messages: system prompt (context plus the running
        # conversation summary), prior chat turns, then the new question.
        system_message = SystemMessage(content=SYSTEM_PROMPT.format(
            context=my_context,
            previous_message_summary=st.session_state.rag_memory.moving_summary_buffer,
        ))
        chat_messages = (
            [system_message]
            + st.session_state.rag_memory.chat_memory.messages
            + [HumanMessage(content=prompt)]
        )
        print("total tokens:", tiktoken_len(str(chat_messages)))
        response = st.session_state.llm.invoke(chat_messages)
        return response.content

    except Exception as error:
        print(error)
        return "Oops! Something went wrong, please try again."


# Right-align the user's messages in the chat view.
st.markdown(
    """
    <style>
    .st-emotion-cache-janbn0 {
        flex-direction: row-reverse;
        text-align: right;
    }
    </style>
    """,
    unsafe_allow_html=True,
)

# Initialize session state on the first load.
if "messages" not in st.session_state:
    st.session_state.messages = [{"role": "system", "content": SYSTEM_PROMPT}]

if "llm" not in st.session_state:
    st.session_state.llm = ChatGroq(
        temperature=NLP_MODEL_TEMPERATURE,
        groq_api_key=str(os.getenv('GROQ_API_KEY')),
        model_name=NLP_MODEL_NAME,
    )

if "rag_memory" not in st.session_state:
    st.session_state.rag_memory = ConversationSummaryBufferMemory(
        llm=st.session_state.llm, max_token_limit=5000
    )

if "retriever" not in st.session_state:
    # k must be passed through search_kwargs; as_retriever(k=...) silently ignores it.
    st.session_state.retriever = my_vector_store.as_retriever(
        search_kwargs={"k": NUMBER_OF_VECTORS_FOR_RAG}
    )

st.title("LIC Mitra: Customer Support for LIC Policies")

# Display the chat history (the system prompt stays hidden).
container = st.container(height=600)
for message in st.session_state.messages:
    if message["role"] != "system":
        with container.chat_message(message["role"]):
            st.write(message["content"])

# Handle a new user query.
if prompt := st.chat_input("Enter your query here..."):
    with container.chat_message("user"):
        st.write(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    with container.chat_message("assistant"):
        response = response_generator(prompt=prompt)
        st.write(response)

    # Persist the turn in both the summary memory and the display history.
    st.session_state.rag_memory.save_context({'input': prompt}, {'output': response})
    st.session_state.messages.append({"role": "assistant", "content": response})
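Note: app.py expects GROQ_API_KEY in a .env file next to the script. A minimal preflight sketch (not part of the commit; the key name matches what app.py reads, the file contents are a placeholder) to confirm the environment is set before running `streamlit run app.py`:

# preflight.py: a sketch, assuming a .env file alongside app.py that
# contains a line like GROQ_API_KEY=<your-key-here> (placeholder value).
import os
from pathlib import Path
from dotenv import load_dotenv

load_dotenv(dotenv_path=Path('.') / '.env')
assert os.getenv('GROQ_API_KEY'), "GROQ_API_KEY missing; add it to .env"
print("environment OK")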
app_config.py
ADDED
import os

import tiktoken
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.memory import ConversationSummaryBufferMemory
from langchain_groq import ChatGroq

tokenizer = tiktoken.get_encoding('cl100k_base')
FILE_NAMES = os.listdir('data')

SYSTEM_PROMPT = """
You are an LIC Customer Service Chatbot.
Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

context: {context}
previous message summary: {previous_message_summary}
"""

human_template = "{question}"

NLP_MODEL_NAME = "llama3-70b-8192"
REASONING_MODEL_NAME = "mixtral-8x7b-32768"
REASONING_MODEL_TEMPERATURE = 0
NLP_MODEL_TEMPERATURE = 0
NLP_MODEL_MAX_TOKENS = 5400
VECTOR_MAX_TOKENS = 100
VECTORS_TOKEN_OVERLAP_SIZE = 20
NUMBER_OF_VECTORS_FOR_RAG = 7


def tiktoken_len(text):
    """Token-count length function used by the text splitter."""
    tokens = tokenizer.encode(text, disallowed_special=())
    return len(tokens)


def get_vectorstore():
    """Chunk and embed every file under data/ into an in-memory Chroma store."""
    model_name = "BAAI/bge-small-en"
    model_kwargs = {"device": "cpu"}
    encode_kwargs = {"normalize_embeddings": True}
    hf = HuggingFaceBgeEmbeddings(
        model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
    )

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=VECTOR_MAX_TOKENS,
        chunk_overlap=VECTORS_TOKEN_OVERLAP_SIZE,
        length_function=tiktoken_len,
        separators=["\n\n\n", "\n\n", "\n", " ", ""],
    )

    all_splits = []
    for file_name in FILE_NAMES:
        if file_name.endswith(".pdf"):
            loader = PyPDFLoader(os.path.join("data", file_name))
            # Join every page of the PDF, not just the first one.
            data = "\n".join(page.page_content for page in loader.load())
        else:
            with open(os.path.join("data", file_name), "r") as f:
                data = f.read()
        all_splits += text_splitter.split_text(data)

    vectorstore = Chroma.from_texts(texts=all_splits, embedding=hf)
    return vectorstore


# The API key is read from the environment rather than hardcoded in source.
chat = ChatGroq(
    temperature=0,
    groq_api_key=str(os.getenv('GROQ_API_KEY')),
    model_name="llama3-8b-8192",
    streaming=True,
)
rag_memory = ConversationSummaryBufferMemory(llm=chat, max_token_limit=3000)

my_vector_store = get_vectorstore()
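The chunking above measures size in tokens, not characters: tiktoken_len drives the splitter, so VECTOR_MAX_TOKENS = 100 caps every chunk at 100 cl100k_base tokens. A minimal standalone sketch (same constants as app_config.py, illustrative text only) that verifies the cap:

import tiktoken
from langchain_text_splitters import RecursiveCharacterTextSplitter

tokenizer = tiktoken.get_encoding('cl100k_base')

def tiktoken_len(text):
    return len(tokenizer.encode(text, disallowed_special=()))

splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,               # VECTOR_MAX_TOKENS
    chunk_overlap=20,             # VECTORS_TOKEN_OVERLAP_SIZE
    length_function=tiktoken_len,
    separators=["\n\n\n", "\n\n", "\n", " ", ""],
)
chunks = splitter.split_text("LIC policy servicing details. " * 200)
assert all(tiktoken_len(c) <= 100 for c in chunks)
print(len(chunks), "chunks, all within the 100-token cap")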
data/Annexure-(I).pdf  ADDED  Binary file (21 kB)
data/Annexure-(II).pdf  ADDED  Binary file (25.8 kB)
data/Annexure-(III).pdf  ADDED  Binary file (20.7 kB)
data/Annexure-(IV).pdf  ADDED  Binary file (46.1 kB)
data/Annexure-I-(1).pdf  ADDED  Binary file (413 kB)
data/Annexure-II-(1).pdf  ADDED  Binary file (318 kB)
data/Claims Settlement Requirements.pdf  ADDED  Binary file (212 kB)
data/Customer Service.pdf  ADDED  Binary file (153 kB)
data/Form-for-contact-details-of-PH.pdf  ADDED  Binary file (60.2 kB)
data/Guide-to-Policyholders.pdf  ADDED  Binary file (465 kB)
data/INCOME-TAX-BENEFIT.pdf  ADDED  Binary file (425 kB)
data/NRI Centre.pdf  ADDED  Binary file (514 kB)
data/Phone Help Line.pdf  ADDED  Binary file (676 kB)
data/Policy Guidelines & Helpline.pdf  ADDED  Binary file (684 kB)
data/Policy Status.pdf  ADDED  Binary file (537 kB)
data/Spurious-calls.pdf  ADDED  Binary file (13.2 kB)
data/Term of Insurance.pdf  ADDED  Binary file (152 kB)
data/final-LIC-self-certfication-individuals-23-April-2023.pdf  ADDED  Binary file (464 kB)
requirements.txt
ADDED
langchain
transformers
tiktoken
langchain-groq
langchain-community
langchain-text-splitters
langchain-chroma
sentence_transformers
pypdf
torch
streamlit
python-dotenv  # app.py does `from dotenv import load_dotenv`
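The versions above are unpinned, so builds may drift over time. After `pip install -r requirements.txt`, a quick import smoke test (a sketch, not part of the commit) confirms everything resolves, including dotenv for the python-dotenv package app.py relies on:

import importlib

# Top-level module names behind each requirement.
for mod in ["streamlit", "tiktoken", "dotenv", "langchain", "langchain_groq",
            "langchain_chroma", "langchain_text_splitters", "langchain_community",
            "pypdf", "torch", "transformers", "sentence_transformers"]:
    importlib.import_module(mod)
print("all imports resolve")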