HarshSanghavi committed on
Commit
188a44f
·
verified ·
1 Parent(s): f278e58

Upload 21 files

Browse files
app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import random
3
+ from app_config import SYSTEM_PROMPT, NLP_MODEL_NAME, NUMBER_OF_VECTORS_FOR_RAG, NLP_MODEL_TEMPERATURE, NLP_MODEL_MAX_TOKENS, VECTOR_MAX_TOKENS,my_vector_store,chat,tiktoken_len
4
+ from langchain.memory import ConversationSummaryBufferMemory
5
+ from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
6
+ from langchain.chains.summarize import load_summarize_chain
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain_groq import ChatGroq
9
+ from dotenv import load_dotenv
10
+ from pathlib import Path
11
+ import os
12
# Load environment variables (e.g. GROQ_API_KEY, read further down) from a
# `.env` file located in the current working directory.
env_path = Path('.').joinpath('.env')
load_dotenv(dotenv_path=env_path)
14
+
15
def response_generator(prompt: str) -> str:
    """Answer a user query about LIC policies using retrieved context.

    The retriever held in ``st.session_state`` is asked for documents that
    match ``prompt``; their text is inserted into ``SYSTEM_PROMPT`` together
    with the running conversation summary kept by
    ``st.session_state.rag_memory``. The system message, the buffered chat
    history and the new user message are then sent to
    ``st.session_state.llm``.

    Args:
        prompt (str): user query

    Returns:
        str: the model's answer, or a generic apology message if any step
        (retrieval, prompt formatting, model call) raises.
    """
    try:
        session = st.session_state
        docs = session.retriever.invoke(prompt)
        my_context = '\n\n'.join(doc.page_content for doc in docs)

        system_message = SystemMessage(
            content=SYSTEM_PROMPT.format(
                context=my_context,
                previous_message_summary=session.rag_memory.moving_summary_buffer,
            )
        )
        # Build the message list explicitly: system prompt first, then the
        # buffered history, then the new user turn.
        chat_messages = [
            system_message,
            *session.rag_memory.chat_memory.messages,
            HumanMessage(content=prompt),
        ]
        print("total tokens: ", tiktoken_len(str(chat_messages)))
        response = session.llm.invoke(chat_messages)
        return response.content

    except Exception as error:
        # UI boundary: never let a failure crash the page, but keep the full
        # traceback in the server log so the cause can be diagnosed.
        import traceback

        print(error)
        traceback.print_exc()
        return "Oops! something went wrong, please try again."
41
+
42
+
43
# Custom CSS injected into the page: the targeted auto-generated Streamlit
# class is laid out right-to-left with right-aligned text.
# NOTE(review): presumably this class is the user chat bubble - confirm; the
# generated class name may change between Streamlit versions.
_chat_css = """
<style>
.st-emotion-cache-janbn0 {
flex-direction: row-reverse;
text-align: right;
}
</style>
"""
st.markdown(_chat_css, unsafe_allow_html=True)
54
+
55
# Per-session initialisation: each object is created only when it is not
# already present in st.session_state.

print("SYSTEM MESSAGE")
if "messages" not in st.session_state:
    # Chat transcript shown in the UI; the system entry is skipped on display.
    st.session_state.messages = [{"role": "system", "content": SYSTEM_PROMPT}]

print("SYSTEM MODEL")
if "llm" not in st.session_state:
    groq_api_key = os.getenv('GROQ_API_KEY')
    if not groq_api_key:
        # str(None) would otherwise be passed on as the literal key "None"
        # and only fail later with an opaque authentication error.
        st.error("GROQ_API_KEY is not set. Add it to the environment or to the .env file.")
        st.stop()
    st.session_state.llm = ChatGroq(
        temperature=NLP_MODEL_TEMPERATURE,
        groq_api_key=groq_api_key,
        model_name=NLP_MODEL_NAME,
    )

print("rag")
if "rag_memory" not in st.session_state:
    st.session_state.rag_memory = ConversationSummaryBufferMemory(
        llm=st.session_state.llm, max_token_limit=5000
    )

print("retrival")
if "retriever" not in st.session_state:
    # The number of documents to fetch must be passed through search_kwargs;
    # a bare `k=` keyword is not the documented way to set it.
    st.session_state.retriever = my_vector_store.as_retriever(
        search_kwargs={"k": NUMBER_OF_VECTORS_FOR_RAG}
    )
73
+
74
+
75
st.title("LIC Mitra: Customer Support for LIC Policies")
print("container")

# Fixed-height container holding the conversation; replay the stored
# transcript into it, leaving out the system entry.
container = st.container(height=600)
for message in st.session_state.messages:
    role = message["role"]
    if role == "system":
        continue
    with container.chat_message(role):
        st.write(message["content"])

# Handle a newly submitted query, if any.
prompt = st.chat_input("Enter your query here... ")
if prompt:
    # Show and record the user's turn.
    with container.chat_message("user"):
        st.write(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Generate, log and show the assistant's turn.
    with container.chat_message("assistant"):
        response = response_generator(prompt=prompt)
        print("******************************************************** Response ********************************************************")
        print("MY RESPONSE IS:", response)
        st.write(response)

    print("Response is:", response)
    # Record the exchange in the summarising memory and in the transcript.
    st.session_state.rag_memory.save_context({'input': prompt}, {'output': response})
    st.session_state.messages.append({"role": "assistant", "content": response})
app_config.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tiktoken
2
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
3
+ from langchain_chroma import Chroma
4
+ from langchain_community.embeddings import HuggingFaceBgeEmbeddings
5
+ from langchain.document_loaders import PyPDFLoader
6
+ from langchain.memory import ConversationSummaryBufferMemory
7
+ from langchain_groq import ChatGroq
8
+ import os
9
# Names of the entries in the `data` directory; get_vectorstore() iterates
# over them when building the index.
FILE_NAMEs = os.listdir("data")

# Tokenizer used by tiktoken_len() to measure text length in tokens.
tokenizer = tiktoken.get_encoding("cl100k_base")
11
+
12
+ # system_template = """ you are LIC Customer Service Chatbot.
13
+ # Use the following pieces of context to answer the user's question.
14
+ # If you don't know the answer, just say that you don't know, don't try to make up an answer.
15
+ # ----------------
16
+ # {context}"""
17
+
18
+
19
# --- Prompt templates -------------------------------------------------------

# System prompt template. `{context}` receives the retrieved document text and
# `{previous_message_summary}` the running conversation summary.
SYSTEM_PROMPT = """
you are LIC Customer Service Chatbot.
Use the following pieces of context to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

context: {context}
previous message summary: {previous_message_summary}
"""

# Template for the user turn.
# NOTE(review): not referenced in the visible code - confirm before removing.
human_template = "{question}"

# --- Chat model settings ----------------------------------------------------

NLP_MODEL_NAME = "llama3-70b-8192"
NLP_MODEL_TEMPERATURE = 0
NLP_MODEL_MAX_TOKENS = 5400

# NOTE(review): the reasoning-model settings are not referenced in the visible
# code - confirm whether they are still needed.
REASONING_MODEL_NAME = "mixtral-8x7b-32768"
REASONING_MODEL_TEMPERATURE = 0

# --- Retrieval settings -----------------------------------------------------

# Chunk size and overlap handed to the text splitter; both are measured with
# tiktoken_len, i.e. in tokens.
VECTOR_MAX_TOKENS = 100
VECTORS_TOKEN_OVERLAP_SIZE = 20

# Number of chunks the retriever is asked to return per query.
NUMBER_OF_VECTORS_FOR_RAG = 7
38
+
39
+
40
+
41
+ # create the length function
42
def tiktoken_len(text):
    """Return the number of tokens ``text`` encodes to with the module tokenizer.

    ``disallowed_special=()`` is passed so that special-token strings occurring
    in ``text`` are not rejected by the encoder.
    """
    return len(tokenizer.encode(text, disallowed_special=()))
48
def get_vectorstore():
    """Build a Chroma vector store from the files in the ``data`` directory.

    Every entry listed in ``FILE_NAMEs`` is read (PDFs through ``PyPDFLoader``,
    anything else as UTF-8 text), split into chunks of at most
    ``VECTOR_MAX_TOKENS`` tokens with ``VECTORS_TOKEN_OVERLAP_SIZE`` tokens of
    overlap, embedded with the ``BAAI/bge-small-en`` model on CPU and stored
    in a Chroma collection.

    Returns:
        Chroma: vector store containing the chunks of all documents.
    """
    hf = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )

    # The splitter configuration does not depend on the file, so build it once.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=VECTOR_MAX_TOKENS,
        chunk_overlap=VECTORS_TOKEN_OVERLAP_SIZE,
        length_function=tiktoken_len,
        separators=["\n\n\n", "\n\n", "\n", " ", ""],
    )

    all_splits = []
    for file_name in FILE_NAMEs:
        file_path = os.path.join("data", file_name)
        if not os.path.isfile(file_path):
            # Skip sub-directories and other non-regular entries.
            continue

        if file_name.lower().endswith(".pdf"):
            # The loader yields one document per page; index every page, not
            # just the first one.
            pages = PyPDFLoader(file_path).load()
            data = "\n\n".join(page.page_content for page in pages)
        else:
            with open(file_path, "r", encoding="utf-8") as f:
                data = f.read()

        all_splits.extend(text_splitter.split_text(data))

    vectorstore = Chroma.from_texts(texts=all_splits, embedding=hf)
    return vectorstore
73
+
74
+
75
# SECURITY: a Groq API key used to be hard-coded on this line. Any key that
# was committed must be treated as leaked and revoked; the key is now read
# from the GROQ_API_KEY environment variable instead.
# The .env file is loaded here because this module is imported by app.py
# before app.py performs its own load_dotenv() call.
from dotenv import load_dotenv

load_dotenv(dotenv_path=".env")

chat = ChatGroq(
    temperature=0,
    groq_api_key=os.getenv("GROQ_API_KEY"),
    model_name="llama3-8b-8192",
    streaming=True,
)
rag_memory = ConversationSummaryBufferMemory(llm=chat, max_token_limit=3000)

# Built once at import time and shared by everything importing this module.
my_vector_store = get_vectorstore()
data/Annexure-(I).pdf ADDED
Binary file (21 kB). View file
 
data/Annexure-(II).pdf ADDED
Binary file (25.8 kB). View file
 
data/Annexure-(III).pdf ADDED
Binary file (20.7 kB). View file
 
data/Annexure-(IV).pdf ADDED
Binary file (46.1 kB). View file
 
data/Annexure-I-(1).pdf ADDED
Binary file (413 kB). View file
 
data/Annexure-II-(1).pdf ADDED
Binary file (318 kB). View file
 
data/Claims Settlement Requirements.pdf ADDED
Binary file (212 kB). View file
 
data/Customer Service.pdf ADDED
Binary file (153 kB). View file
 
data/Form-for-contact-details-of-PH.pdf ADDED
Binary file (60.2 kB). View file
 
data/Guide-to-Policyholders.pdf ADDED
Binary file (465 kB). View file
 
data/INCOME-TAX-BENEFIT.pdf ADDED
Binary file (425 kB). View file
 
data/NRI Centre.pdf ADDED
Binary file (514 kB). View file
 
data/Phone Help Line.pdf ADDED
Binary file (676 kB). View file
 
data/Policy Guidelines & Helpline.pdf ADDED
Binary file (684 kB). View file
 
data/Policy Status.pdf ADDED
Binary file (537 kB). View file
 
data/Spurious-calls.pdf ADDED
Binary file (13.2 kB). View file
 
data/Term of Insurance.pdf ADDED
Binary file (152 kB). View file
 
data/final-LIC-self-certfication-individuals-23-April-2023.pdf ADDED
Binary file (464 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
langchain
transformers
tiktoken
langchain-groq
langchain-community
langchain-text-splitters
langchain-chroma
sentence_transformers
pypdf
torch
streamlit
python-dotenv