shallou committed
Commit b35ee0f · verified · 1 Parent(s): 729f4d4

Update app.py

Files changed (1)
  1. app.py +27 -33
app.py CHANGED
@@ -1,76 +1,71 @@
-import streamlit as st
 from dotenv import load_dotenv
+import streamlit as st
 import pickle
 from PyPDF2 import PdfReader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.vectorstores import FAISS
-from langchain.llms import OpenAI
+from langchain.llms import HuggingFace
 from langchain.chains.question_answering import load_qa_chain
 from langchain.callbacks import get_openai_callback
 import os
-
+
+# Load environment variables from .env file
 load_dotenv()
-
+
 def main():
     st.header("LLM-powered PDF Chatbot 💬")
-
-
-    # upload a PDF file
+
+    # Upload a PDF file
     pdf = st.file_uploader("Upload your PDF", type='pdf')
-
-    # st.write(pdf)
+
     if pdf is not None:
         pdf_reader = PdfReader(pdf)
 
         text = ""
         for page in pdf_reader.pages:
             text += page.extract_text()
-
+
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
             chunk_overlap=200,
             length_function=len
-            )
+        )
         chunks = text_splitter.split_text(text=text)
-
-        # # embeddings
+
+        # Process and store embeddings
         store_name = pdf.name[:-4]
         st.write(f'{store_name}')
-        # st.write(chunks)
-
+
         if os.path.exists(f"{store_name}.pkl"):
             with open(f"{store_name}.pkl", "rb") as f:
                 VectorStore = pickle.load(f)
-            # st.write('Embeddings Loaded from the Disk')s
+            st.write('Embeddings Loaded from the Disk')
         else:
-            embeddings = OpenAIEmbeddings()
+            embeddings = HuggingFaceEmbeddings()
             VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
             with open(f"{store_name}.pkl", "wb") as f:
                 pickle.dump(VectorStore, f)
-
-        # embeddings = OpenAIEmbeddings()
-        # VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
-
+
         # Accept user questions/query
         query = st.text_input("Ask questions about your PDF file:")
-        # st.write(query)
-
+
         if query:
             docs = VectorStore.similarity_search(query=query, k=3)
-
-            llm = OpenAI()
+
+            # Use Hugging Face model for question answering
+            model_name = "distilbert-base-uncased-distilled-squad"  # Example model
+            llm = HuggingFace(model_name=model_name)
             chain = load_qa_chain(llm=llm, chain_type="stuff")
             with get_openai_callback() as cb:
                 response = chain.run(input_documents=docs, question=query)
                 print(cb)
             st.write(response)
-
+
 if __name__ == '__main__':
     main()
-
+
 def set_bg_from_url(url, opacity=1):
-
     footer = """
     <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-gH2yIJqKdNHPEq0n4Mqa/HGKIhSkIHeL5AyhkYV8i59U5AR6csBvApHHNl/vI1Bx" crossorigin="anonymous">
     <footer>
@@ -92,10 +87,9 @@ def set_bg_from_url(url, opacity=1):
     </p>
     </div>
     </footer>
-    """
+    """
     st.markdown(footer, unsafe_allow_html=True)
-
-
+
     # Set background image using HTML and CSS
     st.markdown(
         f"""
@@ -111,4 +105,4 @@ def set_bg_from_url(url, opacity=1):
     )
 
 # Set background image from URL
-set_bg_from_url("https://www.1access.com/wp-content/uploads/2019/10/GettyImages-1180389186.jpg", opacity=0.875)
+set_bg_from_url("https://www.1access.com/wp-content/uploads/2019/10/GettyImages-1180389186.jpg", opacity=0.875)
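
Two review notes on the Hugging Face wiring introduced by this commit. First, the LangChain releases I am aware of expose HuggingFaceHub and HuggingFacePipeline in langchain.llms, not a HuggingFace class, so the new import on line 8 and the llm = HuggingFace(model_name=model_name) call may not resolve; get_openai_callback also meters only OpenAI calls, so the callback block will report nothing useful for a local model, and distilbert-base-uncased-distilled-squad is an extractive QA checkpoint that a generative "stuff" chain may not drive well. Below is a minimal sketch of the question-answering step using HuggingFacePipeline instead; the model id and the surrounding names (docs, query) are illustrative assumptions, not part of the commit:

# Sketch only, not the committed code: assumes transformers is installed and a
# small seq2seq model is acceptable; google/flan-t5-base is an illustrative choice.
from langchain.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain

llm = HuggingFacePipeline.from_model_id(
    model_id="google/flan-t5-base",   # assumption: any text2text-generation model
    task="text2text-generation",
)
chain = load_qa_chain(llm=llm, chain_type="stuff")
response = chain.run(input_documents=docs, question=query)  # docs, query as in main()

Second, HuggingFaceEmbeddings loads a sentence-transformers model, so that package needs to be installed, and pickling the whole FAISS wrapper can fail when the embedding object it holds is not picklable. LangChain's FAISS class also offers save_local and load_local, which persist the index and metadata to a folder and take the embeddings object back at load time. A hedged alternative to the pickle block, treating store_name as a folder name (an assumption for illustration):

# Sketch only: disk caching via FAISS.save_local / FAISS.load_local instead of
# pickle; imports and the chunks / store_name variables are as in app.py's main().
embeddings = HuggingFaceEmbeddings()
if os.path.exists(store_name):
    VectorStore = FAISS.load_local(store_name, embeddings)
else:
    VectorStore = FAISS.from_texts(chunks, embedding=embeddings)
    VectorStore.save_local(store_name)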