ritampatra committed on
Commit
5ab0b92
·
verified ·
1 Parent(s): c3aa5e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -8
app.py CHANGED
@@ -1,10 +1,10 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
- from langchain.embeddings import HuggingFaceEmbeddings
4
  from langchain.vectorstores import FAISS
5
  from langchain.document_loaders import PyPDFLoader
6
  from langchain.chains.question_answering import load_qa_chain
7
  from langchain.llms import HuggingFaceHub
 
8
 
9
  # Function to load and process the document (PDF)
10
  def load_document(file):
@@ -12,16 +12,32 @@ def load_document(file):
12
  documents = loader.load()
13
  return documents
14
 
15
- # Function to embed the documents using sentence-transformers and store them in FAISS
16
  def embed_documents(documents):
17
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
18
- vector_store = FAISS.from_documents(documents, embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  return vector_store
20
 
21
- # Function to handle the chatbot's conversation by querying the document embeddings
22
  def chat_with_document(query, vector_store):
23
  retriever = vector_store.as_retriever()
24
- llm = HuggingFaceHub(repo_id="google/flan-t5-large", model_kwargs={"temperature":0.2})
25
  chain = load_qa_chain(llm, chain_type="stuff")
26
  results = retriever.get_relevant_documents(query)
27
  answer = chain.run(input_documents=results, question=query)
@@ -61,7 +77,7 @@ def chatbot_interface():
61
  with gr.Row():
62
  question.render()
63
  answer.render()
64
-
65
  # Launch the Gradio app
66
  demo.launch()
67
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer, AutoModel, pipeline
 
3
  from langchain.vectorstores import FAISS
4
  from langchain.document_loaders import PyPDFLoader
5
  from langchain.chains.question_answering import load_qa_chain
6
  from langchain.llms import HuggingFaceHub
7
+ import torch
8
 
9
  # Function to load and process the document (PDF)
10
  def load_document(file):
 
12
  documents = loader.load()
13
  return documents
14
 
15
# Function to embed documents using a Hugging Face model directly
def embed_documents(documents):
    """Embed LangChain documents with all-mpnet-base-v2 and index them in FAISS.

    Args:
        documents: list of LangChain Document objects (each exposing
            .page_content and .metadata — assumed from PyPDFLoader output).

    Returns:
        A FAISS vector store holding one embedding per input document,
        with document metadata preserved.
    """
    model_name = "sentence-transformers/all-mpnet-base-v2"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    model.eval()  # inference only — disable dropout

    def _embed(text):
        # Tokenize with truncation to the model's max length, then mean-pool
        # over REAL tokens only: masking out padding keeps pad vectors from
        # diluting the average (plain .mean(dim=1) would include them).
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            output = model(**inputs)
        mask = inputs["attention_mask"].unsqueeze(-1).float()
        summed = (output.last_hidden_state * mask).sum(dim=1)
        counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid divide-by-zero
        return (summed / counts).squeeze(0).tolist()

    class _Embedder:
        # Minimal adapter exposing the embed_query/embed_documents interface
        # FAISS needs so it can embed queries at retrieval time.
        def embed_query(self, text):
            return _embed(text)

        def embed_documents(self, texts):
            return [_embed(t) for t in texts]

    texts = [doc.page_content for doc in documents]
    embeddings = [_embed(t) for t in texts]

    # BUG FIX: FAISS.from_embeddings expects (text, embedding) pairs plus an
    # embedding object (used to embed queries later), not (embeddings,
    # documents). The original call would raise at runtime and also dropped
    # all document metadata.
    vector_store = FAISS.from_embeddings(
        text_embeddings=list(zip(texts, embeddings)),
        embedding=_Embedder(),
        metadatas=[doc.metadata for doc in documents],
    )
    return vector_store
36
 
37
+ # Function to handle chatbot queries
38
  def chat_with_document(query, vector_store):
39
  retriever = vector_store.as_retriever()
40
+ llm = HuggingFaceHub(repo_id="google/flan-t5-large", model_kwargs={"temperature": 0.2})
41
  chain = load_qa_chain(llm, chain_type="stuff")
42
  results = retriever.get_relevant_documents(query)
43
  answer = chain.run(input_documents=results, question=query)
 
77
  with gr.Row():
78
  question.render()
79
  answer.render()
80
+
81
  # Launch the Gradio app
82
  demo.launch()
83