kanishka089 committed on
Commit
794af9d
·
verified ·
1 Parent(s): b9f5ac7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -8
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import gradio as gr
3
  from dotenv import load_dotenv
4
  from huggingface_hub import InferenceClient
@@ -20,12 +22,34 @@ model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
20
  gpt4all_kwargs = {'allow_download': 'false'}
21
 
22
 
23
- def loadAndRetrieveDocuments() -> VectorStoreRetriever:
24
- loader = pdf.PyPDFLoader("constitution.pdf") # constitution
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  documents = loader.load()
 
 
 
 
 
 
26
  textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
27
  documentSplits = textSplitter.split_documents(documents)
28
- embeddings = OllamaEmbeddings(model="nomic-embed-text")
29
  vectorStore = Chroma.from_documents(documents=documentSplits, embedding=GPT4AllEmbeddings(model_name=model_name,
30
  gpt4all_kwargs=gpt4all_kwargs))
31
  return vectorStore.as_retriever()
@@ -35,7 +59,12 @@ def formatDocuments(documents: list) -> str:
35
  return "\n\n".join(document.page_content for document in documents)
36
 
37
 
38
- retriever = loadAndRetrieveDocuments()
 
 
 
 
 
39
 
40
  # Chat history
41
  chat_history = []
@@ -47,13 +76,13 @@ def ragChain(question: str) -> str:
47
  formattedContext = formatDocuments(retrievedDocuments)
48
  formattedPrompt = (f"Question: {question}\n\n"
49
  f"Context: {formattedContext}\n\n"
50
- f"Please provide a detailed and explanatory answer based solely on the provided context.")
51
 
52
  messages = chat_history + [{"role": "user", "content": formattedPrompt}]
53
 
54
  response = client.chat_completion(
55
  messages=messages,
56
- max_tokens=800,
57
  stream=False
58
  )
59
  # Extract the generated response text using dataclass attributes
@@ -73,11 +102,11 @@ with gr.Blocks() as demo:
73
  with gr.Column():
74
  textbox = gr.Textbox(label="Question")
75
  with gr.Row():
76
- buttonTerms = gr.Button("Terms")
77
  button = gr.Button("Submit")
78
 
79
  with gr.Column():
80
- output = gr.Textbox(label="Output")
81
 
82
 
83
  def on_button_click(question):
 
1
  import os
2
+ import requests
3
+ import tempfile
4
  import gradio as gr
5
  from dotenv import load_dotenv
6
  from huggingface_hub import InferenceClient
 
22
  gpt4all_kwargs = {'allow_download': 'false'}
23
 
24
 
25
# Download the PDF from a URL (with a local fallback) and build a retriever over it.
def loadAndRetrieveDocuments(url: str, local_file_path: str) -> VectorStoreRetriever:
    """Build a vector-store retriever over the constitution PDF.

    Tries to download the PDF from *url*; on any download error it falls
    back to the bundled copy at *local_file_path*. The document is split
    into overlapping chunks, embedded with GPT4All embeddings, and loaded
    into a Chroma store.

    Args:
        url: HTTP(S) location of the PDF.
        local_file_path: path to a local fallback copy of the PDF.

    Returns:
        A VectorStoreRetriever over the chunked, embedded document.
    """
    # Default to the local copy so the name is always bound, even if the
    # download path fails before assigning it.
    temp_pdf_path = local_file_path
    try:
        # Bounded timeout: without it a dead server hangs app startup forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Surface 4xx/5xx as exceptions.

        # Save the PDF to a temporary file for PyPDFLoader to read.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(response.content)
            temp_pdf_path = temp_file.name
    except (requests.RequestException, OSError) as e:
        # Only download/IO errors trigger the fallback; anything else is a
        # real bug and should propagate rather than be silently swallowed.
        print(f"Failed to download PDF from URL: {e}")
        temp_pdf_path = local_file_path

    try:
        # Load the PDF (downloaded temp file or local fallback).
        loader = pdf.PyPDFLoader(temp_pdf_path)
        documents = loader.load()
    finally:
        # Always remove the temp file, even if PDF parsing raises —
        # otherwise each failed startup leaks a file in the temp dir.
        if temp_pdf_path != local_file_path:
            os.remove(temp_pdf_path)

    # Chunk the document with overlap so retrieval context is self-contained.
    textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    documentSplits = textSplitter.split_documents(documents)
    # model_name / gpt4all_kwargs are module-level globals defined above.
    vectorStore = Chroma.from_documents(
        documents=documentSplits,
        embedding=GPT4AllEmbeddings(model_name=model_name,
                                    gpt4all_kwargs=gpt4all_kwargs),
    )
    return vectorStore.as_retriever()
 
59
  return "\n\n".join(document.page_content for document in documents)
60
 
61
 
62
+ # Define URL and local file path
63
+ url = "http://www.parliament.lk/files/pdf/constitution.pdf"
64
+ local_file_path = "constitution.pdf" # Local file path
65
+
66
+ # Load documents from URL or local file
67
+ retriever = loadAndRetrieveDocuments(url, local_file_path)
68
 
69
  # Chat history
70
  chat_history = []
 
76
  formattedContext = formatDocuments(retrievedDocuments)
77
  formattedPrompt = (f"Question: {question}\n\n"
78
  f"Context: {formattedContext}\n\n"
79
+ f"Please provide a detailed answer based solely on the provided context.")
80
 
81
  messages = chat_history + [{"role": "user", "content": formattedPrompt}]
82
 
83
  response = client.chat_completion(
84
  messages=messages,
85
+ max_tokens=700,
86
  stream=False
87
  )
88
  # Extract the generated response text using dataclass attributes
 
102
  with gr.Column():
103
  textbox = gr.Textbox(label="Question")
104
  with gr.Row():
105
+ buttonTerms = gr.Button("Terms of use")
106
  button = gr.Button("Submit")
107
 
108
  with gr.Column():
109
+ output = gr.Textbox(label="Output", lines=25)
110
 
111
 
112
  def on_button_click(question):