Update app.py
app.py CHANGED
@@ -1,4 +1,6 @@
 import os
+import requests
+import tempfile
 import gradio as gr
 from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
@@ -20,12 +22,34 @@ model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
 gpt4all_kwargs = {'allow_download': 'false'}
 
 
-
-
+# Function to download the PDF from a URL and load documents
+def loadAndRetrieveDocuments(url: str, local_file_path: str) -> VectorStoreRetriever:
+    try:
+        # Attempt to download PDF
+        response = requests.get(url)
+        response.raise_for_status()  # Ensure we notice bad responses
+
+        # Save PDF to a temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            temp_file.write(response.content)
+            temp_pdf_path = temp_file.name
+
+    except Exception as e:
+        print(f"Failed to download PDF from URL: {e}")
+        # Use local file if URL download fails
+        temp_pdf_path = local_file_path
+
+    # Load the PDF from the temporary file
+    loader = pdf.PyPDFLoader(temp_pdf_path)
     documents = loader.load()
+
+    # Clean up temporary file if created
+    if temp_pdf_path != local_file_path:
+        os.remove(temp_pdf_path)
+
+    # Process documents
     textSplitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
     documentSplits = textSplitter.split_documents(documents)
-    embeddings = OllamaEmbeddings(model="nomic-embed-text")
     vectorStore = Chroma.from_documents(documents=documentSplits, embedding=GPT4AllEmbeddings(model_name=model_name,
                                                                                               gpt4all_kwargs=gpt4all_kwargs))
     return vectorStore.as_retriever()
@@ -35,7 +59,12 @@ def formatDocuments(documents: list) -> str:
     return "\n\n".join(document.page_content for document in documents)
 
 
-
+# Define URL and local file path
+url = "http://www.parliament.lk/files/pdf/constitution.pdf"
+local_file_path = "constitution.pdf"  # Local file path
+
+# Load documents from URL or local file
+retriever = loadAndRetrieveDocuments(url, local_file_path)
 
 # Chat history
 chat_history = []
@@ -47,13 +76,13 @@ def ragChain(question: str) -> str:
     formattedContext = formatDocuments(retrievedDocuments)
     formattedPrompt = (f"Question: {question}\n\n"
                        f"Context: {formattedContext}\n\n"
-                       f"Please provide a detailed
+                       f"Please provide a detailed answer based solely on the provided context.")
 
     messages = chat_history + [{"role": "user", "content": formattedPrompt}]
 
     response = client.chat_completion(
         messages=messages,
-        max_tokens=
+        max_tokens=700,
         stream=False
     )
     # Extract the generated response text using dataclass attributes
@@ -73,11 +102,11 @@ with gr.Blocks() as demo:
     with gr.Column():
         textbox = gr.Textbox(label="Question")
         with gr.Row():
-            buttonTerms = gr.Button("Terms")
+            buttonTerms = gr.Button("Terms of use")
             button = gr.Button("Submit")
 
     with gr.Column():
-        output = gr.Textbox(label="Output")
+        output = gr.Textbox(label="Output", lines=25)
 
 
     def on_button_click(question):
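The heart of the commit is the download-with-local-fallback pattern inside loadAndRetrieveDocuments: fetch the PDF over HTTP, write it to a temporary file, and fall back to a bundled local copy when the download fails. A minimal standalone sketch of that pattern, stripped of the LangChain plumbing, might look like this (fetch_pdf is a hypothetical helper name, and the timeout argument is an assumption not present in the commit):

import os
import tempfile

import requests

def fetch_pdf(url: str, local_file_path: str) -> str:
    # Return a path to a readable PDF: a temp copy downloaded from `url`,
    # or `local_file_path` when the download fails.
    try:
        response = requests.get(url, timeout=30)  # timeout is assumed, not in the diff
        response.raise_for_status()  # surface HTTP errors as exceptions
    except requests.RequestException as exc:
        print(f"Failed to download PDF from URL: {exc}")
        return local_file_path
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(response.content)
        return temp_file.name

path = fetch_pdf("http://www.parliament.lk/files/pdf/constitution.pdf", "constitution.pdf")
# The caller removes the temp copy after use, mirroring the cleanup in the diff:
if path != "constitution.pdf":
    os.remove(path)

One design note: the diff deletes the temporary file only when it differs from local_file_path, which keeps the bundled constitution.pdf intact whenever the fallback path is taken.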
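The other behavioral changes are the completed prompt suffix and the pinned max_tokens=700 in the chat_completion call. For context, a hedged sketch of that call shape with huggingface_hub, assuming an HF_TOKEN in the environment and a chat-capable model (the model id here is illustrative; the diff does not show which one the Space uses):

import os

from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=os.getenv("HF_TOKEN"))  # model id assumed

messages = [{"role": "user", "content": "Question: ...\n\nContext: ..."}]
response = client.chat_completion(
    messages=messages,
    max_tokens=700,  # the value the commit pins
    stream=False,
)
# Fields are dataclass attributes, matching the diff's comment about extraction:
print(response.choices[0].message.content)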