Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ from PyPDF2 import PdfReader
|
|
8 |
from tempfile import NamedTemporaryFile
|
9 |
|
10 |
# Initialize Groq client
|
11 |
-
client = Groq(api_key=os.environ
|
12 |
|
13 |
# Function to extract text from a PDF
|
14 |
def extract_text_from_pdf(pdf_file_path):
|
@@ -70,11 +70,11 @@ def download_pdf_from_url(url):
|
|
70 |
else:
|
71 |
return None
|
72 |
|
73 |
-
# Function to process
|
74 |
-
def process_documents(
|
75 |
vector_db = None
|
76 |
-
for idx, link in enumerate(
|
77 |
-
print(f"Processing document {idx + 1}...")
|
78 |
pdf_path = download_pdf_from_url(link)
|
79 |
if pdf_path:
|
80 |
text = extract_text_from_pdf(pdf_path)
|
@@ -82,22 +82,28 @@ def process_documents(links):
|
|
82 |
vector_db = create_embeddings_and_store(chunks, vector_db=vector_db)
|
83 |
print(f"✅ Document {idx + 1} processed.")
|
84 |
else:
|
85 |
-
print(f"❌ Failed to process document {idx + 1}
|
86 |
return vector_db
|
87 |
|
88 |
-
#
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
vector_db = process_documents(doc_links)
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
8 |
from tempfile import NamedTemporaryFile
|
9 |
|
10 |
# Initialize Groq client
|
11 |
+
client = Groq(api_key=os.environ['GROQ_API_KEY'])
|
12 |
|
13 |
# Function to extract text from a PDF
|
14 |
def extract_text_from_pdf(pdf_file_path):
|
|
|
70 |
else:
|
71 |
return None
|
72 |
|
73 |
+
# Function to process all documents and build vector DB
|
74 |
+
def process_documents(doc_links):
|
75 |
vector_db = None
|
76 |
+
for idx, link in enumerate(doc_links):
|
77 |
+
print(f"π Processing document {idx + 1}...")
|
78 |
pdf_path = download_pdf_from_url(link)
|
79 |
if pdf_path:
|
80 |
text = extract_text_from_pdf(pdf_path)
|
|
|
82 |
vector_db = create_embeddings_and_store(chunks, vector_db=vector_db)
|
83 |
print(f"✅ Document {idx + 1} processed.")
|
84 |
else:
|
85 |
+
print(f"❌ Failed to process document {idx + 1}")
|
86 |
return vector_db
|
87 |
|
88 |
+
# Main callable function for Gradio
|
89 |
+
def run_query_pipeline(doc_links, user_query):
|
90 |
+
"""
|
91 |
+
Process documents and run a query. Returns LLM response.
|
92 |
+
|
93 |
+
Args:
|
94 |
+
doc_links (List[str]): List of Google Drive view links
|
95 |
+
user_query (str): User's natural language query
|
96 |
+
|
97 |
+
Returns:
|
98 |
+
str: LLM-generated response based on document context
|
99 |
+
"""
|
100 |
vector_db = process_documents(doc_links)
|
101 |
+
|
102 |
+
if not vector_db:
|
103 |
+
return "⚠️ No documents could be processed."
|
104 |
+
|
105 |
+
if not user_query:
|
106 |
+
return "⚠️ No user query provided."
|
107 |
+
|
108 |
+
response = query_vector_db(user_query, vector_db)
|
109 |
+
return response
|