isana25 committed on
Commit
97b8e7d
·
verified ·
1 Parent(s): 2180e83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -21
app.py CHANGED
@@ -8,7 +8,7 @@ from PyPDF2 import PdfReader
8
  from tempfile import NamedTemporaryFile
9
 
10
  # Initialize Groq client
11
- client = Groq(api_key=os.environ.get('GROQ_API_KEY'))
12
 
13
  # Function to extract text from a PDF
14
  def extract_text_from_pdf(pdf_file_path):
@@ -70,11 +70,11 @@ def download_pdf_from_url(url):
70
  else:
71
  return None
72
 
73
- # Function to process multiple PDFs and return the vector store
74
- def process_documents(links):
75
  vector_db = None
76
- for idx, link in enumerate(links):
77
- print(f"Processing document {idx + 1}...")
78
  pdf_path = download_pdf_from_url(link)
79
  if pdf_path:
80
  text = extract_text_from_pdf(pdf_path)
@@ -82,22 +82,28 @@ def process_documents(links):
82
  vector_db = create_embeddings_and_store(chunks, vector_db=vector_db)
83
  print(f"✅ Document {idx + 1} processed.")
84
  else:
85
- print(f"❌ Failed to process document {idx + 1}.")
86
  return vector_db
87
 
88
- # Example usage (to be called from Graido backend logic)
89
- if __name__ == "__main__":
90
- doc_links = [
91
- "https://drive.google.com/file/d/1YWX-RYxgtcKO1QETnz1N3rboZUhRZwcH/view?usp=sharing",
92
- "https://drive.google.com/file/d/1JPf0XvDhn8QoDOlZDrxCOpu4WzKFESNz/view?usp=sharing",
93
- ]
 
 
 
 
 
 
94
  vector_db = process_documents(doc_links)
95
- while True:
96
- user_query = input("Enter your query (or 'exit'): ")
97
- if user_query.lower() == "exit":
98
- break
99
- if vector_db:
100
- response = query_vector_db(user_query, vector_db)
101
- print("💬 Response:", response)
102
- else:
103
- print("⚠️ No documents available to query.")
 
8
  from tempfile import NamedTemporaryFile
9
 
10
  # Initialize Groq client
11
+ client = Groq(api_key=os.environ['GROQ_API_KEY'])
12
 
13
  # Function to extract text from a PDF
14
  def extract_text_from_pdf(pdf_file_path):
 
70
  else:
71
  return None
72
 
73
+ # Function to process all documents and build vector DB
74
+ def process_documents(doc_links):
75
  vector_db = None
76
+ for idx, link in enumerate(doc_links):
77
+ print(f"📄 Processing document {idx + 1}...")
78
  pdf_path = download_pdf_from_url(link)
79
  if pdf_path:
80
  text = extract_text_from_pdf(pdf_path)
 
82
  vector_db = create_embeddings_and_store(chunks, vector_db=vector_db)
83
  print(f"✅ Document {idx + 1} processed.")
84
  else:
85
+ print(f"❌ Failed to process document {idx + 1}")
86
  return vector_db
87
 
88
+ # Main callable function for Graido
89
+ def run_query_pipeline(doc_links, user_query):
90
+ """
91
+ Process documents and run a query. Returns LLM response.
92
+
93
+ Args:
94
+ doc_links (List[str]): List of Google Drive view links
95
+ user_query (str): User's natural language query
96
+
97
+ Returns:
98
+ str: LLM-generated response based on document context
99
+ """
100
  vector_db = process_documents(doc_links)
101
+
102
+ if not vector_db:
103
  return "⚠️ No documents could be processed."
104
+
105
+ if not user_query:
106
  return "⚠️ No user query provided."
107
+
108
+ response = query_vector_db(user_query, vector_db)
109
+ return response