RAG_Application / app.py
isana25's picture
Update app.py
1fb2c6f verified
import gradio as gr
import os
import fitz # PyMuPDF
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
# Load the Groq API key from the environment instead of hard-coding it.
# os.getenv returns None when GROQ_API_KEY is unset.
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)
# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')
# Module-level state shared between the two UI callbacks:
# handle_pdf() overwrites both values, answer_query() reads them.
stored_chunks = []  # text chunks of the most recently processed PDF
stored_index = None  # FAISS index built over the embeddings of stored_chunks
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page of a PDF.

    Args:
        pdf_path: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages.
    """
    # The context manager closes the document (and its file handle) even
    # when text extraction of a page raises.
    with fitz.open(pdf_path) as doc:
        # join() builds the result in one pass instead of repeated `+=`.
        return "".join(page.get_text() for page in doc)
def handle_pdf(file_path):
    """Extract, chunk, embed and index a PDF so questions can be asked on it.

    On success the module-level ``stored_chunks`` and ``stored_index`` are
    replaced with the chunks and FAISS index of this PDF. On any failure they
    are left untouched.

    Args:
        file_path: Path of the uploaded PDF as delivered by the file input.
            A falsy value (presumably what the UI sends when nothing was
            uploaded -- confirm against the Gradio version in use) is
            rejected up front.

    Returns:
        A human-readable status string: a success message, or a message
        starting with the cross-mark emoji that describes what went wrong.
    """
    global stored_chunks, stored_index

    # Reject a missing upload explicitly instead of letting it surface later
    # as an unrelated-looking processing error.
    if not file_path:
        return "❌ Please upload a PDF file first."

    try:
        text = extract_text_from_pdf(file_path)

        # Simple fixed-size chunking on whitespace-separated words.
        words = text.split()
        if not words:
            # e.g. scanned/image-only PDFs: nothing to embed, and an empty
            # embedding matrix has no second dimension to size the index.
            return "❌ No extractable text found in the PDF."

        chunk_size = 500
        chunks = [
            " ".join(words[start:start + chunk_size])
            for start in range(0, len(words), chunk_size)
        ]

        # FAISS works on float32 matrices; asarray is a no-op when the
        # encoder already returns that dtype.
        embeddings = np.asarray(model.encode(chunks), dtype="float32")
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)

        # Publish the new state only after everything above succeeded.
        stored_chunks = chunks
        stored_index = index
        return "✅ PDF successfully processed. Ready for questions."
    except Exception as e:
        # Top-level UI boundary: report the failure in the status box.
        return f"❌ Error during PDF processing: {str(e)}"
def answer_query(query):
    """Answer a question using the most relevant chunks of the processed PDF.

    The question is embedded with the module-level ``model``, the nearest
    chunks (at most three) are looked up in ``stored_index``, and they are
    sent together with the question to the Groq chat completion endpoint.

    Args:
        query: The user's question as plain text.

    Returns:
        The model's answer with surrounding whitespace stripped, or a message
        starting with the cross-mark emoji when the question is empty, no PDF
        has been processed yet, or any step raises.
    """
    # Do not spend an embedding and an LLM call on an empty question.
    if not query or not query.strip():
        return "❌ Please enter a question."
    if not stored_chunks or stored_index is None:
        return "❌ Please upload and process a PDF first."

    try:
        query_vec = model.encode(query).reshape(1, -1)

        # Never request more neighbours than there are chunks: FAISS pads
        # missing results with -1, which Python indexing would silently
        # turn into "the last chunk" and duplicate it in the context.
        k = min(3, len(stored_chunks))
        _, neighbour_ids = stored_index.search(query_vec, k=k)
        top_chunks = [stored_chunks[i] for i in neighbour_ids[0] if i >= 0]

        context = "\n\n".join(top_chunks)
        prompt = (
            "Answer the question based on the context below:\n\n"
            f"Context:\n{context}\n\n"
            f"Question: {query}\nAnswer:"
        )
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        # Top-level UI boundary: show the failure in the answer box.
        return f"❌ Error during answering: {str(e)}"
# 🧠 Gradio UI
# Layout: an upload row with its status box, a button that indexes the PDF,
# then a question box, an ask button and the answer box.
# NOTE(review): the nesting under gr.Row() is reconstructed -- confirm that
# only the file input and the status box belong in the row.
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Q&A using Groq + LLaMA3")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_output = gr.Textbox(label="Processing Status")

    process_button = gr.Button("📥 Process PDF")
    process_button.click(fn=handle_pdf, inputs=[file_input], outputs=[process_output])

    gr.Markdown("## 💬 Ask a Question from the PDF")
    question_input = gr.Textbox(label="Your Question")
    ask_button = gr.Button("🤖 Ask")
    answer_output = gr.Textbox(label="Answer", lines=5)
    ask_button.click(fn=answer_query, inputs=[question_input], outputs=[answer_output])

# Start the server only when run as a script, so importing this module
# (tests, tooling) does not block on a running web server.
if __name__ == "__main__":
    demo.launch()