import gradio as gr
import os
import fitz  # PyMuPDF
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from groq import Groq
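# Assumed dependencies for this script (not pinned in this listing): gradio, pymupdf,
# numpy, faiss-cpu, sentence-transformers, and groq. A typical install might be:
#   pip install gradio pymupdf numpy faiss-cpu sentence-transformers groq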
# Load the Groq API key securely from the environment (set GROQ_API_KEY before launching)
groq_api_key = os.getenv("GROQ_API_KEY")
client = Groq(api_key=groq_api_key)

# Load the sentence-embedding model used to index and retrieve PDF chunks
model = SentenceTransformer('all-MiniLM-L6-v2')

# In-memory state, populated once a PDF has been processed
stored_chunks = []
stored_index = None
def extract_text_from_pdf(pdf_path):
    """Extract plain text from every page of the PDF at pdf_path."""
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text
def handle_pdf(file_path):
    """Read the uploaded PDF, chunk it, embed the chunks, and build a FAISS index."""
    global stored_chunks, stored_index
    try:
        # Read text
        text = extract_text_from_pdf(file_path)

        # Simple chunking into blocks of 500 words
        words = text.split()
        chunks = [' '.join(words[i:i + 500]) for i in range(0, len(words), 500)]

        # Embed the chunks and build a FAISS index (FAISS expects float32 vectors)
        embeddings = model.encode(chunks)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(np.array(embeddings, dtype=np.float32))

        # Store for later queries
        stored_chunks = chunks
        stored_index = index
        return "✅ PDF successfully processed. Ready for questions."
    except Exception as e:
        return f"❌ Error during PDF processing: {str(e)}"
def answer_query(query):
    """Retrieve the chunks most relevant to the query and ask the Groq LLM to answer."""
    if not stored_chunks or stored_index is None:
        return "❌ Please upload and process a PDF first."
    try:
        # Embed the query and retrieve the top matching chunks (at most 3)
        query_vec = np.array(model.encode(query), dtype=np.float32).reshape(1, -1)
        k = min(3, len(stored_chunks))
        D, I = stored_index.search(query_vec, k)
        top_chunks = [stored_chunks[i] for i in I[0]]
        context = "\n\n".join(top_chunks)

        # Ask the LLM to answer using only the retrieved context
        prompt = f"""Answer the question based on the context below:\n\nContext:\n{context}\n\nQuestion: {query}\nAnswer:"""
        response = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"❌ Error during answering: {str(e)}"
# 🧠 Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# 📄 PDF Q&A using Groq + LLaMA3")

    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"], type="filepath")
        process_output = gr.Textbox(label="Processing Status")
    process_button = gr.Button("📥 Process PDF")
    process_button.click(fn=handle_pdf, inputs=[file_input], outputs=[process_output])

    gr.Markdown("## 💬 Ask a Question from the PDF")
    question_input = gr.Textbox(label="Your Question")
    ask_button = gr.Button("🤖 Ask")
    answer_output = gr.Textbox(label="Answer", lines=5)
    ask_button.click(fn=answer_query, inputs=[question_input], outputs=[answer_output])

demo.launch()
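# To run locally (assumed setup, since the listing does not show how the Space is launched):
#   export GROQ_API_KEY=your_key_here
#   python app.py   # "app.py" is the conventional Spaces filename, assumed here
# then open the local URL that Gradio prints.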