Kal1510 committed
Commit 610b317 · verified · 1 Parent(s): 5f3ca8f

Update app.py

Files changed (1): app.py (+85 -157)
app.py CHANGED
@@ -1,40 +1,32 @@
  import os
- import torch
  import gradio as gr
  from PyPDF2 import PdfReader
- from transformers import (
-     AutoTokenizer, pipeline,
-     AutoModelForCausalLM, AutoConfig,
-     BitsAndBytesConfig
- )
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain_community.vectorstores import FAISS
  from langchain.prompts import PromptTemplate
  from langchain.chains import LLMChain
  from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.schema import Document
- from langchain import HuggingFacePipeline
-
- from huggingface_hub import login
+ from llama_cpp import Llama
+ import warnings
+ warnings.filterwarnings("ignore")

- api_key=os.getenv("api_key")

- try:
-     login(token=api_key)
-     print("login!")
- except Exception as e:
-     print(f"Login failed: {e}")
+ import subprocess

- # ------------------------------
- # Device setup
- # ------------------------------
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ subprocess.run([
+     "huggingface-cli", "download",
+     "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+     "mistral-7b-instruct-v0.1.Q2_K.gguf",
+     "--local-dir", "./models",
+     "--local-dir-use-symlinks", "False"
+ ], check=True)

  # ------------------------------
- # Embedding model config
+ # Device and Embedding Setup (CPU optimized)
  # ------------------------------
  modelPath = "sentence-transformers/all-mpnet-base-v2"
- model_kwargs = {"device": str(device)}
+ model_kwargs = {"device": "cpu"}  # Force CPU usage
  encode_kwargs = {"normalize_embedding": False}

  embeddings = HuggingFaceEmbeddings(
@@ -44,50 +36,59 @@ embeddings = HuggingFaceEmbeddings(
  )

  # ------------------------------
- # Load Mistral model in 4bit
+ # Load Mistral GGUF via llama.cpp (CPU optimized)
  # ------------------------------
- model_name = "mistralai/Mistral-7B-Instruct-v0.1"
- tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- tokenizer.pad_token = tokenizer.eos_token
- tokenizer.padding_side = "right"
-
- # 4-bit quantization config
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_use_double_quant=True,
-     bnb_4bit_compute_dtype=torch.float16
+ llm_cpp = Llama(
+     model_path="./models/mistral-7b-instruct-v0.1.Q2_K.gguf",
+     n_ctx=2048,
+     n_threads=4,       # Adjust based on your CPU cores
+     n_gpu_layers=0,    # Force CPU-only
+     temperature=0.7,
+     top_p=0.9,
+     repeat_penalty=1.1
  )

- # Load model
- model = AutoModelForCausalLM.from_pretrained(
-     model_name,
-     # quantization_config=bnb_config,
-     device_map="auto"
- )
+ # ------------------------------
+ # LangChain-compatible wrapper
+ # ------------------------------
+ def mistral_llm(prompt):
+     output = llm_cpp(
+         prompt,
+         max_tokens=512,  # Reduced for CPU performance
+         stop=["</s>", "[INST]", "[/INST]"]
+     )
+     return output["choices"][0]["text"].strip()

  # ------------------------------
- # Improved Text Generation Pipeline
+ # Prompt Template (unchanged)
  # ------------------------------
- text_generation = pipeline(
-     model=model,
-     tokenizer=tokenizer,
-     task="text-generation",
-     temperature=0.7,
-     top_p=0.9,
-     top_k=50,
-     repetition_penalty=1.1,
-     return_full_text=False,
-     max_new_tokens=2000,
-     do_sample=True,
-     eos_token_id=tokenizer.eos_token_id,
- )
+ def get_qa_prompt():
+     template = """<s>[INST] \
+ You are a helpful, knowledgeable AI assistant. Answer the user's question based on the provided context.
+
+ Guidelines:
+ - Respond in a natural, conversational tone
+ - Be detailed but concise
+ - Use paragraphs and bullet points when appropriate
+ - If you don't know, say so
+ - Maintain a friendly and professional demeanor

- # Wrap in LangChain interface
- mistral_llm = HuggingFacePipeline(pipeline=text_generation)
+ Conversation History:
+ {chat_history}
+
+ Relevant Context:
+ {context}
+
+ Current Question: {question}
+
+ Provide a helpful response: [/INST]"""
+     return PromptTemplate(
+         template=template,
+         input_variables=["context", "question", "chat_history"]
+     )

  # ------------------------------
- # PDF Processing Functions
+ # PDF and Chat Logic (optimized for CPU)
  # ------------------------------
  def pdf_text(pdf_docs):
      text = ""
@@ -101,8 +102,8 @@ def pdf_text(pdf_docs):

  def get_chunks(text):
      splitter = RecursiveCharacterTextSplitter(
-         chunk_size=1000,
-         chunk_overlap=200,
+         chunk_size=800,   # Smaller chunks for CPU
+         chunk_overlap=100,
          length_function=len
      )
      chunks = splitter.split_text(text)
@@ -112,96 +113,50 @@ def get_vectorstore(documents):
      db = FAISS.from_documents(documents, embedding=embeddings)
      db.save_local("faiss_index")

- # ------------------------------
- # Conversational Prompt Template
- # ------------------------------
- def get_qa_prompt():
-     prompt_template = """<s>[INST]
- You are a helpful, knowledgeable AI assistant. Answer the user's question based on the provided context.
-
- Guidelines:
- - Respond in a natural, conversational tone
- - Be detailed but concise
- - Use paragraphs and bullet points when appropriate
- - If you don't know, say so
- - Maintain a friendly and professional demeanor
-
- Conversation History:
- {chat_history}
-
- Relevant Context:
- {context}
-
- Current Question: {question}
-
- Provide a helpful response: [/INST]"""
-
-     return PromptTemplate(
-         template=prompt_template,
-         input_variables=["context", "question", "chat_history"]
-     )
+ def format_chat_history(history):
+     return "\n".join([f"User: {q}\nAssistant: {a}" for q, a in history[-2:]])  # Shorter history

- # ------------------------------
- # Chat Handling Functions
- # ------------------------------
  def handle_pdf_upload(pdf_files):
+     if not pdf_files:
+         return "⚠️ Upload at least one PDF"
      try:
-         if not pdf_files:
-             return "⚠️ Please upload at least one PDF file"
-
          text = pdf_text(pdf_files)
          if not text.strip():
-             return "⚠️ Could not extract text from PDFs - please try different files"
-
+             return "⚠️ Could not extract text"
          chunks = get_chunks(text)
          get_vectorstore(chunks)
-         return f"✅ Processed {len(pdf_files)} PDF(s) with {len(chunks)} text chunks"
+         return f"✅ Processed {len(pdf_files)} PDF(s) with {len(chunks)} chunks"
      except Exception as e:
          return f"❌ Error: {str(e)}"

- def format_chat_history(chat_history):
-     return "\n".join([f"User: {q}\nAssistant: {a}" for q, a in chat_history[-3:]])
-
  def user_query(msg, chat_history):
      if not os.path.exists("faiss_index"):
-         chat_history.append((msg, "Please upload PDF documents first so I can help you."))
+         chat_history.append((msg, "Please upload PDF documents first."))
          return "", chat_history
-
+
      try:
-         # Load vector store
          db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
-         retriever = db.as_retriever(search_kwargs={"k": 3})
-
-         # Get relevant context
+         retriever = db.as_retriever(search_kwargs={"k": 2})  # Fewer documents for CPU
          docs = retriever.get_relevant_documents(msg)
-         context = "\n\n".join([d.page_content for d in docs])
-
-         # Generate response
+         context = "\n\n".join([d.page_content for d in docs][:2])  # Limit context
+
          prompt = get_qa_prompt()
-         chain = LLMChain(llm=mistral_llm, prompt=prompt)
-
-         response = chain.run({
-             "question": msg,
-             "context": context,
-             "chat_history": format_chat_history(chat_history)
-         })
-
-         # Clean response
-         response = response.strip()
-         for end_token in ["</s>", "[INST]", "[/INST]"]:
-             if response.endswith(end_token):
-                 response = response[:-len(end_token)].strip()
-
+         final_prompt = prompt.format(
+             context=context[:1500],  # Further limit context size
+             question=msg,
+             chat_history=format_chat_history(chat_history)
+         )
+
+         response = mistral_llm(final_prompt)
          chat_history.append((msg, response))
          return "", chat_history
-
      except Exception as e:
          error_msg = f"Sorry, I encountered an error: {str(e)}"
          chat_history.append((msg, error_msg))
         return "", chat_history

  # ------------------------------
- # Gradio Interface
+ # Gradio Interface (your exact requested format)
  # ------------------------------
  with gr.Blocks(theme=gr.themes.Soft(), title="PDF Chat Assistant") as demo:
      with gr.Row():
@@ -209,7 +164,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="PDF Chat Assistant") as demo:
          # 📚 PDF Chat Assistant
          ### Have natural conversations with your documents
          """)
-
+
      with gr.Row():
          with gr.Column(scale=1, min_width=300):
              gr.Markdown("### Document Upload")
@@ -227,7 +182,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="PDF Chat Assistant") as demo:
              2. Click Process Documents
              3. Start chatting in the right panel
              """)
-
+
          with gr.Column(scale=2):
              chatbot = gr.Chatbot(
                  height=600,
@@ -260,37 +215,10 @@ with gr.Blocks(theme=gr.themes.Soft(), title="PDF Chat Assistant") as demo:
                  label="Example Questions"
              )

-     # Event handlers
-     upload_btn.click(
-         fn=handle_pdf_upload,
-         inputs=pdf_input,
-         outputs=status_box
-     )
-
-     submit_btn.click(
-         fn=user_query,
-         inputs=[message, chatbot],
-         outputs=[message, chatbot]
-     )
-
-     message.submit(
-         fn=user_query,
-         inputs=[message, chatbot],
-         outputs=[message, chatbot]
-     )
-
-     clear_chat.click(
-         lambda: [],
-         None,
-         chatbot,
-         queue=False
-     )
+     upload_btn.click(handle_pdf_upload, inputs=pdf_input, outputs=status_box)
+     submit_btn.click(user_query, inputs=[message, chatbot], outputs=[message, chatbot])
+     message.submit(user_query, inputs=[message, chatbot], outputs=[message, chatbot])
+     clear_chat.click(lambda: [], None, chatbot, queue=False)

- # Launch the app
  if __name__ == "__main__":
-     demo.launch(
-         server_name="0.0.0.0",
-         server_port=7861,
-         share=True,
-         debug=True
-     )
+     demo.launch(server_name="0.0.0.0", server_port=7862, share=True)  # serve on 0.0.0.0:7862 with a public share link