Spaces:

sanmmarr29
/

helperAi

Sleeping

App Files Files Community

sanmmarr29 committed on Jan 26

Commit

c660b8d

verified ·

1 Parent(s): d50769e

Upload 8 files

Browse files

Files changed (8) hide show

Dockerfile +24 -0
app/config.py +12 -0
app/database/mongodb.py +39 -0
app/main.py +40 -0
app/rag/document_processor.py +40 -0
app/rag/retriever.py +124 -0
app/utils/pdf_loader.py +32 -0
requirements.txt +15 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,24 @@

+FROM python:3.10-slim
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy requirements first to leverage Docker cache
+COPY requirements.txt .
+# Install CPU-only PyTorch BEFORE the requirements. Packages listed in
+# requirements.txt (e.g. sentence-transformers) depend on torch, so installing
+# them first pulls the default torch wheel and a later CPU-only install is
+# skipped as "requirement already satisfied".
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
+    && pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Expose port
+EXPOSE 7860
+# Run the application
+CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]

app/config.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from pydantic_settings import BaseSettings
+class Settings(BaseSettings):
+    """Application settings declared as a pydantic-settings ``BaseSettings`` model.
+    ``MONGODB_URL`` and ``HUGGINGFACE_TOKEN`` have no default value; the
+    collection and database names fall back to the literals given below.
+    """
+    MONGODB_URL: str
+    COLLECTION_NAME: str = "documents"
+    DATABASE_NAME: str = "ragbot"
+    HUGGINGFACE_TOKEN: str
+    class Config:
+        # Names ".env" as the env file for this settings class.
+        env_file = ".env"
+# Module-level instance, created when this module is imported; other modules
+# import it as ``settings``.
+settings = Settings()

app/database/mongodb.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from motor.motor_asyncio import AsyncIOMotorClient
+from ..config import settings
+import numpy as np
+class MongoDB:
+    def __init__(self):
+        self.client = None
+        self.db = None
+    async def connect(self):
+        self.client = AsyncIOMotorClient(settings.MONGODB_URL)
+        self.db = self.client[settings.DATABASE_NAME]
+        # Create text and vector indexes
+        collection = self.db[settings.COLLECTION_NAME]
+        await collection.create_index([("content", "text")])
+        await collection.create_index([("embeddings", "2dsphere")])
+    async def close(self):
+        if self.client:
+            self.client.close()
+    async def store_document(self, document_data):
+        collection = self.db[settings.COLLECTION_NAME]
+        result = await collection.insert_one(document_data)
+        return result.inserted_id
+    async def search_documents(self, query, limit=5):
+        collection = self.db[settings.COLLECTION_NAME]
+        # Combine text and vector search
+        text_results = collection.find(
+            {"$text": {"$search": query}},
+            {"score": {"$meta": "textScore"}}
+        ).sort([("score", {"$meta": "textScore"})]).limit(limit)
+        return await text_results.to_list(length=None)
+db = MongoDB()

app/main.py ADDED Viewed

	@@ -0,0 +1,40 @@

+from contextlib import asynccontextmanager
+from fastapi import FastAPI, UploadFile, File
+from .database.mongodb import db
+from .rag.document_processor import DocumentProcessor
+from .rag.retriever import RAGRetriever
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup
+    await db.connect()
+    yield
+    # Shutdown
+    await db.close()
+app = FastAPI(
+    title="RAG Chatbot",
+    description="A RAG-based chatbot using DeepSeek model",
+    version="1.0.0"
+)
+document_processor = DocumentProcessor()
+rag_retriever = RAGRetriever()
+@app.post("/upload-pdf")
+async def upload_pdf(file: UploadFile = File(...)):
+    content = await file.read()
+    await document_processor.process_pdf(content)
+    return {"message": "PDF processed successfully"}
+@app.post("/chat")
+async def chat(query: str):
+    response = await rag_retriever.get_response(query)
+    return {
+        "reasoning": response["reasoning"],
+        "answer": response["answer"],
+        "context_used": response["context_used"]
+    }
+# Direct-execution entry point: serves ``app`` with uvicorn on all interfaces,
+# port 7860. uvicorn is imported here, only when the module is run as a script.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)

app/rag/document_processor.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import dspy
+from typing import List
+from ..utils.pdf_loader import PDFLoader
+from ..database.mongodb import db
+from sentence_transformers import SentenceTransformer
+class DocumentProcessor:
+    def __init__(self):
+        self.pdf_loader = PDFLoader()
+        self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
+    def generate_embeddings(self, text: str):
+        return self.encoder.encode(text).tolist()
+    async def process_pdf(self, file_content: bytes):
+        # Extract text from PDF
+        text_content = self.pdf_loader.load_pdf(file_content)
+        # Process each page with improved chunking
+        for page_num, page_text in enumerate(text_content):
+            chunks = self.pdf_loader.chunk_text(
+                page_text,
+                chunk_size=512,  # Smaller chunks for better retrieval
+                overlap=50  # Add overlap between chunks
+            )
+            # Store chunks with embeddings in MongoDB
+            for chunk_num, chunk in enumerate(chunks):
+                document_data = {
+                    "page_number": page_num,
+                    "chunk_number": chunk_num,
+                    "content": chunk,
+                    "embeddings": self.generate_embeddings(chunk),
+                    "metadata": {
+                        "chunk_length": len(chunk),
+                        "position": chunk_num,
+                        "total_chunks": len(chunks)
+                    }
+                }
+                await db.store_document(document_data)

app/rag/retriever.py ADDED Viewed

	@@ -0,0 +1,124 @@

+import dspy
+from typing import List
+from ..database.mongodb import db
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from ..config import settings
+class RAGRetriever:
+    def __init__(self):
+        # Initialize DeepSeek model and tokenizer with CPU optimizations
+        self.model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+        self.tokenizer = AutoTokenizer.from_pretrained(
+            self.model_name,
+            token=settings.HUGGINGFACE_TOKEN
+        )
+        self.model = AutoModelForCausalLM.from_pretrained(
+            self.model_name,
+            token=settings.HUGGINGFACE_TOKEN,
+            torch_dtype=torch.bfloat16,
+            low_cpu_mem_usage=True,
+            device_map="cpu"
+        )
+        # Configure DSPy with custom LLM
+        class DeepSeekLLM(dspy.LM):
+            def __init__(self, model, tokenizer):
+                super().__init__()
+                self.model = model
+                self.tokenizer = tokenizer
+            def basic_generate(self, prompt, **kwargs):
+                inputs = self.tokenizer(
+                    prompt,
+                    return_tensors="pt",
+                    truncation=True,
+                    max_length=1024
+                )
+                with torch.inference_mode():  # More memory efficient than no_grad
+                    outputs = self.model.generate(
+                        inputs.input_ids,
+                        max_length=256,  # Reduced for CPU
+                        temperature=0.7,
+                        do_sample=True,
+                        pad_token_id=self.tokenizer.eos_token_id,
+                        num_beams=1,  # No beam search for faster inference
+                        use_cache=True
+                    )
+                return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        self.lm = DeepSeekLLM(self.model, self.tokenizer)
+        dspy.settings.configure(lm=self.lm)
+        # Define RAG program
+        class RAGProgram(dspy.Module):
+            def __init__(self):
+                super().__init__()
+                self.retrieve = dspy.Retrieve(k=5)
+                self.generate_answer = dspy.ChainOfThought("context, question -> answer")
+            def forward(self, question):
+                context = self.retrieve(question).passages
+                prediction = self.generate_answer(context=context, question=question)
+                return prediction.answer
+        self.rag_program = RAGProgram()
+        # Configure prompt template
+        self.template = """
+        Based on the following context, please answer the question accurately.
+        If the answer cannot be found in the context, say "I cannot answer based on the provided context."
+        Context: {context}
+        Question: {question}
+        Let's solve this step by step:
+        1) First, let's identify the key information from the context
+        2) Then, analyze how it relates to the question
+        3) Finally, provide a clear answer
+        Please format your response as:
+        Reasoning:
+        [Your step-by-step reasoning here]
+        Final Answer:
+        [Your concise answer here]
+        """
+    async def get_response(self, query: str) -> dict:
+        try:
+            # Get relevant documents from MongoDB
+            docs = await db.search_documents(query)
+            context = [doc['content'] for doc in docs]
+            # Prepare prompt
+            prompt = self.template.format(
+                context='\n'.join(context[:3]),
+                question=query
+            )
+            # Generate response using RAG program
+            raw_response = self.rag_program(prompt)
+            # Parse the response to separate reasoning and answer
+            try:
+                reasoning_part = raw_response.split("Final Answer:")[0].replace("Reasoning:", "").strip()
+                answer_part = raw_response.split("Final Answer:")[1].strip()
+            except:
+                # Fallback if response format is unexpected
+                reasoning_part = "Could not extract reasoning"
+                answer_part = raw_response
+            return {
+                "reasoning": reasoning_part,
+                "answer": answer_part,
+                "context_used": context[:3]  # Include used context for transparency
+            }
+        except Exception as e:
+            return {
+                "reasoning": "An error occurred during processing",
+                "answer": f"Error: {str(e)}",
+                "context_used": []
+            }

app/utils/pdf_loader.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import PyPDF2
+from typing import List
+import io
+class PDFLoader:
+    @staticmethod
+    def load_pdf(file_content: bytes) -> List[str]:
+        pdf_file = io.BytesIO(file_content)
+        pdf_reader = PyPDF2.PdfReader(pdf_file)
+        text_content = []
+        for page in pdf_reader.pages:
+            text_content.append(page.extract_text())
+        return text_content
+    @staticmethod
+    def chunk_text(text: str, chunk_size: int = 1000) -> List[str]:
+        chunks = []
+        current_chunk = ""
+        for sentence in text.split(". "):
+            if len(current_chunk) + len(sentence) <= chunk_size:
+                current_chunk += sentence + ". "
+            else:
+                chunks.append(current_chunk)
+                current_chunk = sentence + ". "
+        if current_chunk:
+            chunks.append(current_chunk)
+        return chunks

requirements.txt ADDED Viewed

	@@ -0,0 +1,15 @@

+fastapi
+uvicorn
+motor
+pymongo
+python-multipart
+PyPDF2
+dspy-ai
+pydantic
+pydantic-settings
+python-dotenv
+sentence-transformers
+numpy
+transformers
+cloudpickle
+accelerate>=0.26.0