sanmmarr29 committed on
Commit c660b8d · verified · 1 Parent(s): d50769e

Upload 8 files

Dockerfile ADDED
@@ -0,0 +1,24 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy requirements first to leverage Docker layer caching
+ COPY requirements.txt .
+
+ # Install CPU-only PyTorch first, so dependency resolution does not pull in the much larger default CUDA build via sentence-transformers
+ RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
+     && pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . .
+
+ # Expose port
+ EXPOSE 7860
+
+ # Run the application
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
app/config.py ADDED
@@ -0,0 +1,12 @@
+ from pydantic_settings import BaseSettings
+
+ class Settings(BaseSettings):
+     MONGODB_URL: str
+     COLLECTION_NAME: str = "documents"
+     DATABASE_NAME: str = "ragbot"
+     HUGGINGFACE_TOKEN: str
+
+     class Config:
+         env_file = ".env"
+
+ settings = Settings()
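Settings are loaded from a local `.env` file at startup. A minimal sketch of what that file might contain (both values are placeholders, not real credentials):

MONGODB_URL=mongodb+srv://<user>:<password>@<cluster>.mongodb.net
HUGGINGFACE_TOKEN=hf_xxxxxxxxxxxxxxxx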
app/database/mongodb.py ADDED
@@ -0,0 +1,39 @@
+ from motor.motor_asyncio import AsyncIOMotorClient
+ from ..config import settings
+
+ class MongoDB:
+     def __init__(self):
+         self.client = None
+         self.db = None
+
+     async def connect(self):
+         self.client = AsyncIOMotorClient(settings.MONGODB_URL)
+         self.db = self.client[settings.DATABASE_NAME]
+
+         # Create a text index for keyword search. A 2dsphere index is for
+         # GeoJSON geospatial data and cannot index embedding vectors, so no
+         # vector index is created here (Atlas Vector Search would be needed).
+         collection = self.db[settings.COLLECTION_NAME]
+         await collection.create_index([("content", "text")])
+
+     async def close(self):
+         if self.client:
+             self.client.close()
+
+     async def store_document(self, document_data):
+         collection = self.db[settings.COLLECTION_NAME]
+         result = await collection.insert_one(document_data)
+         return result.inserted_id
+
+     async def search_documents(self, query, limit=5):
+         collection = self.db[settings.COLLECTION_NAME]
+
+         # Keyword (text-index) search; stored embeddings are not used here
+         text_results = collection.find(
+             {"$text": {"$search": query}},
+             {"score": {"$meta": "textScore"}}
+         ).sort([("score", {"$meta": "textScore"})]).limit(limit)
+
+         return await text_results.to_list(length=limit)
+
+ db = MongoDB()
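Because the embeddings stored by DocumentProcessor are not indexed for similarity search (see the comment in connect above), one option is client-side re-ranking. A minimal sketch, assuming `docs` came from search_documents and the query was embedded with the same SentenceTransformer model; `rerank_by_cosine` is a hypothetical helper, not part of the committed code:

import numpy as np

def rerank_by_cosine(query_embedding, docs, top_k=5):
    # Score each candidate document by cosine similarity to the query
    q = np.asarray(query_embedding, dtype=float)
    q = q / np.linalg.norm(q)
    scored = []
    for doc in docs:
        v = np.asarray(doc["embeddings"], dtype=float)
        scored.append((float(q @ (v / np.linalg.norm(v))), doc))
    # Highest-similarity documents first
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in scored[:top_k]]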
app/main.py ADDED
@@ -0,0 +1,41 @@
+ from contextlib import asynccontextmanager
+ from fastapi import FastAPI, UploadFile, File
+ from .database.mongodb import db
+ from .rag.document_processor import DocumentProcessor
+ from .rag.retriever import RAGRetriever
+
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     # Startup
+     await db.connect()
+     yield
+     # Shutdown
+     await db.close()
+
+ app = FastAPI(
+     title="RAG Chatbot",
+     description="A RAG-based chatbot using the DeepSeek model",
+     version="1.0.0",
+     lifespan=lifespan  # register the startup/shutdown hooks defined above
+ )
+ document_processor = DocumentProcessor()
+ rag_retriever = RAGRetriever()
+
+ @app.post("/upload-pdf")
+ async def upload_pdf(file: UploadFile = File(...)):
+     content = await file.read()
+     await document_processor.process_pdf(content)
+     return {"message": "PDF processed successfully"}
+
+ @app.post("/chat")
+ async def chat(query: str):
+     response = await rag_retriever.get_response(query)
+     return {
+         "reasoning": response["reasoning"],
+         "answer": response["answer"],
+         "context_used": response["context_used"]
+     }
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
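Once the server is running on port 7860, both endpoints can be exercised with a short client script. A sketch using the `requests` library; the file path and question are placeholders:

import requests

BASE = "http://localhost:7860"

# Ingest a PDF (sample.pdf is a placeholder path)
with open("sample.pdf", "rb") as f:
    print(requests.post(f"{BASE}/upload-pdf", files={"file": f}).json())

# Ask a question; FastAPI exposes `query` as a query parameter here
print(requests.post(f"{BASE}/chat", params={"query": "What is this document about?"}).json())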
app/rag/document_processor.py ADDED
@@ -0,0 +1,38 @@
+ from ..utils.pdf_loader import PDFLoader
+ from ..database.mongodb import db
+ from sentence_transformers import SentenceTransformer
+
+ class DocumentProcessor:
+     def __init__(self):
+         self.pdf_loader = PDFLoader()
+         self.encoder = SentenceTransformer('all-MiniLM-L6-v2')
+
+     def generate_embeddings(self, text: str):
+         return self.encoder.encode(text).tolist()
+
+     async def process_pdf(self, file_content: bytes):
+         # Extract text from PDF
+         text_content = self.pdf_loader.load_pdf(file_content)
+
+         # Process each page with improved chunking
+         for page_num, page_text in enumerate(text_content):
+             chunks = self.pdf_loader.chunk_text(
+                 page_text,
+                 chunk_size=512,  # Smaller chunks for better retrieval
+                 overlap=50  # Overlap between consecutive chunks
+             )
+
+             # Store chunks with embeddings in MongoDB
+             for chunk_num, chunk in enumerate(chunks):
+                 document_data = {
+                     "page_number": page_num,
+                     "chunk_number": chunk_num,
+                     "content": chunk,
+                     "embeddings": self.generate_embeddings(chunk),
+                     "metadata": {
+                         "chunk_length": len(chunk),
+                         "position": chunk_num,
+                         "total_chunks": len(chunks)
+                     }
+                 }
+                 await db.store_document(document_data)
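The all-MiniLM-L6-v2 encoder used here produces 384-dimensional vectors; a quick sanity check of what gets stored in the "embeddings" field:

from sentence_transformers import SentenceTransformer

encoder = SentenceTransformer('all-MiniLM-L6-v2')
vec = encoder.encode("hello world").tolist()
print(len(vec))  # 384 for all-MiniLM-L6-v2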
app/rag/retriever.py ADDED
@@ -0,0 +1,131 @@
+ import dspy
+ from ..database.mongodb import db
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ from ..config import settings
+
+ class RAGRetriever:
+     def __init__(self):
+         # Initialize DeepSeek model and tokenizer with CPU optimizations
+         self.model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             self.model_name,
+             token=settings.HUGGINGFACE_TOKEN
+         )
+         self.model = AutoModelForCausalLM.from_pretrained(
+             self.model_name,
+             token=settings.HUGGINGFACE_TOKEN,
+             torch_dtype=torch.bfloat16,
+             low_cpu_mem_usage=True,
+             device_map="cpu"
+         )
+
+         # Configure DSPy with a custom LLM wrapper
+         class DeepSeekLLM(dspy.LM):
+             def __init__(self, model, tokenizer):
+                 # dspy's LM base class expects a model identifier
+                 super().__init__(model="deepseek-local")
+                 self.model = model
+                 self.tokenizer = tokenizer
+
+             def basic_generate(self, prompt, **kwargs):
+                 inputs = self.tokenizer(
+                     prompt,
+                     return_tensors="pt",
+                     truncation=True,
+                     max_length=1024
+                 )
+                 with torch.inference_mode():  # More memory efficient than no_grad
+                     outputs = self.model.generate(
+                         inputs.input_ids,
+                         attention_mask=inputs.attention_mask,
+                         max_new_tokens=256,  # Cap new tokens; max_length would count the prompt too
+                         temperature=0.7,
+                         do_sample=True,
+                         pad_token_id=self.tokenizer.eos_token_id,
+                         num_beams=1,  # No beam search for faster inference
+                         use_cache=True
+                     )
+                 # Decode only the generated continuation so the prompt (which
+                 # itself contains "Final Answer:") is not echoed back
+                 new_tokens = outputs[0][inputs.input_ids.shape[1]:]
+                 return self.tokenizer.decode(new_tokens, skip_special_tokens=True)
+
+         self.lm = DeepSeekLLM(self.model, self.tokenizer)
+         dspy.settings.configure(lm=self.lm)
+
+         # Define a DSPy RAG program. It is not invoked in get_response below,
+         # because dspy.Retrieve has no retrieval model configured here;
+         # retrieval is done directly against MongoDB instead.
+         class RAGProgram(dspy.Module):
+             def __init__(self):
+                 super().__init__()
+                 self.retrieve = dspy.Retrieve(k=5)
+                 self.generate_answer = dspy.ChainOfThought("context, question -> answer")
+
+             def forward(self, question):
+                 context = self.retrieve(question).passages
+                 prediction = self.generate_answer(context=context, question=question)
+                 return prediction.answer
+
+         self.rag_program = RAGProgram()
+
+         # Configure prompt template
+         self.template = """
+         Based on the following context, please answer the question accurately.
+         If the answer cannot be found in the context, say "I cannot answer based on the provided context."
+
+         Context: {context}
+
+         Question: {question}
+
+         Let's solve this step by step:
+         1) First, let's identify the key information from the context
+         2) Then, analyze how it relates to the question
+         3) Finally, provide a clear answer
+
+         Please format your response as:
+         Reasoning:
+         [Your step-by-step reasoning here]
+
+         Final Answer:
+         [Your concise answer here]
+         """
+
+     async def get_response(self, query: str) -> dict:
+         try:
+             # Get relevant documents from MongoDB
+             docs = await db.search_documents(query)
+             context = [doc['content'] for doc in docs]
+
+             # Prepare prompt
+             prompt = self.template.format(
+                 context='\n'.join(context[:3]),
+                 question=query
+             )
+
+             # Generate directly with the wrapped model; retrieval already
+             # happened against MongoDB, so the dspy retriever is bypassed
+             raw_response = self.lm.basic_generate(prompt)
+
+             # Parse the response to separate reasoning and answer
+             try:
+                 reasoning_part = raw_response.split("Final Answer:")[0].replace("Reasoning:", "").strip()
+                 answer_part = raw_response.split("Final Answer:")[1].strip()
+             except IndexError:
+                 # Fallback if response format is unexpected
+                 reasoning_part = "Could not extract reasoning"
+                 answer_part = raw_response
+
+             return {
+                 "reasoning": reasoning_part,
+                 "answer": answer_part,
+                 "context_used": context[:3]  # Include used context for transparency
+             }
+
+         except Exception as e:
+             return {
+                 "reasoning": "An error occurred during processing",
+                 "answer": f"Error: {str(e)}",
+                 "context_used": []
+             }
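For reference, get_response expects the model to follow the prompted Reasoning / Final Answer layout. A small sketch of the split logic on a hand-written sample completion:

sample = """Reasoning:
The context says the report covers third-quarter revenue.

Final Answer:
The document is a Q3 revenue report."""

reasoning = sample.split("Final Answer:")[0].replace("Reasoning:", "").strip()
answer = sample.split("Final Answer:")[1].strip()
print(reasoning)  # -> The context says the report covers third-quarter revenue.
print(answer)     # -> The document is a Q3 revenue report.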
app/utils/pdf_loader.py ADDED
@@ -0,0 +1,37 @@
+ import PyPDF2
+ from typing import List
+ import io
+
+ class PDFLoader:
+     @staticmethod
+     def load_pdf(file_content: bytes) -> List[str]:
+         pdf_file = io.BytesIO(file_content)
+         pdf_reader = PyPDF2.PdfReader(pdf_file)
+
+         text_content = []
+         for page in pdf_reader.pages:
+             text_content.append(page.extract_text())
+
+         return text_content
+
+     @staticmethod
+     def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 0) -> List[str]:
+         # Sentence-based chunking; `overlap` carries the tail of the previous
+         # chunk into the next one (the caller in DocumentProcessor passes
+         # overlap=50, which the original signature did not accept)
+         chunks = []
+         current_chunk = ""
+
+         for sentence in text.split(". "):
+             if len(current_chunk) + len(sentence) <= chunk_size:
+                 current_chunk += sentence + ". "
+             else:
+                 chunks.append(current_chunk)
+                 # Start the next chunk with the last `overlap` characters
+                 current_chunk = current_chunk[-overlap:] if overlap else ""
+                 current_chunk += sentence + ". "
+
+         if current_chunk:
+             chunks.append(current_chunk)
+
+         return chunks
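A quick check of the chunking behavior with a small chunk size, assuming it is run from the repository root (the sentences are invented for illustration):

from app.utils.pdf_loader import PDFLoader

text = "First sentence here. Second one follows. Third is a bit longer. Fourth ends it."
for i, chunk in enumerate(PDFLoader.chunk_text(text, chunk_size=40, overlap=10)):
    print(i, repr(chunk))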
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ fastapi
+ uvicorn
+ motor
+ pymongo
+ python-multipart
+ PyPDF2
+ dspy-ai
+ pydantic
+ pydantic-settings
+ python-dotenv
+ sentence-transformers
+ numpy
+ transformers
+ cloudpickle
+ accelerate>=0.26.0