oussamatahkoubit committed on
Commit 8d9119f · verified · 1 Parent(s): 7dd8474

Upload 8 files

app.py ADDED
@@ -0,0 +1,158 @@
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+ from fastapi.responses import JSONResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+ from starlette.requests import Request
+ import os
+ import time
+ from pathlib import Path
+
+ # Import the backend modules
+ from backend.file_handler import save_upload
+ from backend.extractors import extract_text
+ from backend.qa_engine import QAEngine
+ from backend.image_processor import ImageProcessor
+ from backend.response_formatter import ResponseFormatter
+
+ app = FastAPI(
+     title="Intelligent QA Service",
+     description="Question answering for documents and images"
+ )
+
+ # Set the Hugging Face token if it is available in the environment
+ huggingface_token = os.environ.get("HF_TOKEN")
+ if huggingface_token:
+     from huggingface_hub import login
+     login(token=huggingface_token)
+
+ # Initialize models with fallback options
+ try:
+     qa_engine = QAEngine(model_name="distilbert-base-cased-distilled-squad")  # public model
+ except Exception as e:
+     print(f"Error initializing QA engine: {str(e)}")
+     # Fall back to a simpler implementation if needed
+     from backend.qa_engine import SimpleQAEngine
+     qa_engine = SimpleQAEngine()
+
+ try:
+     image_processor = ImageProcessor()
+ except Exception as e:
+     print(f"Error initializing Image Processor: {str(e)}")
+     # Fall back to a model-free image processor if needed
+     from backend.image_processor import SimpleImageProcessor
+     image_processor = SimpleImageProcessor()
+
+ formatter = ResponseFormatter()
+
+ # Mount static files and templates
+ templates = Jinja2Templates(directory="frontend/templates")
+ app.mount("/static", StaticFiles(directory="frontend/static"), name="static")
+
+ @app.get("/")
+ async def read_root(request: Request):
+     """Render the main page"""
+     return templates.TemplateResponse("index.html", {"request": request})
+
+ @app.post("/api/document-qa")
+ async def document_qa(
+     file: UploadFile = File(...),
+     question: str = Form(...)
+ ):
+     """Process a document and answer a question about it"""
+     try:
+         # Save the uploaded file
+         file_id, file_name = save_upload(file)
+         file_path = Path(f"/tmp/uploads/{file_name}")
+
+         # Extract text from the document
+         document_text = extract_text(str(file_path))
+
+         # Convert structured extractor output to plain text for the QA engine
+         if isinstance(document_text, dict):
+             content = document_text.get("content")
+             if isinstance(content, list) and content:
+                 if isinstance(content[0], dict):
+                     # docx structure: list of {"style", "text"} paragraphs
+                     text = " ".join(p["text"] for p in content)
+                 else:
+                     # txt structure: list of lines
+                     text = " ".join(content)
+             elif content is not None:
+                 text = str(content)
+             else:
+                 text = str(document_text)
+         else:
+             # Plain text from PDF or PPTX
+             text = document_text
+
+         qa_result = qa_engine.answer_question(text, question)
+         qa_result["timestamp"] = time.time()
+
+         # Format response
+         response = formatter.format_document_qa_response(qa_result, file.filename)
+         return JSONResponse(content=response)
+
+     except HTTPException:
+         # Let explicit upload errors (unsupported type, file too large) keep their status codes
+         raise
+     except Exception as e:
+         error_response = formatter.format_error_response(str(e))
+         return JSONResponse(content=error_response, status_code=error_response["status_code"])
+
+ @app.post("/api/image-qa")
+ async def image_qa(
+     file: UploadFile = File(...),
+     question: str = Form(...)
+ ):
+     """Process an image and answer a question about it"""
+     try:
+         print(f"Received image: {file.filename}, size: {file.size}, question: {question}")
+
+         # Validate that the file is an image
+         if not file.content_type.startswith('image/'):
+             print(f"Invalid content type: {file.content_type}")
+             return JSONResponse(
+                 content={"error": "File must be an image", "status_code": 400},
+                 status_code=400
+             )
+
+         # Save the uploaded file
+         file_id, file_name = save_upload(file)
+         file_path = Path(f"/tmp/uploads/{file_name}")
+         print(f"Saved image to: {file_path}")
+
+         if not file_path.exists():
+             print(f"File not saved properly at {file_path}")
+             return JSONResponse(
+                 content={"error": "File could not be saved", "status_code": 500},
+                 status_code=500
+             )
+
+         # Process the image
+         vqa_result = image_processor.answer_image_question(str(file_path), question)
+         vqa_result["timestamp"] = time.time()
+
+         # Format response
+         response = formatter.format_image_qa_response(vqa_result, file.filename)
+         return JSONResponse(content=response)
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         import traceback
+         print(f"Error in image_qa: {str(e)}")
+         print(traceback.format_exc())
+         error_response = formatter.format_error_response(str(e))
+         return JSONResponse(content=error_response, status_code=error_response.get("status_code", 500))
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
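
For reference, a minimal client sketch for the two endpoints above. It assumes the service is running locally on port 7860 (as in the __main__ block), that the requests library is installed (it is not pinned in requirements.txt), and that sample.pdf and photo.jpg are placeholder test files.

    import requests

    BASE_URL = "http://localhost:7860"  # hypothetical local deployment

    # Document QA: multipart upload plus a form field, matching File(...) / Form(...)
    with open("sample.pdf", "rb") as f:  # placeholder test file
        resp = requests.post(
            f"{BASE_URL}/api/document-qa",
            files={"file": ("sample.pdf", f, "application/pdf")},
            data={"question": "What is the main topic of this document?"},
        )
    print(resp.json())  # {"document": ..., "answer": ..., "confidence": ..., ...}

    # Image QA: same pattern against /api/image-qa
    with open("photo.jpg", "rb") as f:  # placeholder test image
        resp = requests.post(
            f"{BASE_URL}/api/image-qa",
            files={"file": ("photo.jpg", f, "image/jpeg")},
            data={"question": "What color is the car?"},
        )
    print(resp.json())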
backend/extractors.py ADDED
@@ -0,0 +1,57 @@
+ from pathlib import Path
+ from typing import Union
+ import fitz  # PyMuPDF
+ from docx import Document
+ from pptx import Presentation
+
+ # PDF
+ def extract_text_pdf(file_path: str) -> str:
+     text = ""
+     with fitz.open(file_path) as doc:
+         for page in doc:
+             text += page.get_text()
+     return text.strip()
+
+ # DOCX
+ def extract_text_docx(file_path: str) -> dict:
+     doc = Document(file_path)
+     paragraphs = []
+
+     for para in doc.paragraphs:
+         text = para.text.strip()
+         if text:  # Only include non-empty paragraphs
+             paragraphs.append({
+                 "style": para.style.name,
+                 "text": text
+             })
+
+     return {"content": paragraphs}
+
+ # TXT
+ def extract_text_txt(file_path: str) -> dict:
+     with open(file_path, "r", encoding="utf-8") as f:
+         lines = f.read().splitlines()  # split into clean lines
+     return {"content": lines}
+
+ # PPTX
+ def extract_text_pptx(file_path: str) -> str:
+     prs = Presentation(file_path)
+     text = []
+     for slide in prs.slides:
+         for shape in slide.shapes:
+             if hasattr(shape, "text"):
+                 text.append(shape.text)
+     return "\n".join(text).strip()
+
+ # Dispatcher: returns a plain string for PDF/PPTX and a {"content": ...} dict for DOCX/TXT
+ def extract_text(file_path: str) -> Union[str, dict]:
+     ext = Path(file_path).suffix.lower()
+     if ext == ".pdf":
+         return extract_text_pdf(file_path)
+     elif ext == ".docx":
+         return extract_text_docx(file_path)
+     elif ext == ".txt":
+         return extract_text_txt(file_path)
+     elif ext == ".pptx":
+         return extract_text_pptx(file_path)
+     else:
+         raise ValueError("Unsupported file extension")
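
A short usage sketch of the dispatcher, assuming files already saved under /tmp/uploads (the paths are placeholders). It illustrates the two return shapes that app.py has to flatten.

    from backend.extractors import extract_text

    pdf_text = extract_text("/tmp/uploads/report.pdf")   # str with the full PDF text
    txt_doc = extract_text("/tmp/uploads/notes.txt")      # {"content": ["line 1", "line 2", ...]}
    docx_doc = extract_text("/tmp/uploads/spec.docx")     # {"content": [{"style": "Normal", "text": "..."}, ...]}

    # Flattening the structured variants the same way /api/document-qa does:
    flat = " ".join(p["text"] for p in docx_doc["content"])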
backend/file_handler.py ADDED
@@ -0,0 +1,37 @@
+ from fastapi import UploadFile, HTTPException
+ from pathlib import Path
+ from uuid import uuid4
+
+ # Accepted file types (documents for /api/document-qa, images for /api/image-qa)
+ ALLOWED_TYPES = {
+     "application/pdf": ".pdf",
+     "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+     "text/plain": ".txt",
+     "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+     "image/jpeg": ".jpg",
+     "image/png": ".png",
+     "image/gif": ".gif"
+ }
+
+ MAX_FILE_SIZE = 10 * 1024 * 1024  # 10 MB
+
+ UPLOAD_DIR = Path("/tmp/uploads")
+ UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+
+ def save_upload(file: UploadFile) -> tuple[str, str]:
+     """Validate, size-check, and persist an upload; return (file_id, file_name)."""
+     if file.content_type not in ALLOWED_TYPES:
+         raise HTTPException(status_code=400, detail="Unsupported file type.")
+
+     # Read the file into memory to check its size before saving
+     file_bytes = file.file.read()
+
+     if len(file_bytes) > MAX_FILE_SIZE:
+         raise HTTPException(status_code=413, detail="File is too large. Maximum size is 10MB.")
+
+     file_ext = ALLOWED_TYPES[file.content_type]
+     file_id = str(uuid4())
+     file_path = UPLOAD_DIR / f"{file_id}{file_ext}"
+
+     with open(file_path, "wb") as f:
+         f.write(file_bytes)
+
+     return file_id, file_path.name
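
A rough test sketch for save_upload outside a live request. It assumes the Starlette version pulled in by fastapi==0.95.1, where UploadFile accepts file, filename, and headers keyword arguments; treat it as illustrative rather than a committed test.

    from io import BytesIO
    from starlette.datastructures import Headers, UploadFile

    from backend.file_handler import save_upload

    # Build a fake in-memory upload with an explicit content type
    fake_upload = UploadFile(
        file=BytesIO(b"hello world"),
        filename="hello.txt",
        headers=Headers({"content-type": "text/plain"}),
    )

    file_id, file_name = save_upload(fake_upload)
    print(file_id, file_name)  # e.g. a UUID string and "<uuid>.txt"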
backend/image_processor.py ADDED
@@ -0,0 +1,101 @@
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+ from transformers import ViltProcessor, ViltForQuestionAnswering
+ from PIL import Image
+ import torch
+ from typing import Dict, Any
+
+ class ImageProcessor:
+     def __init__(self, caption_model_name: str = "nlpconnect/vit-gpt2-image-captioning",
+                  vqa_model_name: str = "dandelin/vilt-b32-finetuned-vqa"):
+         # Image captioning model
+         self.caption_processor = ViTImageProcessor.from_pretrained(caption_model_name)
+         self.tokenizer = AutoTokenizer.from_pretrained(caption_model_name)
+         self.caption_model = VisionEncoderDecoderModel.from_pretrained(caption_model_name)
+
+         # VQA model
+         self.vqa_processor = ViltProcessor.from_pretrained(vqa_model_name)
+         self.vqa_model = ViltForQuestionAnswering.from_pretrained(vqa_model_name)
+
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.caption_model.to(self.device)
+         self.vqa_model.to(self.device)
+
+     def generate_caption(self, image_path: str) -> str:
+         """Generate a descriptive caption for the provided image"""
+         try:
+             image = Image.open(image_path).convert("RGB")
+             pixel_values = self.caption_processor(image, return_tensors="pt").pixel_values.to(self.device)
+
+             gen_kwargs = {
+                 "max_length": 50,
+                 "num_beams": 4,
+                 "early_stopping": True
+             }
+
+             output_ids = self.caption_model.generate(pixel_values, **gen_kwargs)
+             caption = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+             return caption
+         except Exception as e:
+             return f"Error processing image: {str(e)}"
+
+     def answer_image_question(self, image_path: str, question: str) -> Dict[str, Any]:
+         """Answer a question about the provided image using a Visual QA model"""
+         try:
+             # Open image
+             image = Image.open(image_path).convert("RGB")
+
+             # Prepare inputs
+             inputs = self.vqa_processor(image, question, return_tensors="pt")
+             inputs = {k: v.to(self.device) for k, v in inputs.items()}
+
+             # Forward pass
+             with torch.no_grad():
+                 outputs = self.vqa_model(**inputs)
+
+             # Pick the highest-scoring answer and its softmax confidence
+             logits = outputs.logits
+             idx = logits.argmax(-1).item()
+             answer = self.vqa_model.config.id2label[idx]
+             confidence = torch.softmax(logits, dim=-1)[0, idx].item()
+
+             return {"answer": answer, "confidence": confidence}
+
+         except Exception as e:
+             print(f"VQA Error: {str(e)}")
+             # Fall back to a caption-based answer
+             try:
+                 caption = self.generate_caption(image_path)
+                 return {
+                     "answer": f"Based on the image which shows {caption}, I cannot provide a specific answer.",
+                     "confidence": 0.0
+                 }
+             except Exception as e2:
+                 return {"answer": f"Error processing image: {str(e)}, {str(e2)}", "confidence": 0.0}
+
+
+ class SimpleImageProcessor:
+     """A simple fallback image processor that doesn't require external models"""
+
+     def __init__(self):
+         """Initialize without any models"""
+         print("Using SimpleImageProcessor fallback")
+
+     def generate_caption(self, image_path: str) -> str:
+         """Generate a basic caption for the provided image"""
+         try:
+             # Just extract basic image information
+             from PIL import Image
+             img = Image.open(image_path)
+             return f"an image of size {img.width}x{img.height}"
+         except Exception as e:
+             return f"an image (could not process: {str(e)})"
+
+     def answer_image_question(self, image_path: str, question: str) -> Dict[str, Any]:
+         """Provide a fallback answer for image questions"""
+         return {
+             "answer": "I cannot analyze this image right now. The image processing system is not fully functional.",
+             "confidence": 0.0
+         }
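
A quick local usage sketch for the two processors, assuming a placeholder image at /tmp/uploads/photo.jpg; the first ImageProcessor call downloads the ViT-GPT2 and ViLT weights.

    from backend.image_processor import ImageProcessor, SimpleImageProcessor

    processor = ImageProcessor()
    print(processor.generate_caption("/tmp/uploads/photo.jpg"))
    print(processor.answer_image_question("/tmp/uploads/photo.jpg", "How many people are in the picture?"))
    # -> {"answer": "2", "confidence": 0.87}   (illustrative values)

    # The fallback never loads a model and always reports confidence 0.0
    fallback = SimpleImageProcessor()
    print(fallback.answer_image_question("/tmp/uploads/photo.jpg", "How many people are in the picture?"))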
backend/models.py ADDED
@@ -0,0 +1,56 @@
+ from transformers import pipeline
+ from typing import Any, Optional
+
+ # Singleton model manager
+ class ModelManager:
+     _instance = None
+
+     @classmethod
+     def get_instance(cls):
+         if cls._instance is None:
+             cls._instance = cls()
+         return cls._instance
+
+     def __init__(self):
+         self.pipelines = {}
+
+         # Set default models - preferably small ones that are publicly accessible
+         self.model_configs = {
+             "document_qa": {
+                 "name": "distilbert-base-cased-distilled-squad",  # Smaller, public model
+                 "type": "question-answering"
+             },
+             "image_captioning": {
+                 "name": "Salesforce/blip-image-captioning-base",  # Public model
+                 "type": "image-to-text"
+             }
+         }
+
+     def load_pipeline(self, pipeline_type: str) -> bool:
+         """Load a specific pipeline if it's not already loaded"""
+         if pipeline_type not in self.model_configs:
+             return False
+
+         if pipeline_type in self.pipelines:
+             return True
+
+         config = self.model_configs[pipeline_type]
+         model_name = config["name"]
+
+         try:
+             if config["type"] == "question-answering":
+                 self.pipelines[pipeline_type] = pipeline("question-answering", model=model_name)
+             elif config["type"] == "image-to-text":
+                 self.pipelines[pipeline_type] = pipeline("image-to-text", model=model_name)
+
+             return True
+         except Exception as e:
+             print(f"Error loading pipeline {model_name}: {str(e)}")
+             return False
+
+     def get_pipeline(self, pipeline_type: str) -> Optional[Any]:
+         """Get a loaded pipeline, loading it on first use"""
+         if pipeline_type not in self.pipelines and not self.load_pipeline(pipeline_type):
+             return None
+         return self.pipelines[pipeline_type]
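
A sketch of the intended singleton usage. Note that app.py currently constructs QAEngine and ImageProcessor directly rather than going through ModelManager, so this module is effectively an alternative loading path.

    from backend.models import ModelManager

    manager = ModelManager.get_instance()

    qa = manager.get_pipeline("document_qa")  # lazily loads the SQuAD QA pipeline, or None on failure
    if qa is not None:
        print(qa(question="Who wrote the report?", context="The report was written by Alice in 2023."))

    captioner = manager.get_pipeline("image_captioning")  # lazily loads the BLIP image-to-text pipeline
    if captioner is not None:
        print(captioner("/tmp/uploads/photo.jpg"))  # placeholder image path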
backend/qa_engine.py ADDED
@@ -0,0 +1,65 @@
+ from transformers import pipeline
+ from typing import Dict, List, Any
+
+ class QAEngine:
+     def __init__(self, model_name: str = "deepset/roberta-base-squad2"):
+         # Use the pipeline API, which works better with Hugging Face Spaces
+         self.qa_pipeline = pipeline("question-answering", model=model_name)
+
+     def answer_question(self, context: str, question: str) -> Dict[str, Any]:
+         """Answer a question based on the provided context"""
+         try:
+             # Use the pipeline directly
+             result = self.qa_pipeline(question=question, context=context)
+
+             return {
+                 "answer": result["answer"],
+                 "confidence": result["score"],
+                 "start_position": result["start"],
+                 "end_position": result["end"]
+             }
+         except Exception as e:
+             return {
+                 "answer": f"Error processing question: {str(e)}",
+                 "confidence": 0.0,
+                 "start_position": 0,
+                 "end_position": 0
+             }
+
+     def answer_multiple_questions(self, context: str, questions: List[str]) -> List[Dict[str, Any]]:
+         """Answer multiple questions from the same context"""
+         return [self.answer_question(context, question) for question in questions]
+
+
+ class SimpleQAEngine:
+     """A simple QA engine that doesn't rely on complex models"""
+
+     def answer_question(self, context: str, question: str) -> Dict[str, Any]:
+         """Basic keyword-based answer extraction (fallback when models fail)"""
+         # Very basic implementation - just finds the sentence with the most keywords from the question.
+         # Prefer NLTK sentence splitting; fall back to naive splitting if nltk or its
+         # punkt data is unavailable (nltk is not listed in requirements.txt).
+         try:
+             import nltk
+             nltk.download('punkt', quiet=True)
+             from nltk.tokenize import sent_tokenize
+             sentences = sent_tokenize(context)
+         except Exception:
+             sentences = [s.strip() for s in context.split(".") if s.strip()]
+
+         question_words = set(question.lower().split())
+         best_sentence = ""
+         best_score = 0
+
+         for sentence in sentences:
+             sentence_words = set(sentence.lower().split())
+             overlap = len(question_words.intersection(sentence_words))
+             if overlap > best_score:
+                 best_score = overlap
+                 best_sentence = sentence
+
+         return {
+             "answer": best_sentence if best_score > 0 else "No relevant information found.",
+             "confidence": min(best_score / max(1, len(question_words)), 1.0),
+             "start_position": context.find(best_sentence) if best_sentence in context else 0,
+             "end_position": context.find(best_sentence) + len(best_sentence) if best_sentence in context else 0
+         }
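
A usage sketch for both engines with a made-up context string; the model name mirrors the one app.py passes in.

    from backend.qa_engine import QAEngine, SimpleQAEngine

    context = "The Eiffel Tower was completed in 1889 and is located in Paris."

    engine = QAEngine(model_name="distilbert-base-cased-distilled-squad")
    print(engine.answer_question(context, "When was the Eiffel Tower completed?"))
    # -> {"answer": "1889", "confidence": ..., "start_position": ..., "end_position": ...}

    fallback = SimpleQAEngine()
    print(fallback.answer_question(context, "Where is the Eiffel Tower located?"))
    # -> returns the sentence with the highest keyword overlap and a rough confidence score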
backend/response_formatter.py ADDED
@@ -0,0 +1,48 @@
+ from typing import Dict, Any
+
+ class ResponseFormatter:
+     @staticmethod
+     def format_document_qa_response(qa_result: Dict[str, Any], document_name: str) -> Dict[str, Any]:
+         """Format the response from the QA engine for document questions"""
+         formatted_response = {
+             "document": document_name,
+             "answer": qa_result.get("answer", "No answer found"),
+             "confidence": round(qa_result.get("confidence", 0) * 100, 2),
+             "metadata": {
+                 "source_type": "document",
+                 "timestamp": qa_result.get("timestamp")
+             }
+         }
+
+         # Add highlighted text positions if available
+         if "start_position" in qa_result and "end_position" in qa_result:
+             formatted_response["highlight"] = {
+                 "start": qa_result["start_position"],
+                 "end": qa_result["end_position"]
+             }
+
+         return formatted_response
+
+     @staticmethod
+     def format_image_qa_response(vqa_result: Dict[str, Any], image_name: str) -> Dict[str, Any]:
+         """Format the response from the image QA engine"""
+         formatted_response = {
+             "image": image_name,
+             "answer": vqa_result.get("answer", "No answer found"),
+             "confidence": round(vqa_result.get("confidence", 0) * 100, 2),
+             "metadata": {
+                 "source_type": "image",
+                 "timestamp": vqa_result.get("timestamp")
+             }
+         }
+
+         return formatted_response
+
+     @staticmethod
+     def format_error_response(error_message: str, status_code: int = 400) -> Dict[str, Any]:
+         """Format error responses"""
+         return {
+             "error": True,
+             "message": error_message,
+             "status_code": status_code
+         }
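
For reference, the payload shapes the formatter produces (values are illustrative).

    from backend.response_formatter import ResponseFormatter

    formatter = ResponseFormatter()

    qa_result = {"answer": "1889", "confidence": 0.93, "start_position": 34, "end_position": 38, "timestamp": 1700000000.0}
    print(formatter.format_document_qa_response(qa_result, "history.pdf"))
    # {"document": "history.pdf", "answer": "1889", "confidence": 93.0,
    #  "metadata": {"source_type": "document", "timestamp": 1700000000.0},
    #  "highlight": {"start": 34, "end": 38}}

    print(formatter.format_error_response("Unsupported file type.", status_code=400))
    # {"error": True, "message": "Unsupported file type.", "status_code": 400}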
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ numpy==1.24.3
+ fastapi==0.95.1
+ uvicorn==0.22.0
+ python-multipart==0.0.6
+ Jinja2==3.1.2
+ torch==2.0.1
+ transformers==4.30.2
+ accelerate==0.20.3
+ sentencepiece==0.1.99
+ pillow==9.5.0
+ PyMuPDF==1.22.5
+ python-docx==0.8.11
+ python-pptx==0.6.21
+ huggingface-hub[hf_xet]
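
A small, optional sanity check (a sketch, not part of the commit) to confirm the pinned versions are installed in the environment before starting the app:

    from importlib.metadata import version

    for pkg in ["fastapi", "transformers", "torch", "PyMuPDF", "python-docx", "python-pptx"]:
        print(pkg, version(pkg))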