oussamatahkoubit committed (verified)
Commit 5e8d8f8 · 1 parent: 4ab6353

Upload 4 files

Files changed (5):
  1. .gitattributes +1 -0
  2. Dockerfile +26 -4
  3. app.py +279 -51
  4. requirements.txt +14 -5
  5. sample_documents/test_image.jpg +3 -0
.gitattributes CHANGED

@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 test_image.jpg filter=lfs diff=lfs merge=lfs -text
+sample_documents/test_image.jpg filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED

@@ -1,8 +1,30 @@
 FROM python:3.10-slim
-WORKDIR /code
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    build-essential \
+    python3-dev \
+    libgl1-mesa-glx \
+    libglib2.0-0 \
+    poppler-utils \
+    tesseract-ocr \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-COPY app.py .
-COPY test_image.jpg .
+
+# Create sample documents directory
+RUN mkdir -p /app/sample_documents
+
+# Copy application code
+COPY . .
+
+# Expose port for Gradio
 EXPOSE 7860
+
+# Command to run the application
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["python", "app.py"]
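Reviewer note: the new apt layer maps directly onto the Python stack below — poppler-utils provides the pdftoppm binary that pdf2image shells out to, tesseract-ocr provides the CLI behind pytesseract, and libgl1-mesa-glx/libglib2.0-0 are the shared libraries opencv-python needs on slim base images. A minimal sanity check in that spirit (a sketch, not part of this commit; run inside the built image):

import shutil

# pdf2image drives poppler's pdftoppm; pytesseract drives the tesseract CLI
for binary, package in [("pdftoppm", "poppler-utils"), ("tesseract", "tesseract-ocr")]:
    if shutil.which(binary) is None:
        raise RuntimeError(f"{binary} not on PATH; install {package} in the image")

import cv2  # the import itself fails on slim bases without libgl1-mesa-glx / libglib2.0-0
print("system dependencies OK; OpenCV", cv2.__version__)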
app.py CHANGED

@@ -1,69 +1,297 @@
 import os
-import io
-import logging
-import requests
-from fastapi import FastAPI
+import cv2
+import gradio as gr
+import torch
+from llava.model.builder import load_pretrained_model
+from llava.mm_utils import get_model_name_from_path
+from llava.conversation import conv_templates
+from llava.utils import disable_torch_init
 from PIL import Image
-
-app = FastAPI()
-
-# Get your token from environment variable 'HF_TOKEN'
-API_TOKEN = os.getenv("HF_TOKEN")
-if not API_TOKEN:
-    raise RuntimeError("HF_TOKEN environment variable is not set!")
-
-# Use a Visual Question Answering (VQA) model
-API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-vqa-base"
-
-HEADERS = {
-    "Authorization": f"Bearer {API_TOKEN}"
-}
-
-# Configure logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger("app")
-
-@app.on_event("startup")
-async def startup_event():
-    logger.info("Warming up the Hugging Face API")
-
-@app.get("/")
-def home():
-    return {"message": "VQA API is running"}
-
-@app.get("/ask")
-def ask_question():
-    image_path = "/code/test_image.jpg"
-    question = "What does the picture show?"  # Example question
-
-    logger.info(f"Reading image: {image_path}")
-
-    try:
-        with open(image_path, "rb") as image_file:
-            image_bytes = image_file.read()
-
-        logger.info("Sending request to Hugging Face API")
-
-        response = requests.post(
-            API_URL,
-            headers=HEADERS,
-            files={"image": ("filename.jpg", image_bytes, "image/jpeg")},
-            data={"inputs": f'{{"question":"{question}"}}'}
-        )
-        response.raise_for_status()
-
-        result = response.json()
-
-        if "answer" in result:
-            answer = result["answer"]
-            return {"question": question, "answer": answer}
-        else:
-            logger.error(f"Unexpected response format: {result}")
-            return {"error": "Unexpected response format"}
-
-    except requests.exceptions.HTTPError as e:
-        logger.error(f"HTTP error occurred: {e}")
-        return {"error": str(e)}
-    except Exception as e:
-        logger.error(f"Other error occurred: {e}")
-        return {"error": str(e)}
+import pytesseract
+from pdf2image import convert_from_path
+import docx
+import openpyxl
+from pptx import Presentation
+import io
+import tempfile
+import re
+import shutil
+
+# Sample documents directory
+SAMPLE_DIR = "sample_documents"
+
+# Initialize LLaVA model
+disable_torch_init()
+
+# Model paths
+model_path = "liuhaotian/llava-v1.5-7b"
+model_name = get_model_name_from_path(model_path)
+tokenizer, model, processor, context_len = load_pretrained_model(
+    model_path=model_path,
+    model_base=None,
+    model_name=model_name,
+    device="cuda" if torch.cuda.is_available() else "cpu",
+    load_8bit=not torch.cuda.is_available(),
+    load_4bit=not torch.cuda.is_available()
+)
+
+# Document processing functions
+def process_image(image_path):
+    # Use Tesseract to extract text from image
+    img = Image.open(image_path)
+    text = pytesseract.image_to_string(img)
+    return text, img
+
+def process_pdf(pdf_path):
+    # Convert PDF to images and extract text
+    images = convert_from_path(pdf_path)
+    text = ""
+    for img in images:
+        text += pytesseract.image_to_string(img) + "\n\n"
+    return text, images[0] if images else None
+
+def process_docx(docx_path):
+    # Extract text from DOCX
+    doc = docx.Document(docx_path)
+    text = ""
+    for paragraph in doc.paragraphs:
+        text += paragraph.text + "\n"
+    return text, None
+
+def process_excel(excel_path):
+    # Extract data from Excel
+    workbook = openpyxl.load_workbook(excel_path)
+    text = ""
+    for sheet_name in workbook.sheetnames:
+        sheet = workbook[sheet_name]
+        text += f"Sheet: {sheet_name}\n"
+        for row in sheet.iter_rows(values_only=True):
+            text += " | ".join([str(cell) if cell is not None else "" for cell in row]) + "\n"
+        text += "\n"
+    return text, None
+
+def process_pptx(pptx_path):
+    # Extract text from PowerPoint
+    presentation = Presentation(pptx_path)
+    text = ""
+    for i, slide in enumerate(presentation.slides):
+        text += f"Slide {i+1}:\n"
+        for shape in slide.shapes:
+            if hasattr(shape, "text"):
+                text += shape.text + "\n"
+        text += "\n"
+    return text, None
+
+def process_document(file_path):
+    # Process document based on file extension
+    _, ext = os.path.splitext(file_path)
+    ext = ext.lower()
+
+    if ext in ['.jpg', '.jpeg', '.png', '.bmp']:
+        return process_image(file_path)
+    elif ext == '.pdf':
+        return process_pdf(file_path)
+    elif ext == '.docx':
+        return process_docx(file_path)
+    elif ext in ['.xlsx', '.xls']:
+        return process_excel(file_path)
+    elif ext == '.pptx':
+        return process_pptx(file_path)
+    else:
+        return "Unsupported file format", None
+
+def get_sample_documents():
+    """Get list of sample documents from the sample directory"""
+    if not os.path.exists(SAMPLE_DIR):
+        os.makedirs(SAMPLE_DIR)
+        # Create a sample text file if no samples exist
+        with open(os.path.join(SAMPLE_DIR, "sample.txt"), "w") as f:
+            f.write("This is a sample document for testing the document chatbot.\n\n")
+            f.write("It contains information about AI models and document processing.\n")
+            f.write("You can ask questions about this document to test the system.")
+
+    return [f for f in os.listdir(SAMPLE_DIR) if os.path.isfile(os.path.join(SAMPLE_DIR, f))]
+
+def chat_with_document(file, query, use_sample=False, sample_name=None):
+    if use_sample and sample_name:
+        file_path = os.path.join(SAMPLE_DIR, sample_name)
+    else:
+        # Handle uploaded file
+        temp_dir = tempfile.mkdtemp()
+        file_path = os.path.join(temp_dir, file.name)
+
+        with open(file_path, 'wb') as f:
+            f.write(file.read())
+
+    # Check if it's an image for visual analysis
+    _, ext = os.path.splitext(file_path)
+    ext = ext.lower()
+
+    if ext in ['.jpg', '.jpeg', '.png', '.bmp']:
+        # For images, we can use LLaVA's visual capabilities
+        image = Image.open(file_path).convert('RGB')
+
+        # Set up conversation
+        conv = conv_templates["llava_v1"].copy()
+        conv.append_message(conv.roles[0], query)
+        conv.append_message(conv.roles[1], None)
+        prompt = conv.get_prompt()
+
+        # Process image with model
+        image_tensor = processor.preprocess(image, return_tensors='pt')['pixel_values'].half().cuda() if torch.cuda.is_available() else processor.preprocess(image, return_tensors='pt')['pixel_values'].float()
+
+        # Generate response
+        with torch.no_grad():
+            response = model.generate(
+                image_tensor,
+                tokenizer(prompt, return_tensors='pt').input_ids.to(model.device),
+                max_new_tokens=1024,
+                temperature=0.7,
+                do_sample=True,
+            )
+
+        response = tokenizer.decode(response[0], skip_special_tokens=True)
+        response = response.split(conv.sep2)[-1].strip()
+
+    else:
+        # For documents, extract text and send to model
+        document_text, _ = process_document(file_path)
+
+        # Limit text length to avoid exceeding context window
+        if len(document_text) > 4000:
+            document_text = document_text[:4000] + "...(truncated)"
+
+        # Set up conversation with extracted text
+        full_prompt = f"This is the content of the document:\n\n{document_text}\n\nNow, please answer this question about the document: {query}"
+
+        # Use LLaVA for text generation
+        conv = conv_templates["llava_v1"].copy()
+        conv.append_message(conv.roles[0], full_prompt)
+        conv.append_message(conv.roles[1], None)
+        prompt = conv.get_prompt()
+
+        # Generate response
+        with torch.no_grad():
+            response = model.generate(
+                None,
+                tokenizer(prompt, return_tensors='pt').input_ids.to(model.device),
+                max_new_tokens=1024,
+                temperature=0.7,
+                do_sample=True,
+            )
+
+        response = tokenizer.decode(response[0], skip_special_tokens=True)
+        response = response.split(conv.sep2)[-1].strip()
+
+    # Clean up if using uploaded file
+    if not use_sample:
+        os.remove(file_path)
+        os.rmdir(temp_dir)
+
+    return response
+
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Document and Image Chat Assistant")
+
+    with gr.Tab("Upload Your Document"):
+        with gr.Row():
+            with gr.Column():
+                file_input = gr.File(label="Upload Document or Image")
+                query_input = gr.Textbox(label="Ask a question about the document", lines=2)
+                submit_btn = gr.Button("Submit")
+
+            with gr.Column():
+                output = gr.Textbox(label="Response", lines=10)
+
+        submit_btn.click(
+            fn=lambda file, query: chat_with_document(file, query, use_sample=False),
+            inputs=[file_input, query_input],
+            outputs=output
+        )
+
+    with gr.Tab("Use Sample Documents"):
+        with gr.Row():
+            with gr.Column():
+                sample_dropdown = gr.Dropdown(choices=get_sample_documents(), label="Select Sample Document")
+                sample_query_input = gr.Textbox(label="Ask a question about the sample document", lines=2)
+                sample_submit_btn = gr.Button("Submit")
+
+            with gr.Column():
+                sample_output = gr.Textbox(label="Response", lines=10)
+
+        sample_submit_btn.click(
+            fn=lambda sample, query: chat_with_document(None, query, use_sample=True, sample_name=sample),
+            inputs=[sample_dropdown, sample_query_input],
+            outputs=sample_output
+        )
+
+# Create sample documents
+def create_sample_documents():
+    """Create sample documents for testing"""
+    if not os.path.exists(SAMPLE_DIR):
+        os.makedirs(SAMPLE_DIR)
+
+    # Sample text document
+    with open(os.path.join(SAMPLE_DIR, "ai_overview.txt"), "w") as f:
+        f.write("# Artificial Intelligence Overview\n\n")
+        f.write("Artificial Intelligence (AI) is the simulation of human intelligence processes by machines, especially computer systems.\n")
+        f.write("These processes include learning (the acquisition of information and rules for using the information),\n")
+        f.write("reasoning (using rules to reach approximate or definite conclusions) and self-correction.\n\n")
+        f.write("Major AI techniques include:\n")
+        f.write("- Machine Learning\n")
+        f.write("- Natural Language Processing\n")
+        f.write("- Computer Vision\n")
+        f.write("- Robotics\n\n")
+        f.write("AI is transforming many fields including healthcare, finance, transportation, and more.")
+
+    # Create sample DOCX if python-docx is available
+    try:
+        doc = docx.Document()
+        doc.add_heading('Project Schedule', 0)
+        doc.add_paragraph('This document outlines the schedule for the AI chatbot project.')
+
+        doc.add_heading('Phase 1: Planning', level=1)
+        doc.add_paragraph('Requirements gathering: Week 1-2')
+        doc.add_paragraph('Architecture design: Week 3')
+
+        doc.add_heading('Phase 2: Development', level=1)
+        doc.add_paragraph('Backend development: Week 4-6')
+        doc.add_paragraph('Frontend development: Week 5-7')
+        doc.add_paragraph('Integration: Week 8')
+
+        doc.add_heading('Phase 3: Testing', level=1)
+        doc.add_paragraph('Unit testing: Week 9')
+        doc.add_paragraph('Integration testing: Week 10')
+        doc.add_paragraph('User acceptance testing: Week 11')
+
+        doc.save(os.path.join(SAMPLE_DIR, 'project_schedule.docx'))
+    except:
+        pass
+
+    # Create a simple image with text
+    try:
+        img = Image.new('RGB', (800, 400), color=(255, 255, 255))
+        from PIL import ImageDraw, ImageFont
+        d = ImageDraw.Draw(img)
+        # Use default font
+        d.text((50, 50), "Document Chat Demo", fill=(0, 0, 0))
+        d.text((50, 100), "This is a sample image for testing the document chat system.", fill=(0, 0, 0))
+        d.text((50, 150), "The system should be able to answer questions about this text.", fill=(0, 0, 0))
+        d.text((50, 200), "It can also describe visual elements in the image.", fill=(0, 0, 0))
+
+        # Draw some shapes
+        d.rectangle([(50, 250), (200, 300)], outline=(255, 0, 0))
+        d.ellipse([(300, 250), (450, 300)], outline=(0, 255, 0))
+        d.polygon([(500, 250), (550, 300), (600, 250)], outline=(0, 0, 255))
+
+        img.save(os.path.join(SAMPLE_DIR, 'sample_image.png'))
+    except:
+        pass
+
+# Create sample documents when starting
+create_sample_documents()
+
+# Start Gradio server
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)
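Reviewer note on the generate calls above: in upstream llava-torch, model.generate takes the token ids as its first argument and the image tensor through the images= keyword, and the prompt is expected to carry an image placeholder token; the positional (image_tensor, input_ids) call in the image branch likely needs adjusting. A hedged sketch of that branch under the API used by llava/eval/run_llava.py (names from llava.constants and llava.mm_utils; verify against the pinned version before relying on it):

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.mm_utils import tokenizer_image_token

conv = conv_templates["llava_v1"].copy()
conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\n" + query)  # image placeholder first
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

# tokenizer_image_token splices IMAGE_TOKEN_INDEX where the placeholder sits
input_ids = tokenizer_image_token(
    prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt"
).unsqueeze(0).to(model.device)

with torch.no_grad():
    output_ids = model.generate(
        input_ids,
        images=image_tensor,  # keyword argument, not positional
        do_sample=True,
        temperature=0.7,
        max_new_tokens=1024,
    )
answer = tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()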
requirements.txt CHANGED

@@ -1,5 +1,14 @@
-fastapi==0.110.0
-uvicorn[standard]==0.29.0
-requests==2.31.0
-python-dotenv==1.0.1
-pillow==10.2.0
+torch
+transformers>=4.31.0
+accelerate>=0.21.0
+bitsandbytes>=0.41.0
+sentencepiece
+gradio
+Pillow
+opencv-python
+pytesseract
+pdf2image
+python-docx
+openpyxl
+python-pptx
+llava-torch
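With most version pins dropped, a quick import smoke test inside the built image catches resolver breakage early; a minimal sketch (note that several import names differ from the PyPI names: python-docx imports as docx, python-pptx as pptx, opencv-python as cv2, Pillow as PIL):

import importlib

# bitsandbytes and llava-torch are left out here: their imports have
# side effects on CPU-only machines and are exercised by app startup anyway
modules = ["torch", "transformers", "accelerate", "sentencepiece", "gradio",
           "PIL", "cv2", "pytesseract", "pdf2image", "docx", "openpyxl", "pptx"]
for name in modules:
    importlib.import_module(name)  # raises ImportError on a broken resolve
    print("ok:", name)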
sample_documents/test_image.jpg ADDED

Git LFS Details

  • SHA256: 8a56ccfc341865af4ec1c2d836e52e71dcd959e41a8522f60bfcc3ff4e99d388
  • Pointer size: 131 Bytes
  • Size of remote file: 107 kB