Spaces:

Tanish28
/

New_Space

Sleeping

App Files Files Community

Tanish28 committed on Feb 19

Commit

573a310

verified ·

1 Parent(s): 94a3ea6

Create app.py

Browse files

Files changed (1) hide show

app.py +98 -0

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import gradio as gr
+import os
+from pdf2image import convert_from_path
+from openai import OpenAI
+import base64
+import io
+import tempfile
+# Initialize OpenAI client with API key
+OPENAI_API_KEY = "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"  # Replace with your actual OpenAI API key
+client = OpenAI(api_key=OPENAI_API_KEY)
+def extract_text_from_pdf(pdf_file):
+    """Extract all text from PDF pages using OpenAI's GPT-4 Vision"""
+    try:
+        # Save uploaded file to temporary location
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
+            tmp_file.write(pdf_file)
+            pdf_path = tmp_file.name
+        print(f"Processing PDF...")
+        # Convert PDF to images
+        images = convert_from_path(pdf_path)
+        # Clean up temporary file
+        os.unlink(pdf_path)
+        extracted_texts = []
+        for i, image in enumerate(images):
+            print(f"Processing page {i+1} of {len(images)}...")
+            # Convert image to base64
+            img_buffer = io.BytesIO()
+            image.save(img_buffer, format='PNG')
+            img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
+            # Use OpenAI to extract text
+            response = client.chat.completions.create(
+                model="gpt-4-vision-preview",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "Extract ALL text from this image exactly as it appears, preserving all formatting, numbers, and special characters. Include everything you can see, from headers to footers, timestamps to footnotes."
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": "Please extract and transcribe ALL text visible in this image, exactly as it appears."
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": f"data:image/png;base64,{img_base64}"
+                                }
+                            }
+                        ]
+                    }
+                ],
+                max_tokens=4096
+            )
+            extracted_texts.append(f"\n=== Page {i + 1} ===\n\n{response.choices[0].message.content}")
+        return "\n".join(extracted_texts)
+    except Exception as e:
+        return f"Error in text extraction: {str(e)}"
+def process_pdf(pdf_file):
+    if pdf_file is None:
+        return "Please upload a PDF file."
+    try:
+        # Read the uploaded file
+        file_content = pdf_file.read()
+        # Extract text
+        extracted_text = extract_text_from_pdf(file_content)
+        return extracted_text
+    except Exception as e:
+        return f"Error processing PDF: {str(e)}"
+# Create Gradio interface
+demo = gr.Interface(
+    fn=process_pdf,
+    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
+    outputs=gr.Textbox(label="Extracted Text", lines=25),
+    title="PDF Text Extractor (GPT-4 Vision)",
+    description="Upload a PDF file to extract all text using GPT-4 Vision. Please note that processing may take a few minutes depending on the number of pages.",
+    theme=gr.themes.Soft()
+)
+# Launch with share=True to get a public link
+demo.launch(share=True)