Tanish28 committed on
Commit
573a310
·
verified ·
1 Parent(s): 94a3ea6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from pdf2image import convert_from_path
4
+ from openai import OpenAI
5
+ import base64
6
+ import io
7
+ import tempfile
8
+
9
# Read the OpenAI API key from the environment instead of embedding it in the
# source: a key committed to a repository is readable by anyone with access to
# the history and must be treated as compromised (revoke and rotate it).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# NOTE(review): when api_key is None the OpenAI client is expected to do its
# own OPENAI_API_KEY environment lookup and raise if no key is found - confirm
# against the installed openai package version.
client = OpenAI(api_key=OPENAI_API_KEY)
12
+
13
def _page_to_data_url(image):
    """Return *image* serialized as PNG and wrapped in a base64 ``data:`` URL."""
    with io.BytesIO() as img_buffer:
        image.save(img_buffer, format='PNG')
        img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{img_base64}"


def extract_text_from_pdf(pdf_file, *, model="gpt-4-vision-preview"):
    """Extract all text from PDF pages using an OpenAI vision-capable model.

    The PDF bytes are written to a temporary ``.pdf`` file, rendered to one
    image per page with ``convert_from_path``, and each page image is sent to
    the chat completions endpoint for transcription.

    Args:
        pdf_file: Raw contents of the PDF (written to the temp file as-is).
        model: Name of the chat model to call. Defaults to the model the
            original implementation used.

    Returns:
        The per-page transcriptions joined with newlines, each preceded by a
        ``=== Page N ===`` header. On any failure the function does not raise;
        it returns a string starting with ``"Error in text extraction: "``.
    """
    # NOTE(review): "gpt-4-vision-preview" is a preview model name and may
    # have been retired by OpenAI - confirm against the current model list and
    # pass a supported vision model via ``model`` if so.
    try:
        pdf_path = None
        try:
            # delete=False because the converter needs to reopen the file by
            # path after this handle is closed; removal happens in ``finally``.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                pdf_path = tmp_file.name
                tmp_file.write(pdf_file)

            print("Processing PDF...")

            # Convert PDF to images (one per page).
            images = convert_from_path(pdf_path)
        finally:
            # Always remove the temporary file, including when writing or
            # conversion fails; previously it leaked on every failure.
            if pdf_path is not None:
                os.unlink(pdf_path)

        page_count = len(images)
        extracted_texts = []
        for page_number, image in enumerate(images, start=1):
            print(f"Processing page {page_number} of {page_count}...")

            # Use OpenAI to extract text from the rendered page.
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {
                        "role": "system",
                        "content": "Extract ALL text from this image exactly as it appears, preserving all formatting, numbers, and special characters. Include everything you can see, from headers to footers, timestamps to footnotes."
                    },
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "Please extract and transcribe ALL text visible in this image, exactly as it appears."
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": _page_to_data_url(image)
                                }
                            }
                        ]
                    }
                ],
                max_tokens=4096
            )

            # A response without text content would otherwise be rendered as
            # the literal string "None" in the transcript.
            page_text = response.choices[0].message.content or ""
            extracted_texts.append(f"\n=== Page {page_number} ===\n\n{page_text}")

        return "\n".join(extracted_texts)

    except Exception as e:
        # Top-level boundary for the UI: report the failure as text instead
        # of raising into the caller.
        return f"Error in text extraction: {str(e)}"
71
+
72
def process_pdf(pdf_file):
    """Read an uploaded PDF and return the text extracted from it.

    Args:
        pdf_file: The upload handed over by the UI. Accepted forms are
            ``None`` (nothing uploaded), raw ``bytes``, a filesystem path
            (``str`` or ``os.PathLike``), an object with a ``read()`` method,
            or an object exposing the file path as ``.name``.

    Returns:
        Whatever ``extract_text_from_pdf`` returns for the file contents, a
        prompt to upload a file when ``pdf_file`` is ``None``, or a string
        starting with ``"Error processing PDF: "`` if reading fails.
    """
    if pdf_file is None:
        return "Please upload a PDF file."

    try:
        if isinstance(pdf_file, (bytes, bytearray)):
            file_content = bytes(pdf_file)
        elif isinstance(pdf_file, (str, os.PathLike)):
            # The upload may arrive as a path rather than an open file;
            # calling .read() on a str would raise AttributeError.
            with open(pdf_file, "rb") as handle:
                file_content = handle.read()
        elif hasattr(pdf_file, "read"):
            # File-like object: same behaviour as before.
            file_content = pdf_file.read()
        else:
            # Fallback for wrapper objects that only carry the path.
            with open(pdf_file.name, "rb") as handle:
                file_content = handle.read()

        # Extract text
        return extract_text_from_pdf(file_content)
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as text.
        return f"Error processing PDF: {str(e)}"
86
+
87
# Build the Gradio UI: a single PDF upload feeding process_pdf, with the
# result shown in a multi-line text box.
_pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
_text_output = gr.Textbox(label="Extracted Text", lines=25)

demo = gr.Interface(
    fn=process_pdf,
    inputs=_pdf_input,
    outputs=_text_output,
    title="PDF Text Extractor (GPT-4 Vision)",
    description="Upload a PDF file to extract all text using GPT-4 Vision. Please note that processing may take a few minutes depending on the number of pages.",
    theme=gr.themes.Soft(),
)

# Start the app; share=True requests a public link in addition to the local one.
demo.launch(share=True)