Tanish28 committed on
Commit
6d8c6b0
·
verified ·
1 Parent(s): 011d7c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -62
app.py CHANGED
@@ -4,42 +4,42 @@ from pdf2image import convert_from_path
4
  from openai import OpenAI
5
  import base64
6
  import asyncio
 
7
  import gradio as gr
8
 
9
- # Get the API key from environment variables
10
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
11
 
12
- class PDFFormExtractor:
13
  def __init__(self, api_key):
14
  self.client = OpenAI(api_key=api_key)
15
-
16
- async def extract_first_form(self, pdf_path):
17
  try:
18
  if not os.path.exists(pdf_path):
19
  raise FileNotFoundError(f"PDF file not found: {pdf_path}")
20
 
21
  print(f"Processing PDF: {pdf_path}")
22
 
23
- # Convert only the first page to an image
24
- images = convert_from_path(pdf_path, first_page=1, last_page=1)
25
-
26
- if not images:
27
- return "No pages found in the PDF."
28
-
29
- image = images[0]
30
- print("Processing first page...")
31
 
32
- img_buffer = io.BytesIO()
33
- image.save(img_buffer, format='PNG')
34
- img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
35
-
36
- # Custom prompt specifically for extracting the Patient Admission Form
37
- prompt = """
38
- You are an expert in document processing and OCR with deep knowledge of Markdown formatting. Your task is to extract the structured content from a PDF page image and convert it into a clean Markdown format.
39
-
40
- Extract the Patient Admission Form (KMHIPF002V3) from this image and format it as follows:
 
 
 
 
 
 
 
41
 
42
- ```markdown
43
  # PATIENT ADMISSION FORM
44
  ## DR.KAMAKSHI MEMORIAL HOSPITAL, PALLIKARANAI, CHENNAI.
45
 
@@ -123,54 +123,55 @@ Contact No.: ________________
123
  *\\* Subject to change during the course of diseases*
124
 
125
  ---
126
- *Form No: KMHIPF002V3*
127
- """
128
-
129
- response = self.client.chat.completions.create(
130
- model="gpt-4o",
131
- messages=[
132
- {"role": "system", "content": prompt},
 
 
 
133
  {
134
- "role": "user",
135
- "content": [
136
- {
137
- "type": "text",
138
- "text": "Extract and format the Patient Admission Form from this image according to the specified markdown format."
139
- },
140
- {
141
- "type": "image_url",
142
- "image_url": {"url": f"data:image/png;base64,{img_base64}"}
143
- }
144
- ]
145
  }
146
- ],
147
- max_tokens=4096
148
- )
149
-
150
- result = response.choices[0].message["content"]
151
- return result
152
-
153
- except Exception as e:
154
- print(f"Error in form extraction: {str(e)}")
155
- return f"Error: {str(e)}"
156
 
 
157
 
158
- def extract_first_form(pdf_file):
159
  if OPENAI_API_KEY is None:
160
  return "Error: OpenAI API key not found. Please set the OPENAI_API_KEY environment variable."
161
 
162
- extractor = PDFFormExtractor(OPENAI_API_KEY)
 
 
 
 
 
 
 
 
 
163
 
164
- pdf_path = pdf_file.name
165
- result = asyncio.run(extractor.extract_first_form(pdf_path))
166
- return result
167
-
168
 
169
  iface = gr.Interface(
170
- fn=extract_first_form,
171
- inputs=gr.File(label="Upload PDF with Patient Admission Form"),
172
- outputs=gr.Textbox(label="Extracted Form in Markdown", lines=30),
173
- title="Patient Admission Form Extractor",
174
- description="Upload a PDF file to extract the Patient Admission Form (first form) in markdown format."
175
  )
176
- iface.launch()
 
 
4
  from openai import OpenAI
5
  import base64
6
  import asyncio
7
+ from datetime import datetime
8
  import gradio as gr
9
 
10
+ # We'll use an environment variable for the API key in Spaces
11
  OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
12
 
13
+ class PDFTextExtractor:
14
  def __init__(self, api_key):
15
  self.client = OpenAI(api_key=api_key)
16
+
17
+ async def extract_text_from_pdf(self, pdf_path):
18
  try:
19
  if not os.path.exists(pdf_path):
20
  raise FileNotFoundError(f"PDF file not found: {pdf_path}")
21
 
22
  print(f"Processing PDF: {pdf_path}")
23
 
24
+ images = convert_from_path(pdf_path)
 
 
 
 
 
 
 
25
 
26
+ extracted_texts = []
27
+ for i, image in enumerate(images):
28
+ print(f"Processing page {i+1}...")
29
+
30
+ img_buffer = io.BytesIO()
31
+ image.save(img_buffer, format='PNG')
32
+ img_base64 = base64.b64encode(img_buffer.getvalue()).decode('utf-8')
33
+
34
+ response = self.client.chat.completions.create(
35
+ model="gpt-4o",
36
+ messages=[
37
+ {
38
+ "role": "system",
39
+ "content": """You are an expert in document processing and OCR with deep knowledge of Markdown formatting.
40
+ Extract the Patient Admission Form (KMHIPF002V3) from this image and format it in clean markdown.
41
+ Follow this exact markdown structure:
42
 
 
43
  # PATIENT ADMISSION FORM
44
  ## DR.KAMAKSHI MEMORIAL HOSPITAL, PALLIKARANAI, CHENNAI.
45
 
 
123
  *\\* Subject to change during the course of diseases*
124
 
125
  ---
126
+ *Form No: KMHIPF002V3*
127
+ """
128
+ },
129
+ {
130
+ "role": "user",
131
+ "content": [
132
+ {
133
+ "type": "text",
134
+ "text": "Extract and format the Patient Admission Form from this image according to the specified markdown format. Preserve all form fields and checkboxes (as □)."
135
+ },
136
  {
137
+ "type": "image_url",
138
+ "image_url": {
139
+ "url": f"data:image/png;base64,{img_base64}"
140
+ }
 
 
 
 
 
 
 
141
  }
142
+ ]
143
+ }
144
+ ],
145
+ max_tokens=4096
146
+ )
 
 
 
 
 
147
 
148
+ return response.choices[0].message.content
149
 
150
def extract_text(pdf_file):
    """Gradio callback: run the extractor on an uploaded PDF and return text.

    Args:
        pdf_file: Upload object handed over by ``gr.File``; only its ``.name``
            attribute is read and it is used as the path of the PDF on disk.

    Returns:
        The extracted markdown as a single string, or a human-readable error
        message when the API key is missing or nothing was extracted.
    """
    if OPENAI_API_KEY is None:
        return "Error: OpenAI API key not found. Please set the OPENAI_API_KEY environment variable."

    extractor = PDFTextExtractor(OPENAI_API_KEY)
    extracted = asyncio.run(extractor.extract_text_from_pdf(pdf_file.name))

    if not extracted:
        return "Failed to extract text from PDF"

    # The extractor hands back the model reply as a plain string. Iterating a
    # string yields single characters, and page['page'] on a character raises
    # TypeError, so a string result is returned as-is.
    if isinstance(extracted, str):
        return extracted

    # Per-page results: a sequence of {'page': ..., 'text': ...} mappings.
    return "".join(
        f"\n\n=== Page {page['page']} ===\n\n{page['text']}" for page in extracted
    )
 
168
 
169
  iface = gr.Interface(
170
+ fn=extract_text,
171
+ inputs=gr.File(label="Upload PDF"),
172
+ outputs="text",
173
+ title="PDF Text Extractor",
174
+ description="Upload a PDF file to extract all text using OpenAI's GPT-4 Vision."
175
  )
176
+
177
+ iface.launch()