Tanish28 committed on
Commit
8c1fd84
·
verified ·
1 Parent(s): b8f7920

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -60
app.py CHANGED
@@ -1,83 +1,178 @@
1
- import gradio as gr
2
  import os
 
3
  from pdf2image import convert_from_path
4
- from anthropic import Anthropic
5
  import base64
6
- import io
 
7
 
8
# Initialize the Anthropic client.
# The API key is read from the environment (e.g. a Space secret) rather than
# being committed to the repository. Any key that was ever committed in source
# must be treated as leaked and revoked.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
client = Anthropic(api_key=ANTHROPIC_API_KEY)
11
 
12
def extract_text_from_pdf(pdf_path):
    """Extract all text from PDF pages using Claude.

    Every page of the PDF is rendered to a PNG image, base64-encoded and sent
    to the module-level ``client`` with a transcription prompt. The per-page
    replies are joined into one string, each prefixed by a
    ``=== Page N ===`` header.

    Args:
        pdf_path: Path of the PDF file to process.

    Returns:
        The concatenated page transcriptions, or a string starting with
        ``"Error in text extraction: "`` if anything raised along the way.
    """
    try:
        print("Processing PDF...")

        # Render each PDF page to an image.
        page_images = convert_from_path(pdf_path)
        page_count = len(page_images)

        page_texts = []
        for page_number, page_image in enumerate(page_images, start=1):
            print(f"Processing page {page_number} of {page_count}...")

            # Serialise the page as PNG and base64-encode it for the API.
            with io.BytesIO() as png_buffer:
                page_image.save(png_buffer, format='PNG')
                png_base64 = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

            image_part = {
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": png_base64
                }
            }
            text_part = {
                "type": "text",
                "text": "Please extract and transcribe ALL text visible in this image, exactly as it appears. Include every piece of text you can see, maintaining the exact formatting, spacing, and line breaks."
            }

            # Ask Claude to transcribe the page.
            reply = client.messages.create(
                model="claude-3-opus-20240229",
                max_tokens=4000,
                temperature=0,
                system="Extract ALL text from this image exactly as it appears, preserving all formatting, numbers, and special characters. Include everything you can see, from headers to footers, timestamps to footnotes.",
                messages=[{
                    "role": "user",
                    "content": [text_part, image_part]
                }]
            )

            page_texts.append(f"\n=== Page {page_number} ===\n\n{reply.content[0].text}")

        return "\n".join(page_texts)

    except Exception as e:
        # Errors are reported as text because the result is shown in the UI.
        return f"Error in text extraction: {str(e)}"
60
-
61
def process_pdf(pdf_file):
    """Gradio callback: run text extraction on the uploaded PDF.

    Args:
        pdf_file: The uploaded file as passed by the interface (the original
            code comments that in Spaces this is the file path), or ``None``
            when nothing was uploaded.

    Returns:
        The extracted text, a prompt to upload a file when ``pdf_file`` is
        ``None``, or a string starting with ``"Error processing PDF: "`` if
        extraction raised.
    """
    if pdf_file is not None:
        try:
            # In Spaces, pdf_file is the file path
            return extract_text_from_pdf(pdf_file)
        except Exception as e:
            return f"Error processing PDF: {str(e)}"

    return "Please upload a PDF file."
71
-
72
# Build the Gradio interface: one PDF upload in, one large text box out.
_pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
_text_output = gr.Textbox(label="Extracted Text", lines=25)

demo = gr.Interface(
    fn=process_pdf,
    inputs=_pdf_input,
    outputs=_text_output,
    title="PDF Text Extractor (Powered by Claude)",
    description="Upload a PDF file to extract all text using Claude's Vision capabilities. Please note that processing may take a few minutes depending on the number of pages.",
    theme=gr.themes.Soft()
)

# Start serving the app.
demo.launch(share=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import io
3
  from pdf2image import convert_from_path
4
+ from openai import OpenAI
5
  import base64
6
+ import asyncio
7
+ import gradio as gr
8
 
9
# The API key is supplied through an environment variable (e.g. a Space
# secret). A missing, empty or whitespace-only value is normalised to None so
# that the "key not found" check downstream catches a blank secret as well.
OPENAI_API_KEY = (os.environ.get("OPENAI_API_KEY") or "").strip() or None
 
11
 
12
class PDFFormExtractor:
    """Extract the Patient Admission Form from the first page of a PDF.

    The first page is rendered to a PNG image, sent to the OpenAI chat
    completions API as a base64 data URL together with a Markdown template,
    and the model's reply is returned as text.
    """

    # Model used for the extraction request.
    MODEL = "gpt-4o"
    # Upper bound on the number of tokens in the model's reply.
    MAX_TOKENS = 4096

    # System prompt describing the target Markdown layout of form KMHIPF002V3.
    # The template is wrapped in a properly closed ```markdown fence.
    PROMPT = """
You are an expert in document processing and OCR with deep knowledge of Markdown formatting. Your task is to extract the structured content from a PDF page image and convert it into a clean Markdown format.

Extract the Patient Admission Form (KMHIPF002V3) from this image and format it as follows:

```markdown
# PATIENT ADMISSION FORM
## DR.KAMAKSHI MEMORIAL HOSPITAL, PALLIKARANAI, CHENNAI.

### PATIENT PROFILE
*Please paste the sticker within the box*

* UHID: ______
* Patient Name: ______
* Age/Gender: ______
* Doctor Name: ______

### BASIC INFORMATION
* Date & Time of Admission: [DD/MM/YYYY]
* Date of Birth: [DD/MM/YYYY]

### IDENTIFICATION
**ID Proof Already Registered**: □ Yes □ No

**Type of ID**:
* □ Aadhar
* □ Passport
* □ Voter ID
* □ Driving License
* □ Others

ID No.: ________________
Contact No.: ________________

### MEDICAL DETAILS
* Provisional Diagnosis:
* Reason for Admission:
* Plan of Care:
* Expected Outcome*:

### CONSULTANT DETAILS
* Primary Consultant Name:
* Speciality:

### PATIENT REFERENCE INFORMATION
*(To be filled by Front Office)*

**Reference Via**:
* □ Doctor
* □ Hospital
* □ Ambulance
* □ DRKMH Employee
* □ Self / Walk In

**Referrer Details**:
* Name: ________________
* Contact No.: ________________

### TYPE OF ADMISSION
* □ Emergency
* □ Elective
* □ MLC
* □ Surgery
* □ Medical
* □ Others: ________________

### TREATMENT TYPE
* □ In Patient
* □ Day Care
* Transfer To: ________________

### CONTACT DETAILS
**Person to Contact (Next of Kin)**:
* Name: ________________
* Relationship with Patient: ________________
* Address: ________________
* Pincode: ________________
* Mobile: ________________
* Email: ________________

### OFFICIAL USE
* Front Office Executive Name:
* Front Office Executive Signature:
* Advance Amount Paid:
* ICD-10 Code (For Medical Records Section):

*\\* Subject to change during the course of diseases*

---
*Form No: KMHIPF002V3*
```
"""

    def __init__(self, api_key):
        """Create the OpenAI client used for extraction requests.

        Args:
            api_key: OpenAI API key handed to the client.
        """
        self.client = OpenAI(api_key=api_key)

    @staticmethod
    def _encode_png_base64(image):
        """Return ``image`` serialised as PNG and base64-encoded (as text)."""
        with io.BytesIO() as img_buffer:
            image.save(img_buffer, format='PNG')
            return base64.b64encode(img_buffer.getvalue()).decode('utf-8')

    async def extract_first_form(self, pdf_path):
        """Extract the admission form from the first page of ``pdf_path``.

        NOTE: the method is declared ``async`` because its caller drives it
        with ``asyncio.run``; the body itself contains no ``await`` and runs
        synchronously.

        Args:
            pdf_path: Path of the PDF file to process.

        Returns:
            The model's Markdown reply; ``"No pages found in the PDF."`` when
            the conversion yields no image; or a string starting with
            ``"Error: "`` when anything raised (including a missing file).
        """
        try:
            if not os.path.exists(pdf_path):
                raise FileNotFoundError(f"PDF file not found: {pdf_path}")

            print(f"Processing PDF: {pdf_path}")

            # Only convert the first page.
            images = convert_from_path(pdf_path, first_page=1, last_page=1)
            if not images:
                return "No pages found in the PDF."

            print("Processing first page...")
            img_base64 = self._encode_png_base64(images[0])

            response = self.client.chat.completions.create(
                model=self.MODEL,
                messages=[
                    {
                        "role": "system",
                        "content": self.PROMPT
                    },
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": "Extract and format the Patient Admission Form from this image according to the specified markdown format."
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:image/png;base64,{img_base64}"
                                }
                            }
                        ]
                    }
                ],
                max_tokens=self.MAX_TOKENS
            )

            return response.choices[0].message.content

        except Exception as e:
            # Boundary handler: the result is shown in the UI, so failures are
            # logged and reported as text rather than propagated.
            print(f"Error in form extraction: {str(e)}")
            return f"Error: {str(e)}"
160
+
161
def extract_first_form(pdf_file):
    """Gradio callback: extract the admission form from an uploaded PDF.

    Args:
        pdf_file: The uploaded file as delivered by the interface. Accepted
            as either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as ``.name``; ``None`` when nothing was
            uploaded.

    Returns:
        The extracted form as Markdown text, or a human-readable message when
        no file was uploaded, the API key is missing, or extraction failed.
    """
    # Without this guard an empty submission would crash on `pdf_file.name`.
    if pdf_file is None:
        return "Please upload a PDF file."

    if not OPENAI_API_KEY:
        return "Error: OpenAI API key not found. Please set the OPENAI_API_KEY environment variable."

    # Depending on the Gradio version / File `type` setting the callback may
    # receive a plain path or a temp-file wrapper; support both shapes.
    if isinstance(pdf_file, (str, os.PathLike)):
        pdf_path = pdf_file
    else:
        pdf_path = pdf_file.name

    extractor = PDFFormExtractor(OPENAI_API_KEY)
    return asyncio.run(extractor.extract_first_form(pdf_path))
170
+
171
# Gradio UI: a single PDF upload mapped to a Markdown text box.
# Uploads are restricted to .pdf so non-PDF files never reach the converter.
iface = gr.Interface(
    fn=extract_first_form,
    inputs=gr.File(
        label="Upload PDF with Patient Admission Form",
        file_types=[".pdf"],
    ),
    outputs=gr.Textbox(label="Extracted Form in Markdown", lines=30),
    title="Patient Admission Form Extractor",
    description="Upload a PDF file to extract the Patient Admission Form (first form) in markdown format."
)

# Start serving the app.
iface.launch()