apjanco committed
Commit 81db712 · 1 Parent(s): aac5be6

not quite working

Files changed (1)
  1. app.py +8 -135
app.py CHANGED
@@ -59,7 +59,7 @@ assistant_prompt = '<|assistant|>\n'
 prompt_suffix = "<|end|>\n"
 
 @spaces.GPU
-def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str = """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
+def run_example(image, model_id="nanonets/Nanonets-OCR-s", prompt="""Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
 
     image_path = array_to_image_path(image)
 
@@ -107,71 +107,9 @@ def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str
 
     return ocr_text, ocr_text  # Return twice: once for display, once for state
 
-css = """
-/* Overall app styling */
-.gradio-container {
-    max-width: 1200px !important;
-    margin: 0 auto;
-    padding: 20px;
-    background-color: #f8f9fa;
-}
-/* Tabs styling */
-.tabs {
-    border-radius: 8px;
-    background: white;
-    padding: 20px;
-    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
-}
-/* Input/Output containers */
-.input-container, .output-container {
-    background: white;
-    border-radius: 8px;
-    padding: 15px;
-    margin: 10px 0;
-    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
-}
-/* Button styling */
-.submit-btn {
-    background-color: #2d31fa !important;
-    border: none !important;
-    padding: 8px 20px !important;
-    border-radius: 6px !important;
-    color: white !important;
-    transition: all 0.3s ease !important;
-}
-
-.submit-btn:hover {
-    background-color: #1f24c7 !important;
-    transform: translateY(-1px);
-}
-/* Output text area */
-#output {
-    height: 500px;
-    overflow: auto;
-    border: 1px solid #e0e0e0;
-    border-radius: 6px;
-    padding: 15px;
-    background: #ffffff;
-    font-family: 'Arial', sans-serif;
-}
-/* Dropdown styling */
-.gr-dropdown {
-    border-radius: 6px !important;
-    border: 1px solid #e0e0e0 !important;
-}
-/* Image upload area */
-.gr-image-input {
-    border: 2px dashed #ccc;
-    border-radius: 8px;
-    padding: 20px;
-    transition: all 0.3s ease;
-}
-.gr-image-input:hover {
-    border-color: #2d31fa;
-}
-"""
 
-with gr.Blocks(css=css) as demo:
+
+with gr.Blocks() as demo:
     # Add state variables to store OCR results
    ocr_state = gr.State()
 
@@ -180,8 +118,8 @@ with gr.Blocks(css=css) as demo:
    with gr.Column(elem_classes="input-container"):
        input_img = gr.Image(label="Input Picture", elem_classes="gr-image-input")
        model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2.5-VL-7B-Instruct", elem_classes="gr-dropdown")
-
-
+        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", elem_classes="gr-textbox")
+
        submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
    with gr.Column(elem_classes="output-container"):
        output_text = gr.HighlightedText(label="Output Text", elem_id="output")
@@ -191,75 +129,10 @@ with gr.Blocks(css=css) as demo:
    # Modify the submit button click handler to update state
    submit_btn.click(
        run_example,
-        inputs=[input_img, model_selector],
+        inputs=[input_img, model_selector, prompt],
        outputs=[output_text, ocr_state]  # Add ocr_state to outputs
    )
-    with gr.Row():
-        filename = gr.Textbox(label="Save filename (without extension)", placeholder="Enter filename to save")
-        download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
-        download_output = gr.File(label="Download")
-
-    # Modify create_zip to use the state data
-    def create_zip(image, fname, ocr_result):
-        # Validate inputs
-        if not fname or image is None:  # Changed the validation check
-            return None
-
-        try:
-            # Convert numpy array to PIL Image if needed
-            if isinstance(image, np.ndarray):
-                image = Image.fromarray(image)
-            elif not isinstance(image, Image.Image):
-                return None
-
-            with tempfile.TemporaryDirectory() as temp_dir:
-                # Save image
-                img_path = os.path.join(temp_dir, f"{fname}.png")
-                image.save(img_path)
-
-                # Use the OCR result from state
-                original_text = ocr_result.original_text if ocr_result else ""
-                entities = ocr_result.entities if ocr_result else []
-
-                # Save text
-                txt_path = os.path.join(temp_dir, f"{fname}.txt")
-                with open(txt_path, 'w', encoding='utf-8') as f:
-                    f.write(original_text)
-
-                # Create JSON with text and entities
-                json_data = {
-                    "text": original_text,
-                    "entities": entities,
-                    "image_file": f"{fname}.png"
-                }
-
-                # Save JSON
-                json_path = os.path.join(temp_dir, f"{fname}.json")
-                with open(json_path, 'w', encoding='utf-8') as f:
-                    json.dump(json_data, f, indent=2, ensure_ascii=False)
-
-                # Create zip file
-                output_dir = "downloads"
-                os.makedirs(output_dir, exist_ok=True)
-                zip_path = os.path.join(output_dir, f"{fname}.zip")
-
-                with zipfile.ZipFile(zip_path, 'w') as zipf:
-                    zipf.write(img_path, os.path.basename(img_path))
-                    zipf.write(txt_path, os.path.basename(txt_path))
-                    zipf.write(json_path, os.path.basename(json_path))
-
-                return zip_path
-
-        except Exception as e:
-            print(f"Error creating zip: {str(e)}")
-            return None
-
-    # Update the download button click handler to include state
-    download_btn.click(
-        create_zip,
-        inputs=[input_img, filename, ocr_state],
-        outputs=[download_output]
-    )
-
+
+
 demo.queue(api_open=False)
 demo.launch(debug=True)
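
For reference, a minimal, self-contained sketch of how the components kept by this commit could be wired end to end. This is not the app's actual code: `DEFAULT_PROMPT` is a hypothetical constant standing in for the long OCR prompt that `run_example` carries as a parameter default, and the OCR call itself is stubbed out. Pre-filling the new prompt textbox keeps an empty submission from overriding that default, and the display output is passed to `gr.HighlightedText` as `(text, label)` pairs while the raw string goes to `gr.State`.

```python
# Sketch only: assumes the component names used in the diff above.
import gradio as gr

# Hypothetical constant standing in for the long OCR prompt that
# run_example currently embeds as its parameter default.
DEFAULT_PROMPT = "Extract the text from the above document as if you were reading it naturally."

def run_example(image, model_id="nanonets/Nanonets-OCR-s", prompt=DEFAULT_PROMPT):
    # Fall back to the default when the textbox is submitted empty.
    prompt = prompt or DEFAULT_PROMPT
    ocr_text = f"[stub] would run {model_id} with prompt: {prompt[:40]}..."
    # HighlightedText takes (text, label) pairs; the bare string is kept in State.
    return [(ocr_text, None)], ocr_text

with gr.Blocks() as demo:
    ocr_state = gr.State()
    with gr.Column():
        input_img = gr.Image(label="Input Picture")
        model_selector = gr.Dropdown(choices=["nanonets/Nanonets-OCR-s"],
                                     value="nanonets/Nanonets-OCR-s", label="Model")
        # Pre-fill so clearing the box does not silently blank the prompt.
        prompt = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT, lines=3)
        submit_btn = gr.Button("Submit")
    with gr.Column():
        output_text = gr.HighlightedText(label="Output Text")

    submit_btn.click(run_example,
                     inputs=[input_img, model_selector, prompt],
                     outputs=[output_text, ocr_state])

demo.queue(api_open=False)
demo.launch(debug=True)
```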