Spaces:
Sleeping
Sleeping
not quite working
Browse files
app.py
CHANGED
@@ -59,7 +59,7 @@ assistant_prompt = '<|assistant|>\n'
|
|
59 |
prompt_suffix = "<|end|>\n"
|
60 |
|
61 |
@spaces.GPU
|
62 |
-
def run_example(image
|
63 |
|
64 |
image_path = array_to_image_path(image)
|
65 |
|
@@ -107,71 +107,9 @@ def run_example(image:str, model_id:str = "nanonets/Nanonets-OCR-s", prompt: str
|
|
107 |
|
108 |
return ocr_text, ocr_text # Return twice: once for display, once for state
|
109 |
|
110 |
-
css = """
|
111 |
-
/* Overall app styling */
|
112 |
-
.gradio-container {
|
113 |
-
max-width: 1200px !important;
|
114 |
-
margin: 0 auto;
|
115 |
-
padding: 20px;
|
116 |
-
background-color: #f8f9fa;
|
117 |
-
}
|
118 |
-
/* Tabs styling */
|
119 |
-
.tabs {
|
120 |
-
border-radius: 8px;
|
121 |
-
background: white;
|
122 |
-
padding: 20px;
|
123 |
-
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.1);
|
124 |
-
}
|
125 |
-
/* Input/Output containers */
|
126 |
-
.input-container, .output-container {
|
127 |
-
background: white;
|
128 |
-
border-radius: 8px;
|
129 |
-
padding: 15px;
|
130 |
-
margin: 10px 0;
|
131 |
-
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
|
132 |
-
}
|
133 |
-
/* Button styling */
|
134 |
-
.submit-btn {
|
135 |
-
background-color: #2d31fa !important;
|
136 |
-
border: none !important;
|
137 |
-
padding: 8px 20px !important;
|
138 |
-
border-radius: 6px !important;
|
139 |
-
color: white !important;
|
140 |
-
transition: all 0.3s ease !important;
|
141 |
-
}
|
142 |
-
|
143 |
-
.submit-btn:hover {
|
144 |
-
background-color: #1f24c7 !important;
|
145 |
-
transform: translateY(-1px);
|
146 |
-
}
|
147 |
-
/* Output text area */
|
148 |
-
#output {
|
149 |
-
height: 500px;
|
150 |
-
overflow: auto;
|
151 |
-
border: 1px solid #e0e0e0;
|
152 |
-
border-radius: 6px;
|
153 |
-
padding: 15px;
|
154 |
-
background: #ffffff;
|
155 |
-
font-family: 'Arial', sans-serif;
|
156 |
-
}
|
157 |
-
/* Dropdown styling */
|
158 |
-
.gr-dropdown {
|
159 |
-
border-radius: 6px !important;
|
160 |
-
border: 1px solid #e0e0e0 !important;
|
161 |
-
}
|
162 |
-
/* Image upload area */
|
163 |
-
.gr-image-input {
|
164 |
-
border: 2px dashed #ccc;
|
165 |
-
border-radius: 8px;
|
166 |
-
padding: 20px;
|
167 |
-
transition: all 0.3s ease;
|
168 |
-
}
|
169 |
-
.gr-image-input:hover {
|
170 |
-
border-color: #2d31fa;
|
171 |
-
}
|
172 |
-
"""
|
173 |
|
174 |
-
|
|
|
175 |
# Add state variables to store OCR results
|
176 |
ocr_state = gr.State()
|
177 |
|
@@ -180,8 +118,8 @@ with gr.Blocks(css=css) as demo:
|
|
180 |
with gr.Column(elem_classes="input-container"):
|
181 |
input_img = gr.Image(label="Input Picture", elem_classes="gr-image-input")
|
182 |
model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2.5-VL-7B-Instruct", elem_classes="gr-dropdown")
|
183 |
-
|
184 |
-
|
185 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
186 |
with gr.Column(elem_classes="output-container"):
|
187 |
output_text = gr.HighlightedText(label="Output Text", elem_id="output")
|
@@ -191,75 +129,10 @@ with gr.Blocks(css=css) as demo:
|
|
191 |
# Modify the submit button click handler to update state
|
192 |
submit_btn.click(
|
193 |
run_example,
|
194 |
-
inputs=[input_img, model_selector],
|
195 |
outputs=[output_text, ocr_state] # Add ocr_state to outputs
|
196 |
)
|
197 |
-
|
198 |
-
|
199 |
-
download_btn = gr.Button("Download Image & Text", elem_classes="submit-btn")
|
200 |
-
download_output = gr.File(label="Download")
|
201 |
-
|
202 |
-
# Modify create_zip to use the state data
|
203 |
-
def create_zip(image, fname, ocr_result):
|
204 |
-
# Validate inputs
|
205 |
-
if not fname or image is None: # Changed the validation check
|
206 |
-
return None
|
207 |
-
|
208 |
-
try:
|
209 |
-
# Convert numpy array to PIL Image if needed
|
210 |
-
if isinstance(image, np.ndarray):
|
211 |
-
image = Image.fromarray(image)
|
212 |
-
elif not isinstance(image, Image.Image):
|
213 |
-
return None
|
214 |
-
|
215 |
-
with tempfile.TemporaryDirectory() as temp_dir:
|
216 |
-
# Save image
|
217 |
-
img_path = os.path.join(temp_dir, f"{fname}.png")
|
218 |
-
image.save(img_path)
|
219 |
-
|
220 |
-
# Use the OCR result from state
|
221 |
-
original_text = ocr_result.original_text if ocr_result else ""
|
222 |
-
entities = ocr_result.entities if ocr_result else []
|
223 |
-
|
224 |
-
# Save text
|
225 |
-
txt_path = os.path.join(temp_dir, f"{fname}.txt")
|
226 |
-
with open(txt_path, 'w', encoding='utf-8') as f:
|
227 |
-
f.write(original_text)
|
228 |
-
|
229 |
-
# Create JSON with text and entities
|
230 |
-
json_data = {
|
231 |
-
"text": original_text,
|
232 |
-
"entities": entities,
|
233 |
-
"image_file": f"{fname}.png"
|
234 |
-
}
|
235 |
-
|
236 |
-
# Save JSON
|
237 |
-
json_path = os.path.join(temp_dir, f"{fname}.json")
|
238 |
-
with open(json_path, 'w', encoding='utf-8') as f:
|
239 |
-
json.dump(json_data, f, indent=2, ensure_ascii=False)
|
240 |
-
|
241 |
-
# Create zip file
|
242 |
-
output_dir = "downloads"
|
243 |
-
os.makedirs(output_dir, exist_ok=True)
|
244 |
-
zip_path = os.path.join(output_dir, f"{fname}.zip")
|
245 |
-
|
246 |
-
with zipfile.ZipFile(zip_path, 'w') as zipf:
|
247 |
-
zipf.write(img_path, os.path.basename(img_path))
|
248 |
-
zipf.write(txt_path, os.path.basename(txt_path))
|
249 |
-
zipf.write(json_path, os.path.basename(json_path))
|
250 |
-
|
251 |
-
return zip_path
|
252 |
-
|
253 |
-
except Exception as e:
|
254 |
-
print(f"Error creating zip: {str(e)}")
|
255 |
-
return None
|
256 |
-
|
257 |
-
# Update the download button click handler to include state
|
258 |
-
download_btn.click(
|
259 |
-
create_zip,
|
260 |
-
inputs=[input_img, filename, ocr_state],
|
261 |
-
outputs=[download_output]
|
262 |
-
)
|
263 |
-
|
264 |
demo.queue(api_open=False)
|
265 |
demo.launch(debug=True)
|
|
|
59 |
prompt_suffix = "<|end|>\n"
|
60 |
|
61 |
@spaces.GPU
|
62 |
+
def run_example(image, model_id= "nanonets/Nanonets-OCR-s", prompt= """Extract the text from the above document as if you were reading it naturally. Return the tables in html format. Return the equations in LaTeX representation. If there is an image in the document and image caption is not present, add a small description of the image inside the <img></img> tag; otherwise, add the image caption inside <img></img>. Watermarks should be wrapped in brackets. Ex: <watermark>OFFICIAL COPY</watermark>. Page numbers should be wrapped in brackets. Ex: <page_number>14</page_number> or <page_number>9/22</page_number>. Prefer using ☐ and ☑ for check boxes."""):
|
63 |
|
64 |
image_path = array_to_image_path(image)
|
65 |
|
|
|
107 |
|
108 |
return ocr_text, ocr_text # Return twice: once for display, once for state
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
+
|
112 |
+
with gr.Blocks() as demo:
|
113 |
# Add state variables to store OCR results
|
114 |
ocr_state = gr.State()
|
115 |
|
|
|
118 |
with gr.Column(elem_classes="input-container"):
|
119 |
input_img = gr.Image(label="Input Picture", elem_classes="gr-image-input")
|
120 |
model_selector = gr.Dropdown(choices=list(models.keys()), label="Model", value="Qwen/Qwen2.5-VL-7B-Instruct", elem_classes="gr-dropdown")
|
121 |
+
prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here...", elem_classes="gr-textbox")
|
122 |
+
|
123 |
submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")
|
124 |
with gr.Column(elem_classes="output-container"):
|
125 |
output_text = gr.HighlightedText(label="Output Text", elem_id="output")
|
|
|
129 |
# Modify the submit button click handler to update state
|
130 |
submit_btn.click(
|
131 |
run_example,
|
132 |
+
inputs=[input_img, model_selector,prompt],
|
133 |
outputs=[output_text, ocr_state] # Add ocr_state to outputs
|
134 |
)
|
135 |
+
|
136 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
demo.queue(api_open=False)
|
138 |
demo.launch(debug=True)
|