Commit
·
d3f80ab
1
Parent(s):
c45522e
Update files/functions.py
Browse files- files/functions.py +1 -39
files/functions.py
CHANGED
@@ -358,7 +358,7 @@ def pdf_to_images(uploaded_pdf):
|
|
358 |
except PdfReadError:
|
359 |
path_to_file = pdf_blank
|
360 |
filename = path_to_file.replace(examples_dir,"")
|
361 |
-
msg = "
|
362 |
images = [Image.open(image_blank)]
|
363 |
else:
|
364 |
try:
|
@@ -371,44 +371,6 @@ def pdf_to_images(uploaded_pdf):
|
|
371 |
|
372 |
return filename, msg, images
|
373 |
|
374 |
-
# get filename and images of PDF pages
|
375 |
-
def pdf_to_images(uploaded_pdf):
|
376 |
-
|
377 |
-
# file name of the uploaded PDF
|
378 |
-
filename = next(iter(uploaded_pdf))
|
379 |
-
|
380 |
-
try:
|
381 |
-
PdfReader(filename)
|
382 |
-
except PdfReadError:
|
383 |
-
print("Invalid PDF file.")
|
384 |
-
else:
|
385 |
-
try:
|
386 |
-
images = convert_from_path(str(filename))
|
387 |
-
num_imgs = len(images)
|
388 |
-
print(f'The PDF "{filename}"" was converted into {num_imgs} images.')
|
389 |
-
print("Now, you can extract data from theses images (text, bounding boxes...).")
|
390 |
-
except:
|
391 |
-
print(f"Error with the PDF {filename}:it was not converted into images.")
|
392 |
-
print()
|
393 |
-
else:
|
394 |
-
# display images
|
395 |
-
if num_imgs > 0:
|
396 |
-
|
397 |
-
import matplotlib.pyplot as plt
|
398 |
-
%matplotlib inline
|
399 |
-
|
400 |
-
plt.figure(figsize=(20,10))
|
401 |
-
columns = 5
|
402 |
-
for i, image in enumerate(images):
|
403 |
-
plt.subplot(num_imgs / columns + 1, columns, i + 1)
|
404 |
-
plt.xticks(color="white")
|
405 |
-
plt.yticks(color="white")
|
406 |
-
plt.tick_params(bottom = False)
|
407 |
-
plt.tick_params(left = False)
|
408 |
-
plt.imshow(image)
|
409 |
-
|
410 |
-
return filename, images
|
411 |
-
|
412 |
# Extraction of image data (text and bounding boxes)
|
413 |
def extraction_data_from_image(images):
|
414 |
|
|
|
358 |
except PdfReadError:
|
359 |
path_to_file = pdf_blank
|
360 |
filename = path_to_file.replace(examples_dir,"")
|
361 |
+
msg = "invalid PDF file."
|
362 |
images = [Image.open(image_blank)]
|
363 |
else:
|
364 |
try:
|
|
|
371 |
|
372 |
return filename, msg, images
|
373 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
374 |
# Extraction of image data (text and bounding boxes)
|
375 |
def extraction_data_from_image(images):
|
376 |
|