pierreguillou committed on
Commit
d3f80ab
·
1 Parent(s): c45522e

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +1 -39
files/functions.py CHANGED
@@ -358,7 +358,7 @@ def pdf_to_images(uploaded_pdf):
358
  except PdfReadError:
359
  path_to_file = pdf_blank
360
  filename = path_to_file.replace(examples_dir,"")
361
- msg = "Invalid PDF file."
362
  images = [Image.open(image_blank)]
363
  else:
364
  try:
@@ -371,44 +371,6 @@ def pdf_to_images(uploaded_pdf):
371
 
372
  return filename, msg, images
373
 
374
- # get filename and images of PDF pages
375
- def pdf_to_images(uploaded_pdf):
376
-
377
- # file name of the uploaded PDF
378
- filename = next(iter(uploaded_pdf))
379
-
380
- try:
381
- PdfReader(filename)
382
- except PdfReadError:
383
- print("Invalid PDF file.")
384
- else:
385
- try:
386
- images = convert_from_path(str(filename))
387
- num_imgs = len(images)
388
- print(f'The PDF "{filename}"" was converted into {num_imgs} images.')
389
- print("Now, you can extract data from theses images (text, bounding boxes...).")
390
- except:
391
- print(f"Error with the PDF {filename}:it was not converted into images.")
392
- print()
393
- else:
394
- # display images
395
- if num_imgs > 0:
396
-
397
- import matplotlib.pyplot as plt
398
- %matplotlib inline
399
-
400
- plt.figure(figsize=(20,10))
401
- columns = 5
402
- for i, image in enumerate(images):
403
- plt.subplot(num_imgs / columns + 1, columns, i + 1)
404
- plt.xticks(color="white")
405
- plt.yticks(color="white")
406
- plt.tick_params(bottom = False)
407
- plt.tick_params(left = False)
408
- plt.imshow(image)
409
-
410
- return filename, images
411
-
412
  # Extraction of image data (text and bounding boxes)
413
  def extraction_data_from_image(images):
414
 
 
358
  except PdfReadError:
359
  path_to_file = pdf_blank
360
  filename = path_to_file.replace(examples_dir,"")
361
+ msg = "invalid PDF file."
362
  images = [Image.open(image_blank)]
363
  else:
364
  try:
 
371
 
372
  return filename, msg, images
373
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  # Extraction of image data (text and bounding boxes)
375
  def extraction_data_from_image(images):
376