Inference-APP-Document-Understanding-at-paragraphlevel-v1

Runtime error

App Files Community

pierreguillou committed on Feb 15, 2023

Commit

d3f80ab

1 Parent(s): c45522e

Update files/functions.py

Browse files

Files changed (1) hide show

files/functions.py +1 -39

files/functions.py CHANGED Viewed

@@ -358,7 +358,7 @@ def pdf_to_images(uploaded_pdf):
         except PdfReadError:
             path_to_file = pdf_blank
             filename = path_to_file.replace(examples_dir,"")
-            msg = "Invalid PDF file."
             images = [Image.open(image_blank)]
         else:
             try:
@@ -371,44 +371,6 @@ def pdf_to_images(uploaded_pdf):
     return filename, msg, images
-# get filename and images of PDF pages
-def pdf_to_images(uploaded_pdf):
-    # file name of the uploaded PDF
-    filename = next(iter(uploaded_pdf))
-    try:
-        PdfReader(filename)
-    except PdfReadError:
-        print("Invalid PDF file.")
-    else:
-        try:
-            images = convert_from_path(str(filename))
-            num_imgs = len(images)
-            print(f'The PDF "{filename}"" was converted into {num_imgs} images.')
-            print("Now, you can extract data from theses images (text, bounding boxes...).")
-        except:
-            print(f"Error with the PDF {filename}:it was not converted into images.")
-            print()
-        else:
-            # display images
-            if num_imgs > 0:
-                import matplotlib.pyplot as plt
-                %matplotlib inline
-                plt.figure(figsize=(20,10))
-                columns = 5
-                for i, image in enumerate(images):
-                    plt.subplot(num_imgs / columns + 1, columns, i + 1)
-                    plt.xticks(color="white")
-                    plt.yticks(color="white")
-                    plt.tick_params(bottom = False)
-                    plt.tick_params(left = False)
-                    plt.imshow(image)
-                return filename, images
 # Extraction of image data (text and bounding boxes)
 def extraction_data_from_image(images):

         except PdfReadError:
             path_to_file = pdf_blank
             filename = path_to_file.replace(examples_dir,"")
+            msg = "invalid PDF file."
             images = [Image.open(image_blank)]
         else:
             try:
     return filename, msg, images
 # Extraction of image data (text and bounding boxes)
 def extraction_data_from_image(images):