pierreguillou committed
Commit c35bb2c · 1 Parent(s): c41516f

Update app.py

Files changed (1):
  1. app.py +55 -78
app.py CHANGED
@@ -51,10 +51,6 @@ os.system('python -m pip install --upgrade pip')
  ## model / feature extractor / tokenizer
 
- # models
- model_id_lilt = "pierreguillou/lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
- model_id_layoutxlm = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
-
  # get device
  import torch
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -66,6 +62,9 @@ tokenizer_lilt = AutoTokenizer.from_pretrained(model_id_lilt)
  model_lilt = AutoModelForTokenClassification.from_pretrained(model_id_lilt);
  model_lilt.to(device);
 
  ## model LayoutXLM
  from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
  model_layoutxlm = LayoutLMv2ForTokenClassification.from_pretrained(model_id_layoutxlm);
@@ -79,17 +78,16 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
  from transformers import AutoTokenizer
  tokenizer_layoutxlm = AutoTokenizer.from_pretrained(tokenizer_id_layoutxlm)
 
- # get labels
- id2label_lilt = model_lilt.config.id2label
- label2id_lilt = model_lilt.config.label2id
- num_labels_lilt = len(id2label_lilt)
 
- id2label_layoutxlm = model_layoutxlm.config.id2label
- label2id_layoutxlm = model_layoutxlm.config.label2id
- num_labels_layoutxlm = len(id2label_layoutxlm)
 
  # APP outputs by model
- def app_outputs_by_model(uploaded_pdf, model_id, model, tokenizer, max_length, id2label, cls_box, sep_box):
  filename, msg, images = pdf_to_images(uploaded_pdf)
  num_images = len(images)
 
@@ -97,16 +95,28 @@ def app_outputs_by_model(uploaded_pdf, model_id, model, tokenizer, max_length, i
  # Extraction of image data (text and bounding boxes)
  dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes = extraction_data_from_image(images)
  # prepare our data in the format of the model
- prepare_inference_features_partial = partial(prepare_inference_features_paragraph, tokenizer=tokenizer, max_length=max_length, cls_box=cls_box, sep_box=sep_box)
- encoded_dataset = dataset.map(prepare_inference_features_partial, batched=True, batch_size=64, remove_columns=dataset.column_names)
- custom_encoded_dataset = CustomDataset(encoded_dataset, tokenizer)
  # Get predictions (token level)
- outputs, images_ids_list, chunk_ids, input_ids, bboxes = predictions_token_level(images, custom_encoded_dataset, model_id, model)
- # Get predictions (line level)
- probs_bbox, bboxes_list_dict, input_ids_dict_dict, probs_dict_dict, df = predictions_paragraph_level(max_length, tokenizer, id2label, dataset, outputs, images_ids_list, chunk_ids, input_ids, bboxes, cls_box, sep_box)
  # Get labeled images with lines bounding boxes
- images = get_labeled_images(id2label, dataset, images_ids_list, bboxes_list_dict, probs_dict_dict)
 
  img_files = list()
  # get image of PDF without bounding boxes
@@ -144,97 +154,64 @@ def app_outputs_by_model(uploaded_pdf, model_id, model, tokenizer, max_length, i
  df, df_empty = dict(), pd.DataFrame()
  df[0], df[1] = df_empty.to_csv(csv_file, encoding="utf-8", index=False), df_empty.to_csv(csv_file, encoding="utf-8", index=False)
 
- return msg, img_files[0], images[0], csv_files[0], df[0]
-
- def app_outputs(uploaded_pdf):
- msg_lilt, img_files_lilt, images_lilt, csv_files_lilt, df_lilt = app_outputs_by_model(uploaded_pdf,
- model_id=model_id_lilt, model=model_lilt, tokenizer=tokenizer_lilt,
- max_length=max_length_lilt, id2label=id2label_lilt, cls_box=cls_box, sep_box=sep_box_lilt)
-
- msg_layoutxlm, img_files_layoutxlm, images_layoutxlm, csv_files_layoutxlm, df_layoutxlm = app_outputs_by_model(uploaded_pdf,
- model_id=model_id_layoutxlm, model=model_layoutxlm, tokenizer=tokenizer_layoutxlm,
- max_length=max_length_layoutxlm, id2label=id2label_layoutxlm, cls_box=cls_box, sep_box=sep_box_layoutxlm)
-
- return msg_lilt, msg_layoutxlm, img_files_lilt, img_files_layoutxlm, images_lilt, images_layoutxlm, csv_files_lilt, csv_files_layoutxlm, df_lilt, df_layoutxlm
 
- # gradio APP
- with gr.Blocks(title="Inference APP for Document Understanding at paragraph level (v1 - LiLT base vs LayoutXLM base)", css=".gradio-container") as demo:
  gr.HTML("""
- <div style="font-family:'Times New Roman', 'Serif'; font-size:26pt; font-weight:bold; text-align:center;"><h1>Inference APP for Document Understanding at paragraph level (v1 - LiLT base vs LayoutXLM base)</h1></div>
- <div style="margin-top: 40px"><p>(04/01/2023) This Inference APP compares - only on the first PDF page - 2 Document Understanding models finetuned on the dataset <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/datasets/pierreguillou/DocLayNet-base" target="_blank">DocLayNet base</a> at paragraph level (chunk size of 512 tokens): <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/pierreguillou/lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512" target="_blank">LiLT base combined with XLM-RoBERTa base</a> and <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512" target="_blank">LayoutXLM base combined with XLM-RoBERTa base</a>.</p></div>
- <div><p>To test these 2 models separately, use their corresponding APP on Hugging Face Spaces: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v1" target="_blank">LiLT base APP (v1 - paragraph level)</a> and <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v2" target="_blank">LayoutXLM base APP (v2 - paragraph level)</a>.</p></div><div style="margin-top: 20px"><p>Links to Document Understanding APPs:</p><ul><li>Line level: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v1" target="_blank">v1 (LiLT base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v2" target="_blank">v2 (LayoutXLM base)</a></li><li>Paragraph level: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v1" target="_blank">v1 (LiLT base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v2" target="_blank">v2 (LayoutXLM base)</a></li></ul></div><div style="margin-top: 20px"><p>More information about the DocLayNet datasets, the finetuning of the model and this APP in the following blog posts:</p><ul><li>(03/31/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-and-fine-tuning-notebook-for-document-understanding-at-paragraph-level-3507af80573d" target="_blank">Document AI | Inference APP and fine-tuning notebook for Document Understanding at paragraph level with LayoutXLM base</a></li><li>(03/25/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-app-to-compare-the-document-understanding-lilt-and-layoutxlm-base-models-at-line-1c53eb481a15" target="_blank">Document AI | APP to compare the Document Understanding LiLT and LayoutXLM (base) models at line level</a></li><li>(03/05/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-and-fine-tuning-notebook-for-document-understanding-at-line-level-with-b08fdca5f4dc" target="_blank">Document AI | Inference APP and fine-tuning notebook for Document Understanding at line level with LayoutXLM base</a></li><li>(02/14/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-for-document-understanding-at-line-level-a35bbfa98893" target="_blank">Document AI | Inference APP for Document Understanding at line level</a></li><li>(02/10/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" 
href="https://medium.com/@pierre_guillou/document-ai-document-understanding-model-at-line-level-with-lilt-tesseract-and-doclaynet-dataset-347107a643b8" target="_blank">Document AI | Document Understanding model at line level with LiLT, Tesseract and DocLayNet dataset</a></li><li>(01/31/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-doclaynet-image-viewer-app-3ac54c19956" target="_blank">Document AI | DocLayNet image viewer APP</a></li><li>(01/27/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-processing-of-doclaynet-dataset-to-be-used-by-layout-models-of-the-hugging-face-hub-308d8bd81cdb" target="_blank">Document AI | Processing of DocLayNet dataset to be used by layout models of the Hugging Face hub (finetuning, inference)</a></li></ul></div>
  """)
  with gr.Row():
  pdf_file = gr.File(label="PDF")
  with gr.Row():
- submit_btn = gr.Button(f"Get layout detection by LiLT and LayoutXLM on the first PDF page")
- reset_btn = gr.Button(value="Clear")
  with gr.Row():
- output_messages = []
- with gr.Column():
- output_msg = gr.Textbox(label="LiLT output message")
- output_messages.append(output_msg)
- with gr.Column():
- output_msg = gr.Textbox(label="LayoutXLM output message")
- output_messages.append(output_msg)
  with gr.Row():
  fileboxes = []
- with gr.Column():
- file_path = gr.File(visible=True, label=f"LiLT image file")
- fileboxes.append(file_path)
- with gr.Column():
- file_path = gr.File(visible=True, label=f"LayoutXLM image file")
  fileboxes.append(file_path)
  with gr.Row():
  imgboxes = []
- with gr.Column():
- img = gr.Image(type="pil", label=f"Lilt Image")
- imgboxes.append(img)
- with gr.Column():
- img = gr.Image(type="pil", label=f"LayoutXLM Image")
  imgboxes.append(img)
  with gr.Row():
  csvboxes = []
- with gr.Column():
- csv = gr.File(visible=True, label=f"LiLT csv file at paragraph level")
- csvboxes.append(csv)
- with gr.Column():
- csv = gr.File(visible=True, label=f"LayoutXLM csv file at paragraph level")
  csvboxes.append(csv)
  with gr.Row():
  dfboxes = []
- with gr.Column():
  df = gr.Dataframe(
  headers=["bounding boxes", "texts", "labels"],
  datatype=["str", "str", "str"],
  col_count=(3, "fixed"),
  visible=True,
- label=f"LiLT data",
- type="pandas",
- wrap=True
- )
- dfboxes.append(df)
- with gr.Column():
- df = gr.Dataframe(
- headers=["bounding boxes", "texts", "labels"],
- datatype=["str", "str", "str"],
- col_count=(3, "fixed"),
- visible=True,
- label=f"LayoutXLM data",
  type="pandas",
  wrap=True
  )
  dfboxes.append(df)
 
- outputboxes = output_messages + fileboxes + imgboxes + csvboxes + dfboxes
-
  submit_btn.click(app_outputs, inputs=[pdf_file], outputs=outputboxes)
-
  # https://github.com/gradio-app/gradio/pull/2044/files#diff-a91dd2749f68bb7d0099a0f4079a4fd2d10281e299e7b451cb1bb876a7c21975R91
  reset_btn.click(
- lambda: [pdf_file.update(value=None)] + [output_msg.update(value=None) for output_msg in output_messages] + [filebox.update(value=None) for filebox in fileboxes] + [imgbox.update(value=None) for imgbox in imgboxes] + [csvbox.update(value=None) for csvbox in csvboxes] + [dfbox.update(value=None) for dfbox in dfboxes],
  inputs=[],
- outputs=[pdf_file] + output_messages + fileboxes + imgboxes + csvboxes + dfboxes
  )
 
  gr.Examples(
 
  ## model / feature extractor / tokenizer
 
  # get device
  import torch
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
  model_lilt = AutoModelForTokenClassification.from_pretrained(model_id_lilt);
  model_lilt.to(device);
 
+ tokenizer1 = tokenizer_lilt
+ model1 = model_lilt
+
  ## model LayoutXLM
  from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
  model_layoutxlm = LayoutLMv2ForTokenClassification.from_pretrained(model_id_layoutxlm);
 
  from transformers import AutoTokenizer
  tokenizer_layoutxlm = AutoTokenizer.from_pretrained(tokenizer_id_layoutxlm)
 
+ tokenizer2 = tokenizer_layoutxlm
+ model2 = model_layoutxlm
 
+ # get labels
+ id2label = model_lilt.config.id2label
+ label2id = model_lilt.config.label2id
+ num_labels = len(id2label)
 
  # APP outputs by model
+ def app_outputs(uploaded_pdf):
  filename, msg, images = pdf_to_images(uploaded_pdf)
  num_images = len(images)
 
 
  # Extraction of image data (text and bounding boxes)
  dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes = extraction_data_from_image(images)
+
  # prepare our data in the format of the model
+ # model1
+ prepare_inference_features_partial1 = partial(prepare_inference_features_paragraph, tokenizer=tokenizer1, max_length=max_length, cls_box=cls_box1, sep_box=sep_box1)
+ encoded_dataset1 = dataset.map(prepare_inference_features_partial1, batched=True, batch_size=64, remove_columns=dataset.column_names)
+ custom_encoded_dataset1 = CustomDataset(encoded_dataset1, tokenizer1)
+ # model2
+ prepare_inference_features_partial2 = partial(prepare_inference_features_paragraph, tokenizer=tokenizer2, max_length=max_length, cls_box=cls_box2, sep_box=sep_box2)
+ encoded_dataset2 = dataset.map(prepare_inference_features_partial2, batched=True, batch_size=64, remove_columns=dataset.column_names)
+ custom_encoded_dataset2 = CustomDataset(encoded_dataset2, tokenizer2)
+
  # Get predictions (token level)
+ # model1
+ outputs1, images_ids_list1, chunk_ids1, input_ids1, bboxes1 = predictions_token_level(images, custom_encoded_dataset1, model_id1, model1)
+ # model2
+ outputs2, images_ids_list2, chunk_ids2, input_ids2, bboxes2 = predictions_token_level(images, custom_encoded_dataset2, model_id2, model2)
+
+ # Get predictions (paragraph level)
+ bboxes_list_dict, input_ids_dict_dict, probs_dict_dict, df = predictions_paragraph_level(max_length, tokenizer1, id2label, dataset, outputs1, images_ids_list1, chunk_ids1, input_ids1, bboxes1, cls_box1, sep_box1, tokenizer2, outputs2, images_ids_list2, chunk_ids2, input_ids2, bboxes2, cls_box2, sep_box2)
+
  # Get labeled images with lines bounding boxes
+ images = get_labeled_images(id2label, dataset, images_ids_list1, bboxes_list_dict, probs_dict_dict)
 
  img_files = list()
  # get image of PDF without bounding boxes
 
  df, df_empty = dict(), pd.DataFrame()
  df[0], df[1] = df_empty.to_csv(csv_file, encoding="utf-8", index=False), df_empty.to_csv(csv_file, encoding="utf-8", index=False)
 
+ return msg, img_files[0], img_files[1], images[0], images[1], csv_files[0], csv_files[1], df[0], df[1]
 
+ # Gradio APP
+ with gr.Blocks(title='Inference APP for Document Understanding at paragraph level (v3 - Ensemble "LiLT + LayoutXLM" base)', css=".gradio-container") as demo:
  gr.HTML("""
+ <div style="font-family:'Times New Roman', 'Serif'; font-size:26pt; font-weight:bold; text-align:center;"><h1>Inference APP for Document Understanding at paragraph level (v3 - Ensemble "LiLT + LayoutXLM" base)</h1></div>
+ <div style="margin-top: 40px"><p>(04/04/2023) This Inference APP uses an ensemble of 2 Document Understanding models finetuned on the dataset DocLayNet base at paragraph level (chunk size of 512 tokens) and combined with XLM-RoBERTa base: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/pierreguillou/lilt-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512" target="_blank">LiLT base</a> and <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512" target="_blank">LayoutXLM base</a>.</p><p>This ensemble calculates the probabilities of each block from the outputs of the models for each label before selecting the label with the highest sum of the normalized probabilities.</p>
+ <p>Note: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://arxiv.org/abs/2202.13669" target="_blank">LiLT (Language-Independent Layout Transformer)</a> and <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://arxiv.org/abs/2104.08836" target="_blank">LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding</a> are Document Understanding models that use both layout and text in order to detect labels of bounding boxes. Combined with the model <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/xlm-roberta-base" target="_blank">XLM-RoBERTa base</a>, these finetuned models have the capacity to <b>understand any language</b>. Finetuned on the dataset <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/datasets/pierreguillou/DocLayNet-base" target="_blank">DocLayNet base</a>, they can <b>classify any bounding box (and its OCR text) into 11 labels</b> (Caption, Footnote, Formula, List-item, Page-footer, Page-header, Picture, Section-header, Table, Text, Title).</p>
+ <p>They rely on an external OCR engine to get words and bounding boxes from the document image. Thus, let's run in this APP an OCR engine (<a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://github.com/madmaze/pytesseract#python-tesseract" target="_blank">PyTesseract</a>) to get the bounding boxes, then run the 2 models (already fine-tuned on the dataset DocLayNet base at paragraph level) on the individual tokens, then sum the normalized block probabilities as explained, and visualize the result at paragraph level!</p>
+ <p><b>It allows you to get all pages of any PDF (in any language) with bounding boxes labeled at paragraph level and the associated dataframes with labeled data (bounding boxes, texts, labels) :-)</b></p></div>
+ <div><p>However, the inference time per page can be high when running the model on CPU due to the number of paragraph predictions to be made. Therefore, to avoid running this APP for too long, <b>only the first 2 pages are processed by this APP</b>. If you want to increase this limit, you can either clone this APP in Hugging Face Space (or run its <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://github.com/piegu/language-models/blob/master/Gradio_inference_on_Ensemble_LiLT_&_LayoutXLM_base_model_finetuned_on_DocLayNet_base_in_any_language_at_levelparagraphs_ml512.ipynb" target="_blank">notebook</a> on your own platform) and change the value of the parameter <code>max_imgboxes</code>, or run the inference notebook "<a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://github.com/piegu/language-models/blob/master/inference_on_Ensemble_LiLT_&_LayoutXLM_base_model_finetuned_on_DocLayNet_base_in_any_language_at_levelparagraphs_ml512.ipynb" target="_blank">Document AI | Inference at paragraph level by using the association of 2 Document Understanding models (LiLT and LayoutXLM base fine-tuned on DocLayNet base dataset)</a>" on your own platform as it does not have this limit.</p></div>
+ <div style="margin-top: 20px"><p>Links to Document Understanding APPs:</p><ul><li>Line level: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v1" target="_blank">v1 (LiLT base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v2" target="_blank">v2 (LayoutXLM base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v3" target="_blank">v3 (LilT base vs LayoutXLM base)</a></li><li>Paragraph level: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v1" target="_blank">v1 (LiLT base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v2" target="_blank">v2 (LayoutXLM base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v3" target="_blank">v3 (LilT base vs LayoutXLM base)</a></li></ul></div>
+ <div style="margin-top: 20px"><p>More information about the DocLayNet datasets, the finetuning of the model and this APP in the following blog posts:</p>
+ <ul><li>(03/31/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-and-fine-tuning-notebook-for-document-understanding-at-paragraph-level-3507af80573d" target="_blank">Document AI | Inference APP and fine-tuning notebook for Document Understanding at paragraph level with LayoutXLM base</a></li><li>(03/25/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-app-to-compare-the-document-understanding-lilt-and-layoutxlm-base-models-at-line-1c53eb481a15" target="_blank">Document AI | APP to compare the Document Understanding LiLT and LayoutXLM (base) models at line level</a></li><li>(03/05/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-and-fine-tuning-notebook-for-document-understanding-at-line-level-with-b08fdca5f4dc" target="_blank">Document AI | Inference APP and fine-tuning notebook for Document Understanding at line level with LayoutXLM base</a></li><li>(02/14/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-for-document-understanding-at-line-level-a35bbfa98893" target="_blank">Document AI | Inference APP for Document Understanding at line level</a></li><li>(02/10/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-document-understanding-model-at-line-level-with-lilt-tesseract-and-doclaynet-dataset-347107a643b8" target="_blank">Document AI | Document Understanding model at line level with LiLT, Tesseract and DocLayNet dataset</a></li><li>(01/31/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-doclaynet-image-viewer-app-3ac54c19956" target="_blank">Document AI | DocLayNet image viewer APP</a></li><li>(01/27/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-processing-of-doclaynet-dataset-to-be-used-by-layout-models-of-the-hugging-face-hub-308d8bd81cdb" target="_blank">Document AI | Processing of DocLayNet dataset to be used by layout models of the Hugging Face hub (finetuning, inference)</a></li></ul></div>
  """)
  with gr.Row():
  pdf_file = gr.File(label="PDF")
  with gr.Row():
+ submit_btn = gr.Button(f"Display first {max_imgboxes} labeled PDF pages")
+ reset_btn = gr.Button(value="Clear")
  with gr.Row():
+ output_msg = gr.Textbox(label="Output message")
  with gr.Row():
  fileboxes = []
+ for num_page in range(max_imgboxes):
+ file_path = gr.File(visible=True, label=f"Image file of the PDF page n°{num_page}")
  fileboxes.append(file_path)
  with gr.Row():
  imgboxes = []
+ for num_page in range(max_imgboxes):
+ img = gr.Image(type="pil", label=f"Image of the PDF page n°{num_page}")
  imgboxes.append(img)
  with gr.Row():
  csvboxes = []
+ for num_page in range(max_imgboxes):
+ csv = gr.File(visible=True, label=f"CSV file at paragraph level (page {num_page})")
  csvboxes.append(csv)
  with gr.Row():
  dfboxes = []
+ for num_page in range(max_imgboxes):
  df = gr.Dataframe(
  headers=["bounding boxes", "texts", "labels"],
  datatype=["str", "str", "str"],
  col_count=(3, "fixed"),
  visible=True,
+ label=f"Data of page {num_page}",
  type="pandas",
  wrap=True
  )
  dfboxes.append(df)
 
+ outputboxes = [output_msg] + fileboxes + imgboxes + csvboxes + dfboxes
  submit_btn.click(app_outputs, inputs=[pdf_file], outputs=outputboxes)
  # https://github.com/gradio-app/gradio/pull/2044/files#diff-a91dd2749f68bb7d0099a0f4079a4fd2d10281e299e7b451cb1bb876a7c21975R91
  reset_btn.click(
+ lambda: [pdf_file.update(value=None), output_msg.update(value=None)] + [filebox.update(value=None) for filebox in fileboxes] + [imgbox.update(value=None) for imgbox in imgboxes] + [csvbox.update(value=None) for csvbox in csvboxes] + [dfbox.update(value=None) for dfbox in dfboxes],
  inputs=[],
+ outputs=[pdf_file, output_msg] + fileboxes + imgboxes + csvboxes + dfboxes
  )
 
  gr.Examples(