Commit c35bb2c
Parent(s): c41516f
Update app.py

app.py CHANGED
@@ -51,10 +51,6 @@ os.system('python -m pip install --upgrade pip')

## model / feature extractor / tokenizer

-# models
-model_id_lilt = "pierreguillou/lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
-model_id_layoutxlm = "pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512"
-
# get device
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -66,6 +62,9 @@ tokenizer_lilt = AutoTokenizer.from_pretrained(model_id_lilt)
model_lilt = AutoModelForTokenClassification.from_pretrained(model_id_lilt);
model_lilt.to(device);

+tokenizer1 = tokenizer_lilt
+model1 = model_lilt
+
## model LayoutXLM
from transformers import LayoutLMv2ForTokenClassification # LayoutXLMTokenizerFast,
model_layoutxlm = LayoutLMv2ForTokenClassification.from_pretrained(model_id_layoutxlm);

@@ -79,17 +78,16 @@ feature_extractor = LayoutLMv2FeatureExtractor(apply_ocr=False)
from transformers import AutoTokenizer
tokenizer_layoutxlm = AutoTokenizer.from_pretrained(tokenizer_id_layoutxlm)

-
-
-label2id_lilt = model_lilt.config.label2id
-num_labels_lilt = len(id2label_lilt)
+tokenizer2 = tokenizer_layoutxlm
+model2 = model_layoutxlm

-
-
-
+# get labels
+id2label = model_lilt.config.id2label
+label2id = model_lilt.config.label2id
+num_labels = len(id2label)

# APP outputs by model
-def
+def app_outputs(uploaded_pdf):
filename, msg, images = pdf_to_images(uploaded_pdf)
num_images = len(images)
@@ -97,16 +95,28 @@ def app_outputs_by_model(uploaded_pdf, model_id, model, tokenizer, max_length, i

# Extraction of image data (text and bounding boxes)
dataset, texts_lines, texts_pars, texts_lines_par, row_indexes, par_boxes, line_boxes, lines_par_boxes = extraction_data_from_image(images)
+
# prepare our data in the format of the model
-
-
-
+# model1
+prepare_inference_features_partial1 = partial(prepare_inference_features_paragraph, tokenizer=tokenizer1, max_length=max_length, cls_box=cls_box1, sep_box=sep_box1)
+encoded_dataset1 = dataset.map(prepare_inference_features_partial1, batched=True, batch_size=64, remove_columns=dataset.column_names)
+custom_encoded_dataset1 = CustomDataset(encoded_dataset1, tokenizer1)
+# model2
+prepare_inference_features_partial2 = partial(prepare_inference_features_paragraph, tokenizer=tokenizer2, max_length=max_length, cls_box=cls_box2, sep_box=sep_box2)
+encoded_dataset2 = dataset.map(prepare_inference_features_partial2, batched=True, batch_size=64, remove_columns=dataset.column_names)
+custom_encoded_dataset2 = CustomDataset(encoded_dataset2, tokenizer2)
+
# Get predictions (token level)
-
-
-
+# model1
+outputs1, images_ids_list1, chunk_ids1, input_ids1, bboxes1 = predictions_token_level(images, custom_encoded_dataset1, model_id1, model1)
+# model2
+outputs2, images_ids_list2, chunk_ids2, input_ids2, bboxes2 = predictions_token_level(images, custom_encoded_dataset2, model_id2, model2)
+
+# Get predictions (paragraph level)
+bboxes_list_dict, input_ids_dict_dict, probs_dict_dict, df = predictions_paragraph_level(max_length, tokenizer1, id2label, dataset, outputs1, images_ids_list1, chunk_ids1, input_ids1, bboxes1, cls_box1, sep_box1, tokenizer2, outputs2, images_ids_list2, chunk_ids2, input_ids2, bboxes2, cls_box2, sep_box2)
+
# Get labeled images with lines bounding boxes
-images = get_labeled_images(id2label, dataset,
+images = get_labeled_images(id2label, dataset, images_ids_list1, bboxes_list_dict, probs_dict_dict)

img_files = list()
# get image of PDF without bounding boxes
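Side note on the hunk above: both preprocessing pipelines reuse the same extracted dataset and only swap the tokenizer and the special boxes, which is done by binding keyword arguments with functools.partial before datasets.map. Below is a minimal, self-contained sketch of that pattern; prepare_features, the toy dataset and the xlm-roberta-base checkpoint are illustrative stand-ins, not the repo's prepare_inference_features_paragraph.

from functools import partial
from datasets import Dataset
from transformers import AutoTokenizer

# toy stand-in for the dataset returned by extraction_data_from_image()
dataset = Dataset.from_dict({"texts": ["Hello world", "A second paragraph"]})

def prepare_features(batch, tokenizer, max_length):
    # same data every time; only the tokenizer and sequence length change per model
    return tokenizer(batch["texts"], truncation=True, max_length=max_length)

tokenizer1 = AutoTokenizer.from_pretrained("xlm-roberta-base")
prepare_features_partial1 = partial(prepare_features, tokenizer=tokenizer1, max_length=512)
encoded_dataset1 = dataset.map(prepare_features_partial1, batched=True, batch_size=64, remove_columns=dataset.column_names)
print(encoded_dataset1.column_names)  # e.g. ['input_ids', 'attention_mask']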
@@ -144,97 +154,64 @@ def app_outputs_by_model(uploaded_pdf, model_id, model, tokenizer, max_length, i
df, df_empty = dict(), pd.DataFrame()
df[0], df[1] = df_empty.to_csv(csv_file, encoding="utf-8", index=False), df_empty.to_csv(csv_file, encoding="utf-8", index=False)

-return msg, img_files[0], images[0], csv_files[0], df[0]
-
-def app_outputs(uploaded_pdf):
-msg_lilt, img_files_lilt, images_lilt, csv_files_lilt, df_lilt = app_outputs_by_model(uploaded_pdf,
-model_id=model_id_lilt, model=model_lilt, tokenizer=tokenizer_lilt,
-max_length=max_length_lilt, id2label=id2label_lilt, cls_box=cls_box, sep_box=sep_box_lilt)
-
-msg_layoutxlm, img_files_layoutxlm, images_layoutxlm, csv_files_layoutxlm, df_layoutxlm = app_outputs_by_model(uploaded_pdf,
-model_id=model_id_layoutxlm, model=model_layoutxlm, tokenizer=tokenizer_layoutxlm,
-max_length=max_length_layoutxlm, id2label=id2label_layoutxlm, cls_box=cls_box, sep_box=sep_box_layoutxlm)
-
-return msg_lilt, msg_layoutxlm, img_files_lilt, img_files_layoutxlm, images_lilt, images_layoutxlm, csv_files_lilt, csv_files_layoutxlm, df_lilt, df_layoutxlm
+return msg, img_files[0], img_files[1], images[0], images[1], csv_files[0], csv_files[1], df[0], df[1]

-#
-with gr.Blocks(title=
+# Gradio APP
+with gr.Blocks(title='Inference APP for Document Understanding at paragraph level (v3 - Ensemble "LiLT + LayoutXLM" base)', css=".gradio-container") as demo:
gr.HTML("""
-<div style="font-family:'Times New Roman', 'Serif'; font-size:26pt; font-weight:bold; text-align:center;"><h1>Inference APP for Document Understanding at paragraph level (
-<div style="margin-top: 40px"><p>(04/
-<
+<div style="font-family:'Times New Roman', 'Serif'; font-size:26pt; font-weight:bold; text-align:center;"><h1>Inference APP for Document Understanding at paragraph level (v3 - Ensemble "LiLT + LayoutXLM" base)</h1></div>
+<div style="margin-top: 40px"><p>(04/04/2023) This Inference APP uses an ensemble of 2 Document Understanding models finetuned on the dataset DocLayNet base at paragraph level (chunk size of 512 tokens) and combined with XLM-RoBERTa base: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/pierreguillou/lilt-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512" target="_blank">LiLT base</a> and <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/pierreguillou/layout-xlm-base-finetuned-with-DocLayNet-base-at-paragraphlevel-ml512" target="_blank">LayoutXLM base</a>.</p><p>This ensemble calculates the probabilities of each block from the outputs of the models for each label before selecting the label with the highest sum of the normalized probabilities.</p>
+<p>Note: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://arxiv.org/abs/2202.13669" target="_blank">LiLT (Language-Independent Layout Transformer)</a> and <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://arxiv.org/abs/2104.08836" target="_blank">LayoutXLM: Multimodal Pre-training for Multilingual Visually-rich Document Understanding</a> are Document Understanding models that use both layout and text in order to detect labels of bounding boxes. Combined with the model <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/xlm-roberta-base" target="_blank">XLM-RoBERTa base</a>, these finetuned models have the capacity to <b>understand any language</b>. Finetuned on the dataset <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/datasets/pierreguillou/DocLayNet-base" target="_blank">DocLayNet base</a>, they can <b>classify any bounding box (and its OCR text) to 11 labels</b> (Caption, Footnote, Formula, List-item, Page-footer, Page-header, Picture, Section-header, Table, Text, Title).</p>
+<p>They rely on an external OCR engine to get words and bounding boxes from the document image. Thus, let's run in this APP an OCR engine (<a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://github.com/madmaze/pytesseract#python-tesseract" target="_blank">PyTesseract</a>) to get the bounding boxes, then run the 2 models (already fine-tuned on the dataset DocLayNet base at paragraph level) on the individual tokens, then normalize and sum the block probabilities as explained, and visualize the result at paragraph level!</p>
+<p><b>It allows you to get all pages of any PDF (of any language) with bounding boxes labeled at paragraph level and the associated dataframes with labeled data (bounding boxes, texts, labels) :-)</b></p></div>
+<div><p>However, the inference time per page can be high when running the model on CPU due to the number of paragraph predictions to be made. Therefore, to avoid running this APP for too long, <b>only the first 2 pages are processed by this APP</b>. If you want to increase this limit, you can either clone this APP in Hugging Face Space (or run its <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://github.com/piegu/language-models/blob/master/Gradio_inference_on_Ensemble_LiLT_&_LayoutXLM_base_model_finetuned_on_DocLayNet_base_in_any_language_at_levelparagraphs_ml512.ipynb" target="_blank">notebook</a> on your own platform) and change the value of the parameter <code>max_imgboxes</code>, or run the inference notebook "<a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://github.com/piegu/language-models/blob/master/inference_on_Ensemble_LiLT_&_LayoutXLM_base_model_finetuned_on_DocLayNet_base_in_any_language_at_levelparagraphs_ml512.ipynb" target="_blank">Document AI | Inference at paragraph level by using the association of 2 Document Understanding models (LiLT and LayoutXLM base fine-tuned on DocLayNet base dataset)</a>" on your own platform as it does not have this limit.</p></div>
+<div style="margin-top: 20px"><p>Links to Document Understanding APPs:</p><ul><li>Line level: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v1" target="_blank">v1 (LiLT base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v2" target="_blank">v2 (LayoutXLM base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-linelevel-v3" target="_blank">v3 (LiLT base vs LayoutXLM base)</a></li><li>Paragraph level: <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v1" target="_blank">v1 (LiLT base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v2" target="_blank">v2 (LayoutXLM base)</a> | <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://huggingface.co/spaces/pierreguillou/Inference-APP-Document-Understanding-at-paragraphlevel-v3" target="_blank">v3 (LiLT base vs LayoutXLM base)</a></li></ul></div>
+<div style="margin-top: 20px"><p>More information about the DocLayNet datasets, the finetuning of the models and this APP can be found in the following blog posts:</p>
+<ul><li>(03/31/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-and-fine-tuning-notebook-for-document-understanding-at-paragraph-level-3507af80573d" target="_blank">Document AI | Inference APP and fine-tuning notebook for Document Understanding at paragraph level with LayoutXLM base</a></li><li>(03/25/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-app-to-compare-the-document-understanding-lilt-and-layoutxlm-base-models-at-line-1c53eb481a15" target="_blank">Document AI | APP to compare the Document Understanding LiLT and LayoutXLM (base) models at line level</a></li><li>(03/05/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-and-fine-tuning-notebook-for-document-understanding-at-line-level-with-b08fdca5f4dc" target="_blank">Document AI | Inference APP and fine-tuning notebook for Document Understanding at line level with LayoutXLM base</a></li><li>(02/14/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-inference-app-for-document-understanding-at-line-level-a35bbfa98893" target="_blank">Document AI | Inference APP for Document Understanding at line level</a></li><li>(02/10/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-document-understanding-model-at-line-level-with-lilt-tesseract-and-doclaynet-dataset-347107a643b8" target="_blank">Document AI | Document Understanding model at line level with LiLT, Tesseract and DocLayNet dataset</a></li><li>(01/31/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-doclaynet-image-viewer-app-3ac54c19956" target="_blank">Document AI | DocLayNet image viewer APP</a></li><li>(01/27/2023) <a style="text-decoration: none; border-bottom: #64b5f6 0.125em solid; color: #64b5f6" href="https://medium.com/@pierre_guillou/document-ai-processing-of-doclaynet-dataset-to-be-used-by-layout-models-of-the-hugging-face-hub-308d8bd81cdb" target="_blank">Document AI | Processing of DocLayNet dataset to be used by layout models of the Hugging Face hub (finetuning, inference)</a></li></ul></div>
""")
with gr.Row():
pdf_file = gr.File(label="PDF")
with gr.Row():
-submit_btn = gr.Button(f"
-reset_btn = gr.Button(value="Clear")
+submit_btn = gr.Button(f"Display first {max_imgboxes} labeled PDF pages")
+reset_btn = gr.Button(value="Clear")
with gr.Row():
-
-with gr.Column():
-output_msg = gr.Textbox(label="LiLT output message")
-output_messages.append(output_msg)
-with gr.Column():
-output_msg = gr.Textbox(label="LayoutXLM output message")
-output_messages.append(output_msg)
+output_msg = gr.Textbox(label="Output message")
with gr.Row():
fileboxes = []
-
-file_path = gr.File(visible=True, label=f"
-fileboxes.append(file_path)
-with gr.Column():
-file_path = gr.File(visible=True, label=f"LayoutXLM image file")
+for num_page in range(max_imgboxes):
+file_path = gr.File(visible=True, label=f"Image file of the PDF page n°{num_page}")
fileboxes.append(file_path)
with gr.Row():
imgboxes = []
-
-img = gr.Image(type="pil", label=f"
-imgboxes.append(img)
-with gr.Column():
-img = gr.Image(type="pil", label=f"LayoutXLM Image")
+for num_page in range(max_imgboxes):
+img = gr.Image(type="pil", label=f"Image of the PDF page n°{num_page}")
imgboxes.append(img)
with gr.Row():
csvboxes = []
-
-csv = gr.File(visible=True, label=f"
-csvboxes.append(csv)
-with gr.Column():
-csv = gr.File(visible=True, label=f"LayoutXLM csv file at paragraph level")
+for num_page in range(max_imgboxes):
+csv = gr.File(visible=True, label=f"CSV file at paragraph level (page {num_page})")
csvboxes.append(csv)
with gr.Row():
dfboxes = []
-
+for num_page in range(max_imgboxes):
df = gr.Dataframe(
headers=["bounding boxes", "texts", "labels"],
datatype=["str", "str", "str"],
col_count=(3, "fixed"),
visible=True,
-label=f"
-type="pandas",
-wrap=True
-)
-dfboxes.append(df)
-with gr.Column():
-df = gr.Dataframe(
-headers=["bounding boxes", "texts", "labels"],
-datatype=["str", "str", "str"],
-col_count=(3, "fixed"),
-visible=True,
-label=f"LayoutXLM data",
+label=f"Data of page {num_page}",
type="pandas",
wrap=True
)
dfboxes.append(df)

-outputboxes =
-
+outputboxes = [output_msg] + fileboxes + imgboxes + csvboxes + dfboxes
submit_btn.click(app_outputs, inputs=[pdf_file], outputs=outputboxes)
-
# https://github.com/gradio-app/gradio/pull/2044/files#diff-a91dd2749f68bb7d0099a0f4079a4fd2d10281e299e7b451cb1bb876a7c21975R91
reset_btn.click(
-lambda: [pdf_file.update(value=None)
+lambda: [pdf_file.update(value=None), output_msg.update(value=None)] + [filebox.update(value=None) for filebox in fileboxes] + [imgbox.update(value=None) for imgbox in imgboxes] + [csvbox.update(value=None) for csvbox in csvboxes] + [dfbox.update(value=None) for dfbox in dfboxes],
inputs=[],
-outputs=[pdf_file] +
+outputs=[pdf_file, output_msg] + fileboxes + imgboxes + csvboxes + dfboxes
)

gr.Examples(
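The APP description added in the hunk above states the ensemble rule: for each block (paragraph), the label probabilities of the two models are normalized, summed, and the label with the highest sum is kept. In the commit this happens inside predictions_paragraph_level(); the snippet below is only a sketch of that rule under assumed names and shapes, not the actual implementation.

import numpy as np

def softmax(logits):
    exp = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return exp / exp.sum(axis=-1, keepdims=True)

def ensemble_label(logits1, logits2):
    # logits1, logits2: (num_tokens, num_labels) token-level outputs of the two models
    # for one paragraph; normalize per model, average over the paragraph's tokens,
    # then pick the label with the highest summed probability
    probs1 = softmax(logits1).mean(axis=0)
    probs2 = softmax(logits2).mean(axis=0)
    return int(np.argmax(probs1 + probs2))

rng = np.random.default_rng(0)
print(ensemble_label(rng.normal(size=(4, 11)), rng.normal(size=(4, 11))))  # 11 DocLayNet labels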
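The rewired Clear button in the last hunk follows the pattern of the Gradio PR linked in the code comment: the callback returns one update(value=None) per output component, in the same order as the outputs list, so a single click empties every box. A stripped-down sketch of that pattern with the Gradio 3.x API used by this commit (two placeholder components instead of the APP's full layout):

import gradio as gr

with gr.Blocks() as demo:
    pdf_file = gr.File(label="PDF")
    output_msg = gr.Textbox(label="Output message")
    reset_btn = gr.Button(value="Clear")
    # one update per output component, same order as the outputs list
    reset_btn.click(
        lambda: [pdf_file.update(value=None), output_msg.update(value=None)],
        inputs=[],
        outputs=[pdf_file, output_msg],
    )

demo.launch()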