use-safetensors

by dolfim-ibm - opened Dec 3, 2024

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+86

-83

Files changed (7) hide show

.gitattributes +0 -2
README.md +73 -73
model_artifacts/{tableformer/accurate/tableformer_accurate.safetensors → layout/beehive_v0.0.5_pt/model.pt} +2 -2
model_artifacts/tableformer/{fast/tableformer_fast.safetensors → fat/otslp_all_standard_094_clean.check} +2 -2
model_artifacts/tableformer/{accurate → fat}/tm_config.json +2 -1
model_artifacts/tableformer/otslp_all_fast.check +3 -0
model_artifacts/tableformer/{fast/tm_config.json → tm_config.json} +2 -1

.gitattributes CHANGED Viewed

@@ -35,5 +35,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 model_artifacts/tableformer/otslp_all_fast.check filter=lfs diff=lfs merge=lfs -text
 model_artifacts/tableformer/fat/otslp_all_standard_094_clean.check filter=lfs diff=lfs merge=lfs -text
-model_artifacts/tableformer/accurate/otslp_all_standard_094_clean.check filter=lfs diff=lfs merge=lfs -text
-model_artifacts/tableformer/fast/otslp_all_fast.check filter=lfs diff=lfs merge=lfs -text

 *tfevents* filter=lfs diff=lfs merge=lfs -text
 model_artifacts/tableformer/otslp_all_fast.check filter=lfs diff=lfs merge=lfs -text
 model_artifacts/tableformer/fat/otslp_all_standard_094_clean.check filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -1,73 +1,73 @@
----
-license: cdla-permissive-2.0
----
-# Docling Models
-This page contains models that power the PDF document conversion package [docling](https://github.com/DS4SD/docling).
-## Layout Model
-The layout model will take an image from a page and apply RT-DETR model in order to find different layout components. It currently detects the labels: Caption, Footnote, Formula, List-item, Page-footer, Page-header, Picture, Section-header, Table, Text, Title. As a reference (from the DocLayNet-paper), this is the performance of standard object detection methods on the DocLayNet dataset compared to human evaluation,
-|                | human   | MRCNN   | MRCNN   | FRCNN   | YOLO   |
-|----------------|---------|---------|---------|---------|--------|
-|                | human   | R50     | R101    | R101    | v5x6   |
-| Caption        | 84-89   | 68.4    | 71.5    | 70.1    | 77.7   |
-| Footnote       | 83-91   | 70.9    | 71.8    | 73.7    | 77.2   |
-| Formula        | 83-85   | 60.1    | 63.4    | 63.5    | 66.2   |
-| List-item      | 87-88   | 81.2    | 80.8    | 81.0    | 86.2   |
-| Page-footer    | 93-94   | 61.6    | 59.3    | 58.9    | 61.1   |
-| Page-header    | 85-89   | 71.9    | 70.0    | 72.0    | 67.9   |
-| Picture        | 69-71   | 71.7    | 72.7    | 72.0    | 77.1   |
-| Section-header | 83-84   | 67.6    | 69.3    | 68.4    | 74.6   |
-| Table          | 77-81   | 82.2    | 82.9    | 82.2    | 86.3   |
-| Text           | 84-86   | 84.6    | 85.8    | 85.4    | 88.1   |
-| Title          | 60-72   | 76.7    | 80.4    | 79.9    | 82.7   |
-| All            | 82-83   | 72.4    | 73.5    | 73.4    | 76.8   |
-## TableFormer
-The tableformer model will identify the structure of the table, starting from an image of a table. It uses the predicted table regions of the layout model to identify the tables. Tableformer has SOTA table structure identification,
-| Model (TEDS) | Simple table | Complex table | All tables |
-| ------------ | ------------ | ------------- | ---------- |
-|       Tabula |         78.0 |          57.8 |       67.9 |
-|    Traprange |         60.8 |          49.9 |       55.4 |
-|      Camelot |         80.0 |          66.0 |       73.0 |
-|  Acrobat Pro |         68.9 |          61.8 |       65.3 |
-|          EDD |         91.2 |          85.4 |       88.3 |
-|  TableFormer |         95.4 |          90.1 |       93.6 |
-## References
-```
-@techreport{Docling,
-  author = {Deep Search Team},
-  month = {8},
-  title = {{Docling Technical Report}},
-  url={https://arxiv.org/abs/2408.09869},
-  eprint={2408.09869},
-  doi = "10.48550/arXiv.2408.09869",
-  version = {1.0.0},
-  year = {2024}
-}
-@article{doclaynet2022,
-  title = {DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis},
-  doi = {10.1145/3534678.353904},
-  url = {https://arxiv.org/abs/2206.01062},
-  author = {Pfitzmann, Birgit and Auer, Christoph and Dolfi, Michele and Nassar, Ahmed S and Staar, Peter W J},
-  year = {2022}
-}
-@InProceedings{TableFormer2022,
-    author    = {Nassar, Ahmed and Livathinos, Nikolaos and Lysak, Maksym and Staar, Peter},
-    title     = {TableFormer: Table Structure Understanding With Transformers},
-    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
-    month     = {June},
-    year      = {2022},
-    pages     = {4614-4623},
-    doi = {https://doi.org/10.1109/CVPR52688.2022.00457}
-}
-```

+---
+license: cdla-permissive-2.0
+---
+# Docling Models
+This page contains models that power the PDF document conversion package [docling](https://github.com/DS4SD/docling).
+## Layout Model
+The layout model will take an image from a page and apply RT-DETR model in order to find different layout components. It currently detects the labels: Caption, Footnote, Formula, List-item, Page-footer, Page-header, Picture, Section-header, Table, Text, Title. As a reference (from the DocLayNet-paper), this is the performance of standard object detection methods on the DocLayNet dataset compared to human evaluation,
+|                | human   | MRCNN   | MRCNN   | FRCNN   | YOLO   |
+|----------------|---------|---------|---------|---------|--------|
+|                | human   | R50     | R101    | R101    | v5x6   |
+| Caption        | 84-89   | 68.4    | 71.5    | 70.1    | 77.7   |
+| Footnote       | 83-91   | 70.9    | 71.8    | 73.7    | 77.2   |
+| Formula        | 83-85   | 60.1    | 63.4    | 63.5    | 66.2   |
+| List-item      | 87-88   | 81.2    | 80.8    | 81.0    | 86.2   |
+| Page-footer    | 93-94   | 61.6    | 59.3    | 58.9    | 61.1   |
+| Page-header    | 85-89   | 71.9    | 70.0    | 72.0    | 67.9   |
+| Picture        | 69-71   | 71.7    | 72.7    | 72.0    | 77.1   |
+| Section-header | 83-84   | 67.6    | 69.3    | 68.4    | 74.6   |
+| Table          | 77-81   | 82.2    | 82.9    | 82.2    | 86.3   |
+| Text           | 84-86   | 84.6    | 85.8    | 85.4    | 88.1   |
+| Title          | 60-72   | 76.7    | 80.4    | 79.9    | 82.7   |
+| All            | 82-83   | 72.4    | 73.5    | 73.4    | 76.8   |
+## TableFormer
+The tableformer model will identify the structure of the table, starting from an image of a table. It uses the predicted table regions of the layout model to identify the tables. Tableformer has SOTA table structure identification,
+| Model (TEDS) | Simple table | Complex table | All tables |
+| ------------ | ------------ | ------------- | ---------- |
+|       Tabula |         78.0 |          57.8 |       67.9 |
+|    Traprange |         60.8 |          49.9 |       55.4 |
+|      Camelot |         80.0 |          66.0 |       73.0 |
+|  Acrobat Pro |         68.9 |          61.8 |       65.3 |
+|          EDD |         91.2 |          85.4 |       88.3 |
+|  TableFormer |         95.4 |          90.1 |       93.6 |
+## References
+```
+@techreport{Docling,
+  author = {Deep Search Team},
+  month = {8},
+  title = {{Docling Technical Report}},
+  url={https://arxiv.org/abs/2408.09869},
+  eprint={2408.09869},
+  doi = "10.48550/arXiv.2408.09869",
+  version = {1.0.0},
+  year = {2024}
+}
+@article{doclaynet2022,
+  title = {DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis},
+  doi = {10.1145/3534678.3539043},
+  url = {https://arxiv.org/abs/2206.01062},
+  author = {Pfitzmann, Birgit and Auer, Christoph and Dolfi, Michele and Nassar, Ahmed S and Staar, Peter W J},
+  year = {2022}
+}
+@InProceedings{TableFormer2022,
+    author    = {Nassar, Ahmed and Livathinos, Nikolaos and Lysak, Maksym and Staar, Peter},
+    title     = {TableFormer: Table Structure Understanding With Transformers},
+    booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+    month     = {June},
+    year      = {2022},
+    pages     = {4614-4623},
+    doi = {10.1109/CVPR52688.2022.00457}
+}
+```

model_artifacts/{tableformer/accurate/tableformer_accurate.safetensors → layout/beehive_v0.0.5_pt/model.pt} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a7d6c924b3cd12fb99a09280ca9c33a89c5d60b93253617d2e088c1a40374d9
-size 212758388

 version https://git-lfs.github.com/spec/v1
+oid sha256:b752ab5d4493457f4da5355952c68573559b5f6f091ff77f2f422b4386634743
+size 201557903

model_artifacts/tableformer/{fast/tableformer_fast.safetensors → fat/otslp_all_standard_094_clean.check} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3119563aab5a7c96fda4d621119b63fd8806272b86c30936d15507616422f718
-size 145453276

 version https://git-lfs.github.com/spec/v1
+oid sha256:eb5ee88f5e411e2a7925837c05de5236bee1934569433b4ab58371ffba3f1da6
+size 212845885

model_artifacts/tableformer/{accurate → fat}/tm_config.json RENAMED Viewed

@@ -61,7 +61,8 @@
     "padding": false,
     "padding_size": 50,
     "disable_post_process": false,
-    "profiling": false
   },
   "debug": {
     "save_debug_images": false

     "padding": false,
     "padding_size": 50,
     "disable_post_process": false,
+    "profiling": false,
+    "device_mode": "auto"
   },
   "debug": {
     "save_debug_images": false

model_artifacts/tableformer/otslp_all_fast.check ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3299cbfd5071aa8414e1fdc5d2703f1061f557d28c741ae28c0a9098a5f67872
+size 145516093

model_artifacts/tableformer/{fast/tm_config.json → tm_config.json} RENAMED Viewed

@@ -61,7 +61,8 @@
     "padding": false,
     "padding_size": 50,
     "disable_post_process": false,
-    "profiling": false
   },
   "debug": {
     "save_debug_images": false

     "padding": false,
     "padding_size": 50,
     "disable_post_process": false,
+    "profiling": false,
+    "device_mode": "auto"
   },
   "debug": {
     "save_debug_images": false