Upload 38 files
Browse files전처리기 Docker 이미지 생성 획일화를 위한 모델
- docling-models/.DS_Store +0 -0
- docling-models/.cache/huggingface/.gitignore +1 -0
- docling-models/.cache/huggingface/download/.gitattributes.lock +0 -0
- docling-models/.cache/huggingface/download/.gitattributes.metadata +3 -0
- docling-models/.cache/huggingface/download/.gitignore.lock +0 -0
- docling-models/.cache/huggingface/download/.gitignore.metadata +3 -0
- docling-models/.cache/huggingface/download/README.md.lock +0 -0
- docling-models/.cache/huggingface/download/README.md.metadata +3 -0
- docling-models/.cache/huggingface/download/config.json.lock +0 -0
- docling-models/.cache/huggingface/download/config.json.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/layout/config.json.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/layout/config.json.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/layout/model.safetensors.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/layout/model.safetensors.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/layout/preprocessor_config.json.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/layout/preprocessor_config.json.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tm_config.json.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tm_config.json.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tableformer_fast.safetensors.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tableformer_fast.safetensors.metadata +3 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tm_config.json.lock +0 -0
- docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tm_config.json.metadata +3 -0
- docling-models/.gitattributes +39 -0
- docling-models/.gitignore +298 -0
- docling-models/README.md +73 -0
- docling-models/config.json +3 -0
- docling-models/model_artifacts/.DS_Store +0 -0
- docling-models/model_artifacts/layout/.DS_Store +0 -0
- docling-models/model_artifacts/layout/config.json +155 -0
- docling-models/model_artifacts/layout/model.safetensors +3 -0
- docling-models/model_artifacts/layout/preprocessor_config.json +26 -0
- docling-models/model_artifacts/tableformer/.DS_Store +0 -0
- docling-models/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors +3 -0
- docling-models/model_artifacts/tableformer/accurate/tm_config.json +369 -0
- docling-models/model_artifacts/tableformer/fast/tableformer_fast.safetensors +3 -0
- docling-models/model_artifacts/tableformer/fast/tm_config.json +369 -0
docling-models/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
docling-models/.cache/huggingface/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*
|
docling-models/.cache/huggingface/download/.gitattributes.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/.gitattributes.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
575b058b45230d48a06df9b1944f4bd40949db1d
|
| 3 |
+
1752803586.401013
|
docling-models/.cache/huggingface/download/.gitignore.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/.gitignore.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
9430eb3c89847aa8f80af5c96df3c348fca6da63
|
| 3 |
+
1752803586.489105
|
docling-models/.cache/huggingface/download/README.md.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/README.md.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
7008c0269fece2af4ca4d79d116cf5a783178e58
|
| 3 |
+
1752803586.435715
|
docling-models/.cache/huggingface/download/config.json.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
4dc84c9101238bb955158144660155da99438490
|
| 3 |
+
1752803586.1848562
|
docling-models/.cache/huggingface/download/model_artifacts/layout/config.json.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/layout/config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
d24c213cfa3d55cc4a301f415b54a19e7ce104cc
|
| 3 |
+
1752803586.474282
|
docling-models/.cache/huggingface/download/model_artifacts/layout/model.safetensors.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/layout/model.safetensors.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
31e60b4709571b613bc8736a9c982fb550d8d7a1809160a68a8282af60c8910b
|
| 3 |
+
1752803586.500781
|
docling-models/.cache/huggingface/download/model_artifacts/layout/preprocessor_config.json.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/layout/preprocessor_config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
fcdff16b42e5ebc51d28f59184490c6eff91a88b
|
| 3 |
+
1752803586.39888
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
2a7d6c924b3cd12fb99a09280ca9c33a89c5d60b93253617d2e088c1a40374d9
|
| 3 |
+
1752803586.472338
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tm_config.json.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/accurate/tm_config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
5036aa114b9393f607440cb0f6cc54229b5b30e1
|
| 3 |
+
1752803586.612683
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tableformer_fast.safetensors.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tableformer_fast.safetensors.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
3119563aab5a7c96fda4d621119b63fd8806272b86c30936d15507616422f718
|
| 3 |
+
1752803586.746832
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tm_config.json.lock
ADDED
|
File without changes
|
docling-models/.cache/huggingface/download/model_artifacts/tableformer/fast/tm_config.json.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
4659a7d29247f9f7a94102e1f313dad8e8c8f2f6
|
| 2 |
+
d02c5889d288b8baefd42e9e4da094f0cea07bbe
|
| 3 |
+
1752803586.813442
|
docling-models/.gitattributes
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
model_artifacts/tableformer/otslp_all_fast.check filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
model_artifacts/tableformer/fat/otslp_all_standard_094_clean.check filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
model_artifacts/tableformer/accurate/otslp_all_standard_094_clean.check filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
model_artifacts/tableformer/fast/otslp_all_fast.check filter=lfs diff=lfs merge=lfs -text
|
docling-models/.gitignore
ADDED
|
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode,python,vim,emacs
|
| 2 |
+
# Edit at https://www.toptal.com/developers/gitignore?templates=macos,visualstudiocode,python,vim,emacs
|
| 3 |
+
|
| 4 |
+
### Emacs ###
|
| 5 |
+
# -*- mode: gitignore; -*-
|
| 6 |
+
*~
|
| 7 |
+
\#*\#
|
| 8 |
+
/.emacs.desktop
|
| 9 |
+
/.emacs.desktop.lock
|
| 10 |
+
*.elc
|
| 11 |
+
auto-save-list
|
| 12 |
+
tramp
|
| 13 |
+
.\#*
|
| 14 |
+
|
| 15 |
+
# Org-mode
|
| 16 |
+
.org-id-locations
|
| 17 |
+
*_archive
|
| 18 |
+
|
| 19 |
+
# flymake-mode
|
| 20 |
+
*_flymake.*
|
| 21 |
+
|
| 22 |
+
# eshell files
|
| 23 |
+
/eshell/history
|
| 24 |
+
/eshell/lastdir
|
| 25 |
+
|
| 26 |
+
# elpa packages
|
| 27 |
+
/elpa/
|
| 28 |
+
|
| 29 |
+
# reftex files
|
| 30 |
+
*.rel
|
| 31 |
+
|
| 32 |
+
# AUCTeX auto folder
|
| 33 |
+
/auto/
|
| 34 |
+
|
| 35 |
+
# cask packages
|
| 36 |
+
.cask/
|
| 37 |
+
dist/
|
| 38 |
+
|
| 39 |
+
# Flycheck
|
| 40 |
+
flycheck_*.el
|
| 41 |
+
|
| 42 |
+
# server auth directory
|
| 43 |
+
/server/
|
| 44 |
+
|
| 45 |
+
# projectiles files
|
| 46 |
+
.projectile
|
| 47 |
+
|
| 48 |
+
# directory configuration
|
| 49 |
+
.dir-locals.el
|
| 50 |
+
|
| 51 |
+
# network security
|
| 52 |
+
/network-security.data
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
### macOS ###
|
| 56 |
+
# General
|
| 57 |
+
.DS_Store
|
| 58 |
+
.AppleDouble
|
| 59 |
+
.LSOverride
|
| 60 |
+
|
| 61 |
+
# Icon must end with two \r
|
| 62 |
+
Icon
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Thumbnails
|
| 66 |
+
._*
|
| 67 |
+
|
| 68 |
+
# Files that might appear in the root of a volume
|
| 69 |
+
.DocumentRevisions-V100
|
| 70 |
+
.fseventsd
|
| 71 |
+
.Spotlight-V100
|
| 72 |
+
.TemporaryItems
|
| 73 |
+
.Trashes
|
| 74 |
+
.VolumeIcon.icns
|
| 75 |
+
.com.apple.timemachine.donotpresent
|
| 76 |
+
|
| 77 |
+
# Directories potentially created on remote AFP share
|
| 78 |
+
.AppleDB
|
| 79 |
+
.AppleDesktop
|
| 80 |
+
Network Trash Folder
|
| 81 |
+
Temporary Items
|
| 82 |
+
.apdisk
|
| 83 |
+
|
| 84 |
+
### macOS Patch ###
|
| 85 |
+
# iCloud generated files
|
| 86 |
+
*.icloud
|
| 87 |
+
|
| 88 |
+
### Python ###
|
| 89 |
+
# Byte-compiled / optimized / DLL files
|
| 90 |
+
__pycache__/
|
| 91 |
+
*.py[cod]
|
| 92 |
+
*$py.class
|
| 93 |
+
|
| 94 |
+
# C extensions
|
| 95 |
+
*.so
|
| 96 |
+
|
| 97 |
+
# Distribution / packaging
|
| 98 |
+
.Python
|
| 99 |
+
build/
|
| 100 |
+
develop-eggs/
|
| 101 |
+
downloads/
|
| 102 |
+
eggs/
|
| 103 |
+
.eggs/
|
| 104 |
+
lib/
|
| 105 |
+
lib64/
|
| 106 |
+
parts/
|
| 107 |
+
sdist/
|
| 108 |
+
var/
|
| 109 |
+
wheels/
|
| 110 |
+
share/python-wheels/
|
| 111 |
+
*.egg-info/
|
| 112 |
+
.installed.cfg
|
| 113 |
+
*.egg
|
| 114 |
+
MANIFEST
|
| 115 |
+
|
| 116 |
+
# PyInstaller
|
| 117 |
+
# Usually these files are written by a python script from a template
|
| 118 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 119 |
+
*.manifest
|
| 120 |
+
*.spec
|
| 121 |
+
|
| 122 |
+
# Installer logs
|
| 123 |
+
pip-log.txt
|
| 124 |
+
pip-delete-this-directory.txt
|
| 125 |
+
|
| 126 |
+
# Unit test / coverage reports
|
| 127 |
+
htmlcov/
|
| 128 |
+
.tox/
|
| 129 |
+
.nox/
|
| 130 |
+
.coverage
|
| 131 |
+
.coverage.*
|
| 132 |
+
.cache
|
| 133 |
+
nosetests.xml
|
| 134 |
+
coverage.xml
|
| 135 |
+
*.cover
|
| 136 |
+
*.py,cover
|
| 137 |
+
.hypothesis/
|
| 138 |
+
.pytest_cache/
|
| 139 |
+
cover/
|
| 140 |
+
|
| 141 |
+
# Translations
|
| 142 |
+
*.mo
|
| 143 |
+
*.pot
|
| 144 |
+
|
| 145 |
+
# Django stuff:
|
| 146 |
+
*.log
|
| 147 |
+
local_settings.py
|
| 148 |
+
db.sqlite3
|
| 149 |
+
db.sqlite3-journal
|
| 150 |
+
|
| 151 |
+
# Flask stuff:
|
| 152 |
+
instance/
|
| 153 |
+
.webassets-cache
|
| 154 |
+
|
| 155 |
+
# Scrapy stuff:
|
| 156 |
+
.scrapy
|
| 157 |
+
|
| 158 |
+
# Sphinx documentation
|
| 159 |
+
docs/_build/
|
| 160 |
+
|
| 161 |
+
# PyBuilder
|
| 162 |
+
.pybuilder/
|
| 163 |
+
target/
|
| 164 |
+
|
| 165 |
+
# Jupyter Notebook
|
| 166 |
+
.ipynb_checkpoints
|
| 167 |
+
|
| 168 |
+
# IPython
|
| 169 |
+
profile_default/
|
| 170 |
+
ipython_config.py
|
| 171 |
+
|
| 172 |
+
# pyenv
|
| 173 |
+
# For a library or package, you might want to ignore these files since the code is
|
| 174 |
+
# intended to run in multiple environments; otherwise, check them in:
|
| 175 |
+
# .python-version
|
| 176 |
+
|
| 177 |
+
# pipenv
|
| 178 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 179 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 180 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 181 |
+
# install all needed dependencies.
|
| 182 |
+
#Pipfile.lock
|
| 183 |
+
|
| 184 |
+
# poetry
|
| 185 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
| 186 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
| 187 |
+
# commonly ignored for libraries.
|
| 188 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
| 189 |
+
#poetry.lock
|
| 190 |
+
|
| 191 |
+
# pdm
|
| 192 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
| 193 |
+
#pdm.lock
|
| 194 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
| 195 |
+
# in version control.
|
| 196 |
+
# https://pdm.fming.dev/#use-with-ide
|
| 197 |
+
.pdm.toml
|
| 198 |
+
|
| 199 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
| 200 |
+
__pypackages__/
|
| 201 |
+
|
| 202 |
+
# Celery stuff
|
| 203 |
+
celerybeat-schedule
|
| 204 |
+
celerybeat.pid
|
| 205 |
+
|
| 206 |
+
# SageMath parsed files
|
| 207 |
+
*.sage.py
|
| 208 |
+
|
| 209 |
+
# Environments
|
| 210 |
+
.env
|
| 211 |
+
.venv
|
| 212 |
+
env/
|
| 213 |
+
venv/
|
| 214 |
+
ENV/
|
| 215 |
+
env.bak/
|
| 216 |
+
venv.bak/
|
| 217 |
+
|
| 218 |
+
# Spyder project settings
|
| 219 |
+
.spyderproject
|
| 220 |
+
.spyproject
|
| 221 |
+
|
| 222 |
+
# Rope project settings
|
| 223 |
+
.ropeproject
|
| 224 |
+
|
| 225 |
+
# mkdocs documentation
|
| 226 |
+
/site
|
| 227 |
+
|
| 228 |
+
# mypy
|
| 229 |
+
.mypy_cache/
|
| 230 |
+
.dmypy.json
|
| 231 |
+
dmypy.json
|
| 232 |
+
|
| 233 |
+
# Pyre type checker
|
| 234 |
+
.pyre/
|
| 235 |
+
|
| 236 |
+
# pytype static type analyzer
|
| 237 |
+
.pytype/
|
| 238 |
+
|
| 239 |
+
# Cython debug symbols
|
| 240 |
+
cython_debug/
|
| 241 |
+
|
| 242 |
+
# PyCharm
|
| 243 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
| 244 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
| 245 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
| 246 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
| 247 |
+
#.idea/
|
| 248 |
+
|
| 249 |
+
### Python Patch ###
|
| 250 |
+
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
| 251 |
+
poetry.toml
|
| 252 |
+
|
| 253 |
+
# ruff
|
| 254 |
+
.ruff_cache/
|
| 255 |
+
|
| 256 |
+
# LSP config files
|
| 257 |
+
pyrightconfig.json
|
| 258 |
+
|
| 259 |
+
### Vim ###
|
| 260 |
+
# Swap
|
| 261 |
+
[._]*.s[a-v][a-z]
|
| 262 |
+
!*.svg # comment out if you don't need vector files
|
| 263 |
+
[._]*.sw[a-p]
|
| 264 |
+
[._]s[a-rt-v][a-z]
|
| 265 |
+
[._]ss[a-gi-z]
|
| 266 |
+
[._]sw[a-p]
|
| 267 |
+
|
| 268 |
+
# Session
|
| 269 |
+
Session.vim
|
| 270 |
+
Sessionx.vim
|
| 271 |
+
|
| 272 |
+
# Temporary
|
| 273 |
+
.netrwhist
|
| 274 |
+
# Auto-generated tag files
|
| 275 |
+
tags
|
| 276 |
+
# Persistent undo
|
| 277 |
+
[._]*.un~
|
| 278 |
+
|
| 279 |
+
### VisualStudioCode ###
|
| 280 |
+
.vscode/*
|
| 281 |
+
!.vscode/settings.json
|
| 282 |
+
!.vscode/tasks.json
|
| 283 |
+
!.vscode/launch.json
|
| 284 |
+
!.vscode/extensions.json
|
| 285 |
+
!.vscode/*.code-snippets
|
| 286 |
+
|
| 287 |
+
# Local History for Visual Studio Code
|
| 288 |
+
.history/
|
| 289 |
+
|
| 290 |
+
# Built Visual Studio Code Extensions
|
| 291 |
+
*.vsix
|
| 292 |
+
|
| 293 |
+
### VisualStudioCode Patch ###
|
| 294 |
+
# Ignore all local history of files
|
| 295 |
+
.history
|
| 296 |
+
.ionide
|
| 297 |
+
|
| 298 |
+
# End of https://www.toptal.com/developers/gitignore/api/macos,visualstudiocode,python,vim,emacs
|
docling-models/README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: cdla-permissive-2.0
|
| 3 |
+
---
|
| 4 |
+
|
| 5 |
+
# Docling Models
|
| 6 |
+
|
| 7 |
+
This page contains models that power the PDF document conversion package [docling](https://github.com/DS4SD/docling).
|
| 8 |
+
|
| 9 |
+
## Layout Model
|
| 10 |
+
|
| 11 |
+
The layout model takes an image of a page and applies an RT-DETR model in order to find different layout components. It currently detects the labels: Caption, Footnote, Formula, List-item, Page-footer, Page-header, Picture, Section-header, Table, Text, Title. As a reference (from the DocLayNet paper), this is the performance of standard object detection methods on the DocLayNet dataset compared to human evaluation:
|
| 12 |
+
|
| 13 |
+
| | human | MRCNN | MRCNN | FRCNN | YOLO |
|
| 14 |
+
|----------------|---------|---------|---------|---------|--------|
|
| 15 |
+
| | human | R50 | R101 | R101 | v5x6 |
|
| 16 |
+
| Caption | 84-89 | 68.4 | 71.5 | 70.1 | 77.7 |
|
| 17 |
+
| Footnote | 83-91 | 70.9 | 71.8 | 73.7 | 77.2 |
|
| 18 |
+
| Formula | 83-85 | 60.1 | 63.4 | 63.5 | 66.2 |
|
| 19 |
+
| List-item | 87-88 | 81.2 | 80.8 | 81.0 | 86.2 |
|
| 20 |
+
| Page-footer | 93-94 | 61.6 | 59.3 | 58.9 | 61.1 |
|
| 21 |
+
| Page-header | 85-89 | 71.9 | 70.0 | 72.0 | 67.9 |
|
| 22 |
+
| Picture | 69-71 | 71.7 | 72.7 | 72.0 | 77.1 |
|
| 23 |
+
| Section-header | 83-84 | 67.6 | 69.3 | 68.4 | 74.6 |
|
| 24 |
+
| Table | 77-81 | 82.2 | 82.9 | 82.2 | 86.3 |
|
| 25 |
+
| Text | 84-86 | 84.6 | 85.8 | 85.4 | 88.1 |
|
| 26 |
+
| Title | 60-72 | 76.7 | 80.4 | 79.9 | 82.7 |
|
| 27 |
+
| All | 82-83 | 72.4 | 73.5 | 73.4 | 76.8 |
|
| 28 |
+
|
| 29 |
+
## TableFormer
|
| 30 |
+
|
| 31 |
+
The tableformer model will identify the structure of the table, starting from an image of a table. It uses the predicted table regions of the layout model to identify the tables. Tableformer has SOTA table structure identification,
|
| 32 |
+
|
| 33 |
+
| Model (TEDS) | Simple table | Complex table | All tables |
|
| 34 |
+
| ------------ | ------------ | ------------- | ---------- |
|
| 35 |
+
| Tabula | 78.0 | 57.8 | 67.9 |
|
| 36 |
+
| Traprange | 60.8 | 49.9 | 55.4 |
|
| 37 |
+
| Camelot | 80.0 | 66.0 | 73.0 |
|
| 38 |
+
| Acrobat Pro | 68.9 | 61.8 | 65.3 |
|
| 39 |
+
| EDD | 91.2 | 85.4 | 88.3 |
|
| 40 |
+
| TableFormer | 95.4 | 90.1 | 93.6 |
|
| 41 |
+
|
| 42 |
+
## References
|
| 43 |
+
|
| 44 |
+
```
|
| 45 |
+
@techreport{Docling,
|
| 46 |
+
author = {Deep Search Team},
|
| 47 |
+
month = {8},
|
| 48 |
+
title = {{Docling Technical Report}},
|
| 49 |
+
url={https://arxiv.org/abs/2408.09869},
|
| 50 |
+
eprint={2408.09869},
|
| 51 |
+
doi = "10.48550/arXiv.2408.09869",
|
| 52 |
+
version = {1.0.0},
|
| 53 |
+
year = {2024}
|
| 54 |
+
}
|
| 55 |
+
|
| 56 |
+
@article{doclaynet2022,
|
| 57 |
+
title = {DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis},
|
| 58 |
+
doi = {10.1145/3534678.3539043},
|
| 59 |
+
url = {https://arxiv.org/abs/2206.01062},
|
| 60 |
+
author = {Pfitzmann, Birgit and Auer, Christoph and Dolfi, Michele and Nassar, Ahmed S and Staar, Peter W J},
|
| 61 |
+
year = {2022}
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
@InProceedings{TableFormer2022,
|
| 65 |
+
author = {Nassar, Ahmed and Livathinos, Nikolaos and Lysak, Maksym and Staar, Peter},
|
| 66 |
+
title = {TableFormer: Table Structure Understanding With Transformers},
|
| 67 |
+
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
|
| 68 |
+
month = {June},
|
| 69 |
+
year = {2022},
|
| 70 |
+
pages = {4614-4623},
|
| 71 |
+
doi = {10.1109/CVPR52688.2022.00457}
|
| 72 |
+
}
|
| 73 |
+
```
|
docling-models/config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "docling-models"
|
| 3 |
+
}
|
docling-models/model_artifacts/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
docling-models/model_artifacts/layout/.DS_Store
ADDED
|
Binary file (8.2 kB). View file
|
|
|
docling-models/model_artifacts/layout/config.json
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_dropout": 0.0,
|
| 3 |
+
"activation_function": "silu",
|
| 4 |
+
"anchor_image_size": null,
|
| 5 |
+
"architectures": [
|
| 6 |
+
"RTDetrForObjectDetection"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"auxiliary_loss": true,
|
| 10 |
+
"backbone": null,
|
| 11 |
+
"backbone_config": {
|
| 12 |
+
"depths": [
|
| 13 |
+
3,
|
| 14 |
+
4,
|
| 15 |
+
6,
|
| 16 |
+
3
|
| 17 |
+
],
|
| 18 |
+
"downsample_in_bottleneck": false,
|
| 19 |
+
"downsample_in_first_stage": false,
|
| 20 |
+
"embedding_size": 64,
|
| 21 |
+
"hidden_act": "relu",
|
| 22 |
+
"hidden_sizes": [
|
| 23 |
+
256,
|
| 24 |
+
512,
|
| 25 |
+
1024,
|
| 26 |
+
2048
|
| 27 |
+
],
|
| 28 |
+
"layer_type": "bottleneck",
|
| 29 |
+
"model_type": "rt_detr_resnet",
|
| 30 |
+
"num_channels": 3,
|
| 31 |
+
"out_features": [
|
| 32 |
+
"stage2",
|
| 33 |
+
"stage3",
|
| 34 |
+
"stage4"
|
| 35 |
+
],
|
| 36 |
+
"out_indices": [
|
| 37 |
+
2,
|
| 38 |
+
3,
|
| 39 |
+
4
|
| 40 |
+
],
|
| 41 |
+
"stage_names": [
|
| 42 |
+
"stem",
|
| 43 |
+
"stage1",
|
| 44 |
+
"stage2",
|
| 45 |
+
"stage3",
|
| 46 |
+
"stage4"
|
| 47 |
+
]
|
| 48 |
+
},
|
| 49 |
+
"backbone_kwargs": null,
|
| 50 |
+
"batch_norm_eps": 1e-05,
|
| 51 |
+
"box_noise_scale": 1.0,
|
| 52 |
+
"d_model": 256,
|
| 53 |
+
"decoder_activation_function": "relu",
|
| 54 |
+
"decoder_attention_heads": 8,
|
| 55 |
+
"decoder_ffn_dim": 1024,
|
| 56 |
+
"decoder_in_channels": [
|
| 57 |
+
256,
|
| 58 |
+
256,
|
| 59 |
+
256
|
| 60 |
+
],
|
| 61 |
+
"decoder_layers": 6,
|
| 62 |
+
"decoder_n_points": 4,
|
| 63 |
+
"disable_custom_kernels": true,
|
| 64 |
+
"dropout": 0.0,
|
| 65 |
+
"encode_proj_layers": [
|
| 66 |
+
2
|
| 67 |
+
],
|
| 68 |
+
"encoder_activation_function": "gelu",
|
| 69 |
+
"encoder_attention_heads": 8,
|
| 70 |
+
"encoder_ffn_dim": 1024,
|
| 71 |
+
"encoder_hidden_dim": 256,
|
| 72 |
+
"encoder_in_channels": [
|
| 73 |
+
512,
|
| 74 |
+
1024,
|
| 75 |
+
2048
|
| 76 |
+
],
|
| 77 |
+
"encoder_layers": 1,
|
| 78 |
+
"eos_coefficient": 0.0001,
|
| 79 |
+
"eval_size": null,
|
| 80 |
+
"feat_strides": [
|
| 81 |
+
8,
|
| 82 |
+
16,
|
| 83 |
+
32
|
| 84 |
+
],
|
| 85 |
+
"focal_loss_alpha": 0.75,
|
| 86 |
+
"focal_loss_gamma": 2.0,
|
| 87 |
+
"freeze_backbone_batch_norms": true,
|
| 88 |
+
"hidden_expansion": 1.0,
|
| 89 |
+
"id2label": {
|
| 90 |
+
"0": "background",
|
| 91 |
+
"1": "Caption",
|
| 92 |
+
"2": "Footnote",
|
| 93 |
+
"3": "Formula",
|
| 94 |
+
"4": "List-item",
|
| 95 |
+
"5": "Page-footer",
|
| 96 |
+
"6": "Page-header",
|
| 97 |
+
"7": "Picture",
|
| 98 |
+
"8": "Section-header",
|
| 99 |
+
"9": "Table",
|
| 100 |
+
"10": "Text",
|
| 101 |
+
"11": "Title",
|
| 102 |
+
"12": "Document Index",
|
| 103 |
+
"13": "Code",
|
| 104 |
+
"14": "Checkbox-Selected",
|
| 105 |
+
"15": "Checkbox-Unselected",
|
| 106 |
+
"16": "Form",
|
| 107 |
+
"17": "Key-Value Region"
|
| 108 |
+
},
|
| 109 |
+
"initializer_bias_prior_prob": null,
|
| 110 |
+
"initializer_range": 0.01,
|
| 111 |
+
"is_encoder_decoder": true,
|
| 112 |
+
"label2id": {
|
| 113 |
+
"Caption": "1",
|
| 114 |
+
"Checkbox-Selected": "14",
|
| 115 |
+
"Checkbox-Unselected": "15",
|
| 116 |
+
"Code": "13",
|
| 117 |
+
"Document Index": "12",
|
| 118 |
+
"Footnote": "2",
|
| 119 |
+
"Form": "16",
|
| 120 |
+
"Formula": "3",
|
| 121 |
+
"Key-Value Region": "17",
|
| 122 |
+
"List-item": "4",
|
| 123 |
+
"Page-footer": "5",
|
| 124 |
+
"Page-header": "6",
|
| 125 |
+
"Picture": "7",
|
| 126 |
+
"Section-header": "8",
|
| 127 |
+
"Table": "9",
|
| 128 |
+
"Text": "10",
|
| 129 |
+
"Title": "11",
|
| 130 |
+
"background": "0"
|
| 131 |
+
},
|
| 132 |
+
"label_noise_ratio": 0.5,
|
| 133 |
+
"layer_norm_eps": 1e-05,
|
| 134 |
+
"learn_initial_query": false,
|
| 135 |
+
"matcher_alpha": 0.25,
|
| 136 |
+
"matcher_bbox_cost": 5.0,
|
| 137 |
+
"matcher_class_cost": 2.0,
|
| 138 |
+
"matcher_gamma": 2.0,
|
| 139 |
+
"matcher_giou_cost": 2.0,
|
| 140 |
+
"model_type": "rt_detr",
|
| 141 |
+
"normalize_before": false,
|
| 142 |
+
"num_denoising": 100,
|
| 143 |
+
"num_feature_levels": 3,
|
| 144 |
+
"num_queries": 300,
|
| 145 |
+
"positional_encoding_temperature": 10000,
|
| 146 |
+
"torch_dtype": "float32",
|
| 147 |
+
"transformers_version": "4.51.3",
|
| 148 |
+
"use_focal_loss": true,
|
| 149 |
+
"use_pretrained_backbone": false,
|
| 150 |
+
"use_timm_backbone": false,
|
| 151 |
+
"weight_loss_bbox": 5.0,
|
| 152 |
+
"weight_loss_giou": 2.0,
|
| 153 |
+
"weight_loss_vfl": 1.0,
|
| 154 |
+
"with_box_refine": true
|
| 155 |
+
}
|
docling-models/model_artifacts/layout/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f234e230f8d332f880a81e5e652d3bae326d2e52802a07439e678b16587f3c75
|
| 3 |
+
size 171666216
|
docling-models/model_artifacts/layout/preprocessor_config.json
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_convert_annotations": true,
|
| 3 |
+
"do_normalize": false,
|
| 4 |
+
"do_pad": false,
|
| 5 |
+
"do_rescale": true,
|
| 6 |
+
"do_resize": true,
|
| 7 |
+
"format": "coco_detection",
|
| 8 |
+
"image_mean": [
|
| 9 |
+
0.485,
|
| 10 |
+
0.456,
|
| 11 |
+
0.406
|
| 12 |
+
],
|
| 13 |
+
"image_processor_type": "RTDetrImageProcessor",
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.229,
|
| 16 |
+
0.224,
|
| 17 |
+
0.225
|
| 18 |
+
],
|
| 19 |
+
"pad_size": null,
|
| 20 |
+
"resample": 2,
|
| 21 |
+
"rescale_factor": 0.00392156862745098,
|
| 22 |
+
"size": {
|
| 23 |
+
"height": 640,
|
| 24 |
+
"width": 640
|
| 25 |
+
}
|
| 26 |
+
}
|
docling-models/model_artifacts/tableformer/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
docling-models/model_artifacts/tableformer/accurate/tableformer_accurate.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a7d6c924b3cd12fb99a09280ca9c33a89c5d60b93253617d2e088c1a40374d9
|
| 3 |
+
size 212758388
|
docling-models/model_artifacts/tableformer/accurate/tm_config.json
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": {
|
| 3 |
+
"type": "PTN_prepared",
|
| 4 |
+
"name": "PubTabNet_300_100_512",
|
| 5 |
+
"raw_data_dir": "./tests/test_data/ccs_api/model/",
|
| 6 |
+
"load_cells": true,
|
| 7 |
+
"bbox_format": "5plet",
|
| 8 |
+
"resized_image": 448,
|
| 9 |
+
"keep_AR": false,
|
| 10 |
+
"up_scaling_enabled": true,
|
| 11 |
+
"down_scaling_enabled": true,
|
| 12 |
+
"padding_mode": "null",
|
| 13 |
+
"padding_color": [
|
| 14 |
+
0,
|
| 15 |
+
0,
|
| 16 |
+
0
|
| 17 |
+
],
|
| 18 |
+
"image_normalization": {
|
| 19 |
+
"state": true,
|
| 20 |
+
"mean": [
|
| 21 |
+
0.94247851,
|
| 22 |
+
0.94254675,
|
| 23 |
+
0.94292611
|
| 24 |
+
],
|
| 25 |
+
"std": [
|
| 26 |
+
0.17910956,
|
| 27 |
+
0.17940403,
|
| 28 |
+
0.17931663
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
"color_jitter": true,
|
| 32 |
+
"rand_crop": true,
|
| 33 |
+
"rand_pad": true,
|
| 34 |
+
"image_grayscale": false
|
| 35 |
+
},
|
| 36 |
+
"model": {
|
| 37 |
+
"type": "TableModel04_rs",
|
| 38 |
+
"name": "14_128_256_4_true",
|
| 39 |
+
"backbone": "resnet18",
|
| 40 |
+
"enc_image_size": 28,
|
| 41 |
+
"tag_embed_dim": 16,
|
| 42 |
+
"hidden_dim": 512,
|
| 43 |
+
"tag_decoder_dim": 512,
|
| 44 |
+
"bbox_embed_dim": 256,
|
| 45 |
+
"tag_attention_dim": 256,
|
| 46 |
+
"bbox_attention_dim": 512,
|
| 47 |
+
"enc_layers": 6,
|
| 48 |
+
"dec_layers": 6,
|
| 49 |
+
"nheads": 8,
|
| 50 |
+
"dropout": 0.1,
|
| 51 |
+
"bbox_classes": 2
|
| 52 |
+
},
|
| 53 |
+
"train": {
|
| 54 |
+
"bbox": true
|
| 55 |
+
},
|
| 56 |
+
"predict": {
|
| 57 |
+
"max_steps": 1024,
|
| 58 |
+
"beam_size": 5,
|
| 59 |
+
"bbox": true,
|
| 60 |
+
"pdf_cell_iou_thres": 0.05,
|
| 61 |
+
"padding": false,
|
| 62 |
+
"padding_size": 50,
|
| 63 |
+
"disable_post_process": false,
|
| 64 |
+
"profiling": false
|
| 65 |
+
},
|
| 66 |
+
"debug": {
|
| 67 |
+
"save_debug_images": false
|
| 68 |
+
},
|
| 69 |
+
"dataset_wordmap": {
|
| 70 |
+
"word_map_tag": {
|
| 71 |
+
"<pad>": 0,
|
| 72 |
+
"<unk>": 1,
|
| 73 |
+
"<start>": 2,
|
| 74 |
+
"<end>": 3,
|
| 75 |
+
"ecel": 4,
|
| 76 |
+
"fcel": 5,
|
| 77 |
+
"lcel": 6,
|
| 78 |
+
"ucel": 7,
|
| 79 |
+
"xcel": 8,
|
| 80 |
+
"nl": 9,
|
| 81 |
+
"ched": 10,
|
| 82 |
+
"rhed": 11,
|
| 83 |
+
"srow": 12
|
| 84 |
+
},
|
| 85 |
+
"word_map_cell": {
|
| 86 |
+
" ": 13,
|
| 87 |
+
"!": 179,
|
| 88 |
+
"\"": 126,
|
| 89 |
+
"#": 101,
|
| 90 |
+
"$": 119,
|
| 91 |
+
"%": 18,
|
| 92 |
+
"&": 114,
|
| 93 |
+
"'": 108,
|
| 94 |
+
"(": 29,
|
| 95 |
+
")": 32,
|
| 96 |
+
"*": 26,
|
| 97 |
+
"+": 97,
|
| 98 |
+
",": 71,
|
| 99 |
+
"-": 63,
|
| 100 |
+
".": 34,
|
| 101 |
+
"/": 66,
|
| 102 |
+
"0": 33,
|
| 103 |
+
"1": 36,
|
| 104 |
+
"2": 43,
|
| 105 |
+
"3": 41,
|
| 106 |
+
"4": 45,
|
| 107 |
+
"5": 17,
|
| 108 |
+
"6": 37,
|
| 109 |
+
"7": 35,
|
| 110 |
+
"8": 40,
|
| 111 |
+
"9": 16,
|
| 112 |
+
":": 88,
|
| 113 |
+
";": 92,
|
| 114 |
+
"<": 73,
|
| 115 |
+
"</b>": 9,
|
| 116 |
+
"</i>": 23,
|
| 117 |
+
"</overline>": 219,
|
| 118 |
+
"</strike>": 233,
|
| 119 |
+
"</sub>": 94,
|
| 120 |
+
"</sup>": 77,
|
| 121 |
+
"</underline>": 151,
|
| 122 |
+
"<b>": 1,
|
| 123 |
+
"<end>": 280,
|
| 124 |
+
"<i>": 21,
|
| 125 |
+
"<overline>": 218,
|
| 126 |
+
"<pad>": 0,
|
| 127 |
+
"<start>": 279,
|
| 128 |
+
"<strike>": 232,
|
| 129 |
+
"<sub>": 93,
|
| 130 |
+
"<sup>": 75,
|
| 131 |
+
"<underline>": 150,
|
| 132 |
+
"<unk>": 278,
|
| 133 |
+
"=": 99,
|
| 134 |
+
">": 39,
|
| 135 |
+
"?": 96,
|
| 136 |
+
"@": 125,
|
| 137 |
+
"A": 27,
|
| 138 |
+
"B": 86,
|
| 139 |
+
"C": 19,
|
| 140 |
+
"D": 57,
|
| 141 |
+
"E": 64,
|
| 142 |
+
"F": 47,
|
| 143 |
+
"G": 44,
|
| 144 |
+
"H": 10,
|
| 145 |
+
"I": 20,
|
| 146 |
+
"J": 80,
|
| 147 |
+
"K": 81,
|
| 148 |
+
"L": 52,
|
| 149 |
+
"M": 46,
|
| 150 |
+
"N": 69,
|
| 151 |
+
"O": 65,
|
| 152 |
+
"P": 62,
|
| 153 |
+
"Q": 59,
|
| 154 |
+
"R": 60,
|
| 155 |
+
"S": 58,
|
| 156 |
+
"T": 48,
|
| 157 |
+
"U": 55,
|
| 158 |
+
"V": 2,
|
| 159 |
+
"W": 83,
|
| 160 |
+
"X": 104,
|
| 161 |
+
"Y": 89,
|
| 162 |
+
"Z": 113,
|
| 163 |
+
"[": 70,
|
| 164 |
+
"\\": 165,
|
| 165 |
+
"]": 72,
|
| 166 |
+
"^": 132,
|
| 167 |
+
"_": 84,
|
| 168 |
+
"`": 196,
|
| 169 |
+
"a": 3,
|
| 170 |
+
"b": 6,
|
| 171 |
+
"c": 54,
|
| 172 |
+
"d": 12,
|
| 173 |
+
"e": 8,
|
| 174 |
+
"f": 50,
|
| 175 |
+
"g": 28,
|
| 176 |
+
"h": 56,
|
| 177 |
+
"i": 5,
|
| 178 |
+
"j": 82,
|
| 179 |
+
"k": 95,
|
| 180 |
+
"l": 7,
|
| 181 |
+
"m": 30,
|
| 182 |
+
"n": 31,
|
| 183 |
+
"o": 15,
|
| 184 |
+
"p": 22,
|
| 185 |
+
"q": 67,
|
| 186 |
+
"r": 4,
|
| 187 |
+
"s": 51,
|
| 188 |
+
"t": 14,
|
| 189 |
+
"u": 25,
|
| 190 |
+
"v": 24,
|
| 191 |
+
"w": 53,
|
| 192 |
+
"x": 61,
|
| 193 |
+
"y": 49,
|
| 194 |
+
"z": 11,
|
| 195 |
+
"{": 158,
|
| 196 |
+
"|": 139,
|
| 197 |
+
"}": 159,
|
| 198 |
+
"~": 147,
|
| 199 |
+
"\u00a2": 203,
|
| 200 |
+
"\u00a3": 162,
|
| 201 |
+
"\u00a4": 220,
|
| 202 |
+
"\u00a5": 176,
|
| 203 |
+
"\u00a7": 142,
|
| 204 |
+
"\u00a9": 268,
|
| 205 |
+
"\u00ab": 239,
|
| 206 |
+
"\u00ad": 275,
|
| 207 |
+
"\u00ae": 130,
|
| 208 |
+
"\u00b0": 100,
|
| 209 |
+
"\u00b1": 79,
|
| 210 |
+
"\u00b6": 171,
|
| 211 |
+
"\u00b7": 137,
|
| 212 |
+
"\u00bb": 240,
|
| 213 |
+
"\u00d7": 118,
|
| 214 |
+
"\u00d8": 192,
|
| 215 |
+
"\u00df": 197,
|
| 216 |
+
"\u00e6": 261,
|
| 217 |
+
"\u00f7": 225,
|
| 218 |
+
"\u00f8": 163,
|
| 219 |
+
"\u0131": 242,
|
| 220 |
+
"\u0142": 267,
|
| 221 |
+
"\u01c2": 211,
|
| 222 |
+
"\u025b": 223,
|
| 223 |
+
"\u02b9": 248,
|
| 224 |
+
"\u02c2": 195,
|
| 225 |
+
"\u02c3": 208,
|
| 226 |
+
"\u02c6": 253,
|
| 227 |
+
"\u0300": 209,
|
| 228 |
+
"\u0301": 131,
|
| 229 |
+
"\u0302": 138,
|
| 230 |
+
"\u0303": 156,
|
| 231 |
+
"\u0304": 152,
|
| 232 |
+
"\u0306": 222,
|
| 233 |
+
"\u0307": 247,
|
| 234 |
+
"\u0308": 103,
|
| 235 |
+
"\u030a": 102,
|
| 236 |
+
"\u030c": 254,
|
| 237 |
+
"\u0327": 155,
|
| 238 |
+
"\u0328": 269,
|
| 239 |
+
"\u0338": 170,
|
| 240 |
+
"\u0391": 173,
|
| 241 |
+
"\u0392": 169,
|
| 242 |
+
"\u0393": 180,
|
| 243 |
+
"\u0394": 85,
|
| 244 |
+
"\u0398": 243,
|
| 245 |
+
"\u0399": 271,
|
| 246 |
+
"\u039b": 272,
|
| 247 |
+
"\u03a0": 213,
|
| 248 |
+
"\u03a3": 185,
|
| 249 |
+
"\u03a6": 148,
|
| 250 |
+
"\u03a7": 212,
|
| 251 |
+
"\u03a8": 141,
|
| 252 |
+
"\u03a9": 161,
|
| 253 |
+
"\u03b1": 90,
|
| 254 |
+
"\u03b2": 107,
|
| 255 |
+
"\u03b3": 110,
|
| 256 |
+
"\u03b4": 153,
|
| 257 |
+
"\u03b5": 166,
|
| 258 |
+
"\u03b6": 178,
|
| 259 |
+
"\u03b7": 146,
|
| 260 |
+
"\u03b8": 186,
|
| 261 |
+
"\u03b9": 229,
|
| 262 |
+
"\u03ba": 164,
|
| 263 |
+
"\u03bb": 91,
|
| 264 |
+
"\u03bc": 78,
|
| 265 |
+
"\u03bd": 230,
|
| 266 |
+
"\u03be": 244,
|
| 267 |
+
"\u03c0": 127,
|
| 268 |
+
"\u03c1": 149,
|
| 269 |
+
"\u03c3": 116,
|
| 270 |
+
"\u03c4": 198,
|
| 271 |
+
"\u03c5": 189,
|
| 272 |
+
"\u03c6": 140,
|
| 273 |
+
"\u03c7": 124,
|
| 274 |
+
"\u03c8": 216,
|
| 275 |
+
"\u03c9": 167,
|
| 276 |
+
"\u0410": 273,
|
| 277 |
+
"\u0421": 194,
|
| 278 |
+
"\u115f": 217,
|
| 279 |
+
"\u200b": 265,
|
| 280 |
+
"\u2010": 117,
|
| 281 |
+
"\u2012": 135,
|
| 282 |
+
"\u2013": 42,
|
| 283 |
+
"\u2014": 106,
|
| 284 |
+
"\u2015": 228,
|
| 285 |
+
"\u2016": 259,
|
| 286 |
+
"\u2018": 123,
|
| 287 |
+
"\u2019": 121,
|
| 288 |
+
"\u201c": 87,
|
| 289 |
+
"\u201d": 115,
|
| 290 |
+
"\u201e": 245,
|
| 291 |
+
"\u2020": 109,
|
| 292 |
+
"\u2021": 129,
|
| 293 |
+
"\u2022": 128,
|
| 294 |
+
"\u2028": 190,
|
| 295 |
+
"\u2030": 154,
|
| 296 |
+
"\u2032": 68,
|
| 297 |
+
"\u203b": 224,
|
| 298 |
+
"\u2044": 188,
|
| 299 |
+
"\u204e": 199,
|
| 300 |
+
"\u2061": 200,
|
| 301 |
+
"\u20ac": 184,
|
| 302 |
+
"\u2190": 202,
|
| 303 |
+
"\u2191": 112,
|
| 304 |
+
"\u2192": 120,
|
| 305 |
+
"\u2193": 111,
|
| 306 |
+
"\u2194": 183,
|
| 307 |
+
"\u21d1": 266,
|
| 308 |
+
"\u21d2": 264,
|
| 309 |
+
"\u21d3": 255,
|
| 310 |
+
"\u2205": 215,
|
| 311 |
+
"\u2206": 175,
|
| 312 |
+
"\u2208": 262,
|
| 313 |
+
"\u2211": 160,
|
| 314 |
+
"\u2212": 76,
|
| 315 |
+
"\u2216": 206,
|
| 316 |
+
"\u2217": 105,
|
| 317 |
+
"\u2218": 246,
|
| 318 |
+
"\u2219": 236,
|
| 319 |
+
"\u221a": 187,
|
| 320 |
+
"\u221e": 207,
|
| 321 |
+
"\u2223": 260,
|
| 322 |
+
"\u2225": 193,
|
| 323 |
+
"\u2227": 182,
|
| 324 |
+
"\u2229": 256,
|
| 325 |
+
"\u222b": 258,
|
| 326 |
+
"\u223c": 98,
|
| 327 |
+
"\u2248": 210,
|
| 328 |
+
"\u2264": 38,
|
| 329 |
+
"\u2265": 74,
|
| 330 |
+
"\u2266": 214,
|
| 331 |
+
"\u2267": 181,
|
| 332 |
+
"\u2295": 263,
|
| 333 |
+
"\u22c5": 174,
|
| 334 |
+
"\u22c6": 191,
|
| 335 |
+
"\u22ee": 277,
|
| 336 |
+
"\u22ef": 270,
|
| 337 |
+
"\u2500": 205,
|
| 338 |
+
"\u2551": 231,
|
| 339 |
+
"\u25a0": 250,
|
| 340 |
+
"\u25a1": 177,
|
| 341 |
+
"\u25aa": 145,
|
| 342 |
+
"\u25b2": 136,
|
| 343 |
+
"\u25b3": 143,
|
| 344 |
+
"\u25bc": 251,
|
| 345 |
+
"\u25c6": 226,
|
| 346 |
+
"\u25ca": 235,
|
| 347 |
+
"\u25cb": 227,
|
| 348 |
+
"\u25cf": 172,
|
| 349 |
+
"\u25e6": 274,
|
| 350 |
+
"\u2605": 204,
|
| 351 |
+
"\u2606": 144,
|
| 352 |
+
"\u2640": 133,
|
| 353 |
+
"\u2642": 134,
|
| 354 |
+
"\u2663": 252,
|
| 355 |
+
"\u2666": 157,
|
| 356 |
+
"\u266f": 221,
|
| 357 |
+
"\u2713": 122,
|
| 358 |
+
"\u2714": 249,
|
| 359 |
+
"\u2717": 201,
|
| 360 |
+
"\u2794": 168,
|
| 361 |
+
"\u27a2": 276,
|
| 362 |
+
"\u2a7d": 234,
|
| 363 |
+
"\u2a7e": 241,
|
| 364 |
+
"\u3008": 237,
|
| 365 |
+
"\u3009": 238,
|
| 366 |
+
"\ufeff": 257
|
| 367 |
+
}
|
| 368 |
+
}
|
| 369 |
+
}
|
docling-models/model_artifacts/tableformer/fast/tableformer_fast.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3119563aab5a7c96fda4d621119b63fd8806272b86c30936d15507616422f718
|
| 3 |
+
size 145453276
|
docling-models/model_artifacts/tableformer/fast/tm_config.json
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset": {
|
| 3 |
+
"type": "PTN_prepared",
|
| 4 |
+
"name": "PubTabNet_300_100_512",
|
| 5 |
+
"raw_data_dir": "./tests/test_data/ccs_api/model/",
|
| 6 |
+
"load_cells": true,
|
| 7 |
+
"bbox_format": "5plet",
|
| 8 |
+
"resized_image": 448,
|
| 9 |
+
"keep_AR": false,
|
| 10 |
+
"up_scaling_enabled": true,
|
| 11 |
+
"down_scaling_enabled": true,
|
| 12 |
+
"padding_mode": "null",
|
| 13 |
+
"padding_color": [
|
| 14 |
+
0,
|
| 15 |
+
0,
|
| 16 |
+
0
|
| 17 |
+
],
|
| 18 |
+
"image_normalization": {
|
| 19 |
+
"state": true,
|
| 20 |
+
"mean": [
|
| 21 |
+
0.94247851,
|
| 22 |
+
0.94254675,
|
| 23 |
+
0.94292611
|
| 24 |
+
],
|
| 25 |
+
"std": [
|
| 26 |
+
0.17910956,
|
| 27 |
+
0.17940403,
|
| 28 |
+
0.17931663
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
"color_jitter": true,
|
| 32 |
+
"rand_crop": true,
|
| 33 |
+
"rand_pad": true,
|
| 34 |
+
"image_grayscale": false
|
| 35 |
+
},
|
| 36 |
+
"model": {
|
| 37 |
+
"type": "TableModel04_rs",
|
| 38 |
+
"name": "14_128_256_4_true",
|
| 39 |
+
"backbone": "resnet18",
|
| 40 |
+
"enc_image_size": 28,
|
| 41 |
+
"tag_embed_dim": 16,
|
| 42 |
+
"hidden_dim": 512,
|
| 43 |
+
"tag_decoder_dim": 512,
|
| 44 |
+
"bbox_embed_dim": 256,
|
| 45 |
+
"tag_attention_dim": 256,
|
| 46 |
+
"bbox_attention_dim": 512,
|
| 47 |
+
"enc_layers": 4,
|
| 48 |
+
"dec_layers": 2,
|
| 49 |
+
"nheads": 8,
|
| 50 |
+
"dropout": 0.1,
|
| 51 |
+
"bbox_classes": 2
|
| 52 |
+
},
|
| 53 |
+
"train": {
|
| 54 |
+
"bbox": true
|
| 55 |
+
},
|
| 56 |
+
"predict": {
|
| 57 |
+
"max_steps": 1024,
|
| 58 |
+
"beam_size": 5,
|
| 59 |
+
"bbox": true,
|
| 60 |
+
"pdf_cell_iou_thres": 0.05,
|
| 61 |
+
"padding": false,
|
| 62 |
+
"padding_size": 50,
|
| 63 |
+
"disable_post_process": false,
|
| 64 |
+
"profiling": false
|
| 65 |
+
},
|
| 66 |
+
"debug": {
|
| 67 |
+
"save_debug_images": false
|
| 68 |
+
},
|
| 69 |
+
"dataset_wordmap": {
|
| 70 |
+
"word_map_tag": {
|
| 71 |
+
"<pad>": 0,
|
| 72 |
+
"<unk>": 1,
|
| 73 |
+
"<start>": 2,
|
| 74 |
+
"<end>": 3,
|
| 75 |
+
"ecel": 4,
|
| 76 |
+
"fcel": 5,
|
| 77 |
+
"lcel": 6,
|
| 78 |
+
"ucel": 7,
|
| 79 |
+
"xcel": 8,
|
| 80 |
+
"nl": 9,
|
| 81 |
+
"ched": 10,
|
| 82 |
+
"rhed": 11,
|
| 83 |
+
"srow": 12
|
| 84 |
+
},
|
| 85 |
+
"word_map_cell": {
|
| 86 |
+
" ": 13,
|
| 87 |
+
"!": 179,
|
| 88 |
+
"\"": 126,
|
| 89 |
+
"#": 101,
|
| 90 |
+
"$": 119,
|
| 91 |
+
"%": 18,
|
| 92 |
+
"&": 114,
|
| 93 |
+
"'": 108,
|
| 94 |
+
"(": 29,
|
| 95 |
+
")": 32,
|
| 96 |
+
"*": 26,
|
| 97 |
+
"+": 97,
|
| 98 |
+
",": 71,
|
| 99 |
+
"-": 63,
|
| 100 |
+
".": 34,
|
| 101 |
+
"/": 66,
|
| 102 |
+
"0": 33,
|
| 103 |
+
"1": 36,
|
| 104 |
+
"2": 43,
|
| 105 |
+
"3": 41,
|
| 106 |
+
"4": 45,
|
| 107 |
+
"5": 17,
|
| 108 |
+
"6": 37,
|
| 109 |
+
"7": 35,
|
| 110 |
+
"8": 40,
|
| 111 |
+
"9": 16,
|
| 112 |
+
":": 88,
|
| 113 |
+
";": 92,
|
| 114 |
+
"<": 73,
|
| 115 |
+
"</b>": 9,
|
| 116 |
+
"</i>": 23,
|
| 117 |
+
"</overline>": 219,
|
| 118 |
+
"</strike>": 233,
|
| 119 |
+
"</sub>": 94,
|
| 120 |
+
"</sup>": 77,
|
| 121 |
+
"</underline>": 151,
|
| 122 |
+
"<b>": 1,
|
| 123 |
+
"<end>": 280,
|
| 124 |
+
"<i>": 21,
|
| 125 |
+
"<overline>": 218,
|
| 126 |
+
"<pad>": 0,
|
| 127 |
+
"<start>": 279,
|
| 128 |
+
"<strike>": 232,
|
| 129 |
+
"<sub>": 93,
|
| 130 |
+
"<sup>": 75,
|
| 131 |
+
"<underline>": 150,
|
| 132 |
+
"<unk>": 278,
|
| 133 |
+
"=": 99,
|
| 134 |
+
">": 39,
|
| 135 |
+
"?": 96,
|
| 136 |
+
"@": 125,
|
| 137 |
+
"A": 27,
|
| 138 |
+
"B": 86,
|
| 139 |
+
"C": 19,
|
| 140 |
+
"D": 57,
|
| 141 |
+
"E": 64,
|
| 142 |
+
"F": 47,
|
| 143 |
+
"G": 44,
|
| 144 |
+
"H": 10,
|
| 145 |
+
"I": 20,
|
| 146 |
+
"J": 80,
|
| 147 |
+
"K": 81,
|
| 148 |
+
"L": 52,
|
| 149 |
+
"M": 46,
|
| 150 |
+
"N": 69,
|
| 151 |
+
"O": 65,
|
| 152 |
+
"P": 62,
|
| 153 |
+
"Q": 59,
|
| 154 |
+
"R": 60,
|
| 155 |
+
"S": 58,
|
| 156 |
+
"T": 48,
|
| 157 |
+
"U": 55,
|
| 158 |
+
"V": 2,
|
| 159 |
+
"W": 83,
|
| 160 |
+
"X": 104,
|
| 161 |
+
"Y": 89,
|
| 162 |
+
"Z": 113,
|
| 163 |
+
"[": 70,
|
| 164 |
+
"\\": 165,
|
| 165 |
+
"]": 72,
|
| 166 |
+
"^": 132,
|
| 167 |
+
"_": 84,
|
| 168 |
+
"`": 196,
|
| 169 |
+
"a": 3,
|
| 170 |
+
"b": 6,
|
| 171 |
+
"c": 54,
|
| 172 |
+
"d": 12,
|
| 173 |
+
"e": 8,
|
| 174 |
+
"f": 50,
|
| 175 |
+
"g": 28,
|
| 176 |
+
"h": 56,
|
| 177 |
+
"i": 5,
|
| 178 |
+
"j": 82,
|
| 179 |
+
"k": 95,
|
| 180 |
+
"l": 7,
|
| 181 |
+
"m": 30,
|
| 182 |
+
"n": 31,
|
| 183 |
+
"o": 15,
|
| 184 |
+
"p": 22,
|
| 185 |
+
"q": 67,
|
| 186 |
+
"r": 4,
|
| 187 |
+
"s": 51,
|
| 188 |
+
"t": 14,
|
| 189 |
+
"u": 25,
|
| 190 |
+
"v": 24,
|
| 191 |
+
"w": 53,
|
| 192 |
+
"x": 61,
|
| 193 |
+
"y": 49,
|
| 194 |
+
"z": 11,
|
| 195 |
+
"{": 158,
|
| 196 |
+
"|": 139,
|
| 197 |
+
"}": 159,
|
| 198 |
+
"~": 147,
|
| 199 |
+
"\u00a2": 203,
|
| 200 |
+
"\u00a3": 162,
|
| 201 |
+
"\u00a4": 220,
|
| 202 |
+
"\u00a5": 176,
|
| 203 |
+
"\u00a7": 142,
|
| 204 |
+
"\u00a9": 268,
|
| 205 |
+
"\u00ab": 239,
|
| 206 |
+
"\u00ad": 275,
|
| 207 |
+
"\u00ae": 130,
|
| 208 |
+
"\u00b0": 100,
|
| 209 |
+
"\u00b1": 79,
|
| 210 |
+
"\u00b6": 171,
|
| 211 |
+
"\u00b7": 137,
|
| 212 |
+
"\u00bb": 240,
|
| 213 |
+
"\u00d7": 118,
|
| 214 |
+
"\u00d8": 192,
|
| 215 |
+
"\u00df": 197,
|
| 216 |
+
"\u00e6": 261,
|
| 217 |
+
"\u00f7": 225,
|
| 218 |
+
"\u00f8": 163,
|
| 219 |
+
"\u0131": 242,
|
| 220 |
+
"\u0142": 267,
|
| 221 |
+
"\u01c2": 211,
|
| 222 |
+
"\u025b": 223,
|
| 223 |
+
"\u02b9": 248,
|
| 224 |
+
"\u02c2": 195,
|
| 225 |
+
"\u02c3": 208,
|
| 226 |
+
"\u02c6": 253,
|
| 227 |
+
"\u0300": 209,
|
| 228 |
+
"\u0301": 131,
|
| 229 |
+
"\u0302": 138,
|
| 230 |
+
"\u0303": 156,
|
| 231 |
+
"\u0304": 152,
|
| 232 |
+
"\u0306": 222,
|
| 233 |
+
"\u0307": 247,
|
| 234 |
+
"\u0308": 103,
|
| 235 |
+
"\u030a": 102,
|
| 236 |
+
"\u030c": 254,
|
| 237 |
+
"\u0327": 155,
|
| 238 |
+
"\u0328": 269,
|
| 239 |
+
"\u0338": 170,
|
| 240 |
+
"\u0391": 173,
|
| 241 |
+
"\u0392": 169,
|
| 242 |
+
"\u0393": 180,
|
| 243 |
+
"\u0394": 85,
|
| 244 |
+
"\u0398": 243,
|
| 245 |
+
"\u0399": 271,
|
| 246 |
+
"\u039b": 272,
|
| 247 |
+
"\u03a0": 213,
|
| 248 |
+
"\u03a3": 185,
|
| 249 |
+
"\u03a6": 148,
|
| 250 |
+
"\u03a7": 212,
|
| 251 |
+
"\u03a8": 141,
|
| 252 |
+
"\u03a9": 161,
|
| 253 |
+
"\u03b1": 90,
|
| 254 |
+
"\u03b2": 107,
|
| 255 |
+
"\u03b3": 110,
|
| 256 |
+
"\u03b4": 153,
|
| 257 |
+
"\u03b5": 166,
|
| 258 |
+
"\u03b6": 178,
|
| 259 |
+
"\u03b7": 146,
|
| 260 |
+
"\u03b8": 186,
|
| 261 |
+
"\u03b9": 229,
|
| 262 |
+
"\u03ba": 164,
|
| 263 |
+
"\u03bb": 91,
|
| 264 |
+
"\u03bc": 78,
|
| 265 |
+
"\u03bd": 230,
|
| 266 |
+
"\u03be": 244,
|
| 267 |
+
"\u03c0": 127,
|
| 268 |
+
"\u03c1": 149,
|
| 269 |
+
"\u03c3": 116,
|
| 270 |
+
"\u03c4": 198,
|
| 271 |
+
"\u03c5": 189,
|
| 272 |
+
"\u03c6": 140,
|
| 273 |
+
"\u03c7": 124,
|
| 274 |
+
"\u03c8": 216,
|
| 275 |
+
"\u03c9": 167,
|
| 276 |
+
"\u0410": 273,
|
| 277 |
+
"\u0421": 194,
|
| 278 |
+
"\u115f": 217,
|
| 279 |
+
"\u200b": 265,
|
| 280 |
+
"\u2010": 117,
|
| 281 |
+
"\u2012": 135,
|
| 282 |
+
"\u2013": 42,
|
| 283 |
+
"\u2014": 106,
|
| 284 |
+
"\u2015": 228,
|
| 285 |
+
"\u2016": 259,
|
| 286 |
+
"\u2018": 123,
|
| 287 |
+
"\u2019": 121,
|
| 288 |
+
"\u201c": 87,
|
| 289 |
+
"\u201d": 115,
|
| 290 |
+
"\u201e": 245,
|
| 291 |
+
"\u2020": 109,
|
| 292 |
+
"\u2021": 129,
|
| 293 |
+
"\u2022": 128,
|
| 294 |
+
"\u2028": 190,
|
| 295 |
+
"\u2030": 154,
|
| 296 |
+
"\u2032": 68,
|
| 297 |
+
"\u203b": 224,
|
| 298 |
+
"\u2044": 188,
|
| 299 |
+
"\u204e": 199,
|
| 300 |
+
"\u2061": 200,
|
| 301 |
+
"\u20ac": 184,
|
| 302 |
+
"\u2190": 202,
|
| 303 |
+
"\u2191": 112,
|
| 304 |
+
"\u2192": 120,
|
| 305 |
+
"\u2193": 111,
|
| 306 |
+
"\u2194": 183,
|
| 307 |
+
"\u21d1": 266,
|
| 308 |
+
"\u21d2": 264,
|
| 309 |
+
"\u21d3": 255,
|
| 310 |
+
"\u2205": 215,
|
| 311 |
+
"\u2206": 175,
|
| 312 |
+
"\u2208": 262,
|
| 313 |
+
"\u2211": 160,
|
| 314 |
+
"\u2212": 76,
|
| 315 |
+
"\u2216": 206,
|
| 316 |
+
"\u2217": 105,
|
| 317 |
+
"\u2218": 246,
|
| 318 |
+
"\u2219": 236,
|
| 319 |
+
"\u221a": 187,
|
| 320 |
+
"\u221e": 207,
|
| 321 |
+
"\u2223": 260,
|
| 322 |
+
"\u2225": 193,
|
| 323 |
+
"\u2227": 182,
|
| 324 |
+
"\u2229": 256,
|
| 325 |
+
"\u222b": 258,
|
| 326 |
+
"\u223c": 98,
|
| 327 |
+
"\u2248": 210,
|
| 328 |
+
"\u2264": 38,
|
| 329 |
+
"\u2265": 74,
|
| 330 |
+
"\u2266": 214,
|
| 331 |
+
"\u2267": 181,
|
| 332 |
+
"\u2295": 263,
|
| 333 |
+
"\u22c5": 174,
|
| 334 |
+
"\u22c6": 191,
|
| 335 |
+
"\u22ee": 277,
|
| 336 |
+
"\u22ef": 270,
|
| 337 |
+
"\u2500": 205,
|
| 338 |
+
"\u2551": 231,
|
| 339 |
+
"\u25a0": 250,
|
| 340 |
+
"\u25a1": 177,
|
| 341 |
+
"\u25aa": 145,
|
| 342 |
+
"\u25b2": 136,
|
| 343 |
+
"\u25b3": 143,
|
| 344 |
+
"\u25bc": 251,
|
| 345 |
+
"\u25c6": 226,
|
| 346 |
+
"\u25ca": 235,
|
| 347 |
+
"\u25cb": 227,
|
| 348 |
+
"\u25cf": 172,
|
| 349 |
+
"\u25e6": 274,
|
| 350 |
+
"\u2605": 204,
|
| 351 |
+
"\u2606": 144,
|
| 352 |
+
"\u2640": 133,
|
| 353 |
+
"\u2642": 134,
|
| 354 |
+
"\u2663": 252,
|
| 355 |
+
"\u2666": 157,
|
| 356 |
+
"\u266f": 221,
|
| 357 |
+
"\u2713": 122,
|
| 358 |
+
"\u2714": 249,
|
| 359 |
+
"\u2717": 201,
|
| 360 |
+
"\u2794": 168,
|
| 361 |
+
"\u27a2": 276,
|
| 362 |
+
"\u2a7d": 234,
|
| 363 |
+
"\u2a7e": 241,
|
| 364 |
+
"\u3008": 237,
|
| 365 |
+
"\u3009": 238,
|
| 366 |
+
"\ufeff": 257
|
| 367 |
+
}
|
| 368 |
+
}
|
| 369 |
+
}
|