Upload folder using huggingface_hub
Browse files
- README.md +3 -0
- config.json +32 -0
- preprocessor_config.json +19 -0
- pytorch_model.bin +3 -0
- tokenizer_config.json +6 -0
- vocab_cell.json +1 -0
- vocab_html.json +1 -0
README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
---
|
config.json
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"EOS_CELL": 0,
|
3 |
+
"EOS_HTML": 0,
|
4 |
+
"SEP_CELL": 0,
|
5 |
+
"SEP_HTML": 0,
|
6 |
+
"SOC_HTML": 0,
|
7 |
+
"SOS_CELL": 0,
|
8 |
+
"SOS_HTML": 0,
|
9 |
+
"_attn_implementation_autoset": true,
|
10 |
+
"_name_or_path": "/data/pretrained_models/MuTabNet",
|
11 |
+
"channels": 512,
|
12 |
+
"d_model": 512,
|
13 |
+
"dropout": 0.2,
|
14 |
+
"gca": [
|
15 |
+
"GCA"
|
16 |
+
],
|
17 |
+
"gcb_heads": 1,
|
18 |
+
"gcb_ratio": 0.0625,
|
19 |
+
"heads": 8,
|
20 |
+
"init_std": 0.02,
|
21 |
+
"max_len_cell": 8000,
|
22 |
+
"max_len_html": 800,
|
23 |
+
"model_type": "mutabnet",
|
24 |
+
"num_blocks_cell": 1,
|
25 |
+
"num_blocks_html": 3,
|
26 |
+
"num_emb_cell": 281,
|
27 |
+
"num_emb_html": 43,
|
28 |
+
"resnet_dim": 3,
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.49.0",
|
31 |
+
"window": 300
|
32 |
+
}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"do_resize": true,
|
4 |
+
"model_type": "mutabnet",
|
5 |
+
"image_processor_type": "MuTabNetImageProcessor",
|
6 |
+
"image_mean": [
|
7 |
+
0.5,
|
8 |
+
0.5,
|
9 |
+
0.5
|
10 |
+
],
|
11 |
+
"image_std": [
|
12 |
+
0.5,
|
13 |
+
0.5,
|
14 |
+
0.5
|
15 |
+
],
|
16 |
+
"resample": 2,
|
17 |
+
"size": 520
|
18 |
+
}
|
19 |
+
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae33db69070e3aaecce4e8254ebc19615f1e319a047a027b642c6403a95612f4
|
3 |
+
size 207531338
|
tokenizer_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"auto_map": {
|
3 |
+
"AutoTokenizer": ["MuTabNetTokenizer", null]
|
4 |
+
},
|
5 |
+
"tokenizer_class": "MuTabNetTokenizer"
|
6 |
+
}
|
vocab_cell.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["V", "a", "r", "i", "b", "l", "e", "H", "z", "d", " ", "t", "o", "9", "5", "%", "C", "I", "<i>", "p", "</i>", "v", "u", "*", "A", "g", "(", "m", "n", ")", "0", ".", "7", "1", "6", "\u2264", ">", "8", "3", "\u2013", "2", "G", "4", "M", "F", "T", "y", "f", "s", "L", "w", "c", "U", "h", "D", "S", "Q", "R", "x", "P", "-", "E", "O", "/", "k", ",", "+", "N", "K", "q", "\u2032", "[", "]", "<", "\u2265", "<sup>", "\u2212", "</sup>", "\u03bc", "\u00b1", "J", "j", "W", "_", "\u0394", "B", "\u201c", ":", "Y", "\u03b1", "\u03bb", ";", "<sub>", "</sub>", "?", "\u223c", "<b>", "</b>", "=", "\u00b0", "#", "\u030a", "\u0308", "\u0302", "\u2019", "Z", "X", "\u2217", "\u2014", "\u03b2", "'", "\u2020", "~", "@", "\"", "\u03b3", "\u2193", "\u2191", "&", "\u2021", "\u03c7", "\u201d", "\u03c3", "\u00a7", "|", "\u00b6", "\u2010", "\u00d7", "$", "\u2192", "\u221a", "\u2713", "\u2018", "\\", "\u221e", "\u03c0", "\u2022", "\u00ae", "^", "\u2206", "\u2267", "<underline>", "</underline>", "\u0301", "\u2640", "\u2642", "\u2012", "\u204e", "\u25b2", "\u00b7", "\u00a3", "\u03c6", "\u03a8", "\u00df", "\u25b3", "\u2606", "\u25aa", "\u03b7", "\u20ac", "\u2227", "\u0303", "\u03a6", "\u03c1", "\u0304", "\u03b4", "\u2030", "\u0327", "\u03a9", "\u2666", "{", "}", "\u0300", "\u2211", "\u222b", "\u00f8", "\u03ba", "\u03b5", "\u00a5", "\u203b", "`", "\u03c9", "\u03a3", "\u2794", "\u2016", "\u0392", "\u0338", "\u2500", "\u25cf", "\u2a7e", "\u03a7", "\u0391", "\u22c5", "\u25c6", "\u2605", "\u25a0", "\u03c8", "\u01c2", "\u25a1", "\u03b6", "!", "\u0393", "\u2194", "\u03b8", "\u2044", "\u3008", "\u3009", "\u2015", "\u03c5", "\u03c4", "\u22c6", "\u00d8", "\u00a9", "\u2225", "\u0421", "\u02c2", "\u27a2", "\u025b", "\u2061", "\u2717", "\u2190", "\u25cb", "\u00a2", "\u2a7d", "\u2216", "\u02c3", "\u00ad", "\u2248", "\u03a0", "\u030c", "\u2266", "\u2205", "\u115f", "<overline>", "</overline>", "\u2223", "\u00a4", "\u266f", "\u0306", "\u03be", "\u00f7", "\u25bc", "\ufeff", "\u03b9", "\u03bd", "\u2551", "<strike>", 
"</strike>", "\u25e6", "\u200b", "\u25ca", "\u2219", "\u00ab", "\u00bb", "\u0142", "\u0131", "\u0398", "\u2208", "\u201e", "\u2218", "\u2714", "\u0307", "\u00e6", "\u02b9", "\u02c6", "\u2663", "\u21d3", "\u2229", "\u2295", "\u21d2", "\u21d1", "\u0328", "\u0399", "\u039b", "\u22ef", "\u0410", "\u22ee", "<SOS>", "<EOS>", "<PAD>", "<SEP>", "<UKN>"]
|
vocab_html.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["<thead>", "</thead>", "<tbody>", "</tbody>", "<tr>", "</tr>", "<td", ">", "</td>", "<td></td>", "<eb></eb>", "<eb1></eb1>", "<eb2></eb2>", "<eb3></eb3>", "<eb4></eb4>", "<eb5></eb5>", "<eb6></eb6>", "<eb7></eb7>", "<eb8></eb8>", "<eb9></eb9>", "<eb10></eb10>", " colspan=\"2\"", " colspan=\"3\"", " colspan=\"4\"", " colspan=\"5\"", " colspan=\"6\"", " colspan=\"7\"", " colspan=\"8\"", " colspan=\"9\"", " colspan=\"10\"", " rowspan=\"2\"", " rowspan=\"3\"", " rowspan=\"4\"", " rowspan=\"5\"", " rowspan=\"6\"", " rowspan=\"7\"", " rowspan=\"8\"", " rowspan=\"9\"", " rowspan=\"10\"", "<SOS>", "<EOS>", "<PAD>", "<UKN>"]
|