visualjoyce committed on
Commit
2d5a0f6
·
verified ·
1 Parent(s): 09515be

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: mit
3
+ ---
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "EOS_CELL": 0,
3
+ "EOS_HTML": 0,
4
+ "SEP_CELL": 0,
5
+ "SEP_HTML": 0,
6
+ "SOC_HTML": 0,
7
+ "SOS_CELL": 0,
8
+ "SOS_HTML": 0,
9
+ "_attn_implementation_autoset": true,
10
+ "_name_or_path": "/data/pretrained_models/MuTabNet",
11
+ "channels": 512,
12
+ "d_model": 512,
13
+ "dropout": 0.2,
14
+ "gca": [
15
+ "GCA"
16
+ ],
17
+ "gcb_heads": 1,
18
+ "gcb_ratio": 0.0625,
19
+ "heads": 8,
20
+ "init_std": 0.02,
21
+ "max_len_cell": 8000,
22
+ "max_len_html": 800,
23
+ "model_type": "mutabnet",
24
+ "num_blocks_cell": 1,
25
+ "num_blocks_html": 3,
26
+ "num_emb_cell": 281,
27
+ "num_emb_html": 43,
28
+ "resnet_dim": 3,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.49.0",
31
+ "window": 300
32
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_resize": true,
4
+ "model_type": "mutabnet",
5
+ "image_processor_type": "MuTabNetImageProcessor",
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "size": 520
18
+ }
19
+
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae33db69070e3aaecce4e8254ebc19615f1e319a047a027b642c6403a95612f4
3
+ size 207531338
tokenizer_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoTokenizer": ["MuTabNetTokenizer", null]
4
+ },
5
+ "tokenizer_class": "MuTabNetTokenizer"
6
+ }
vocab_cell.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["V", "a", "r", "i", "b", "l", "e", "H", "z", "d", " ", "t", "o", "9", "5", "%", "C", "I", "<i>", "p", "</i>", "v", "u", "*", "A", "g", "(", "m", "n", ")", "0", ".", "7", "1", "6", "\u2264", ">", "8", "3", "\u2013", "2", "G", "4", "M", "F", "T", "y", "f", "s", "L", "w", "c", "U", "h", "D", "S", "Q", "R", "x", "P", "-", "E", "O", "/", "k", ",", "+", "N", "K", "q", "\u2032", "[", "]", "<", "\u2265", "<sup>", "\u2212", "</sup>", "\u03bc", "\u00b1", "J", "j", "W", "_", "\u0394", "B", "\u201c", ":", "Y", "\u03b1", "\u03bb", ";", "<sub>", "</sub>", "?", "\u223c", "<b>", "</b>", "=", "\u00b0", "#", "\u030a", "\u0308", "\u0302", "\u2019", "Z", "X", "\u2217", "\u2014", "\u03b2", "'", "\u2020", "~", "@", "\"", "\u03b3", "\u2193", "\u2191", "&", "\u2021", "\u03c7", "\u201d", "\u03c3", "\u00a7", "|", "\u00b6", "\u2010", "\u00d7", "$", "\u2192", "\u221a", "\u2713", "\u2018", "\\", "\u221e", "\u03c0", "\u2022", "\u00ae", "^", "\u2206", "\u2267", "<underline>", "</underline>", "\u0301", "\u2640", "\u2642", "\u2012", "\u204e", "\u25b2", "\u00b7", "\u00a3", "\u03c6", "\u03a8", "\u00df", "\u25b3", "\u2606", "\u25aa", "\u03b7", "\u20ac", "\u2227", "\u0303", "\u03a6", "\u03c1", "\u0304", "\u03b4", "\u2030", "\u0327", "\u03a9", "\u2666", "{", "}", "\u0300", "\u2211", "\u222b", "\u00f8", "\u03ba", "\u03b5", "\u00a5", "\u203b", "`", "\u03c9", "\u03a3", "\u2794", "\u2016", "\u0392", "\u0338", "\u2500", "\u25cf", "\u2a7e", "\u03a7", "\u0391", "\u22c5", "\u25c6", "\u2605", "\u25a0", "\u03c8", "\u01c2", "\u25a1", "\u03b6", "!", "\u0393", "\u2194", "\u03b8", "\u2044", "\u3008", "\u3009", "\u2015", "\u03c5", "\u03c4", "\u22c6", "\u00d8", "\u00a9", "\u2225", "\u0421", "\u02c2", "\u27a2", "\u025b", "\u2061", "\u2717", "\u2190", "\u25cb", "\u00a2", "\u2a7d", "\u2216", "\u02c3", "\u00ad", "\u2248", "\u03a0", "\u030c", "\u2266", "\u2205", "\u115f", "<overline>", "</overline>", "\u2223", "\u00a4", "\u266f", "\u0306", "\u03be", "\u00f7", "\u25bc", "\ufeff", "\u03b9", "\u03bd", "\u2551", "<strike>", 
"</strike>", "\u25e6", "\u200b", "\u25ca", "\u2219", "\u00ab", "\u00bb", "\u0142", "\u0131", "\u0398", "\u2208", "\u201e", "\u2218", "\u2714", "\u0307", "\u00e6", "\u02b9", "\u02c6", "\u2663", "\u21d3", "\u2229", "\u2295", "\u21d2", "\u21d1", "\u0328", "\u0399", "\u039b", "\u22ef", "\u0410", "\u22ee", "<SOS>", "<EOS>", "<PAD>", "<SEP>", "<UKN>"]
vocab_html.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["<thead>", "</thead>", "<tbody>", "</tbody>", "<tr>", "</tr>", "<td", ">", "</td>", "<td></td>", "<eb></eb>", "<eb1></eb1>", "<eb2></eb2>", "<eb3></eb3>", "<eb4></eb4>", "<eb5></eb5>", "<eb6></eb6>", "<eb7></eb7>", "<eb8></eb8>", "<eb9></eb9>", "<eb10></eb10>", " colspan=\"2\"", " colspan=\"3\"", " colspan=\"4\"", " colspan=\"5\"", " colspan=\"6\"", " colspan=\"7\"", " colspan=\"8\"", " colspan=\"9\"", " colspan=\"10\"", " rowspan=\"2\"", " rowspan=\"3\"", " rowspan=\"4\"", " rowspan=\"5\"", " rowspan=\"6\"", " rowspan=\"7\"", " rowspan=\"8\"", " rowspan=\"9\"", " rowspan=\"10\"", "<SOS>", "<EOS>", "<PAD>", "<UKN>"]