Tags: Text Generation · Transformers · PyTorch · Safetensors · llama · text-generation-inference
mfromm committed · verified · commit caf0502 · 1 parent: 9aa975e

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,34 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoTokenizer": [
+      "gptx_tokenizer.SPTokenizer",
+      null
+    ]
+  },
+  "bos_token_id": 1,
+  "eos_token_id": 4,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.0158,
+  "intermediate_size": 13440,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 2,
+  "pad_token_id": 3,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": true,
+  "tokenizer_class": "SPTokenizer",
+  "transformers_version": "4.38.1",
+  "use_cache": true,
+  "vocab_size": 250880
+}
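Since config.json declares a stock `LlamaForCausalLM` but routes `AutoTokenizer` to the custom `gptx_tokenizer.SPTokenizer` via `auto_map`, loading the tokenizer requires opting in to remote code. A minimal loading sketch; `repo_id` is a placeholder for this repository's Hub id, which the diff does not state:

    from transformers import AutoModelForCausalLM, AutoTokenizer

    repo_id = "org/model"  # placeholder: substitute this repository's Hub id

    # The "auto_map" entry above points AutoTokenizer at gptx_tokenizer.SPTokenizer,
    # custom code shipped with the repo, so it must be allowed explicitly.
    tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)

    # The model itself is a standard Llama architecture; no custom code involved.
    model = AutoModelForCausalLM.from_pretrained(repo_id)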
gptx_tokenizer.py ADDED
@@ -0,0 +1,34 @@
+"""
+This module supplies `transformers`-compatible wrappers for
+`GPTXTokenizer`s.
+
+The tokenizers in this module do not conform to the `PreTrainedTokenizer`
+API, but allow for better practical usage.
+"""
+
+from typing import List
+
+from gptx_tokenizer.hf_wrappers import (
+    HFTokenizer as _HFTokenizer,
+    SPTokenizer as _SPTokenizer,
+)
+
+class HFTokenizer(_HFTokenizer):
+    # The tokenizer is ridiculously slow without this override; however,
+    # the override does not implement all APIs of `PreTrainedTokenizer`.
+    def encode(self, text: str, **kwargs) -> List[int]:
+        return_tokens = kwargs.pop('return_tokens', False)
+        return self._tok.encode(text, return_tokens=return_tokens)
+
+
+class SPTokenizer(_SPTokenizer):
+    # `is_continuation` does not work without this override, but it does
+    # not implement all APIs of `PreTrainedTokenizer`.
+    def encode(self, text: str, **kwargs) -> List[int]:
+        return_tokens = kwargs.pop('return_tokens', False)
+        is_continuation = kwargs.pop('is_continuation', False)
+        return self._tok.encode(
+            text,
+            return_tokens=return_tokens,
+            is_continuation=is_continuation,
+        )
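A short usage sketch of the wrapper-specific keyword arguments. This is hypothetical: `tok` stands for an `SPTokenizer` instance, e.g. obtained through `AutoTokenizer.from_pretrained(..., trust_remote_code=True)`, and the meaning of `is_continuation` is inferred from its name:

    # Hypothetical usage; `tok` is an SPTokenizer instance.
    ids = tok.encode("Hello world")                          # token ids, List[int]
    pieces = tok.encode("Hello world", return_tokens=True)   # token strings instead of ids
    cont = tok.encode("world", is_continuation=True)         # presumably: encode as a
                                                             # mid-text continuation rather
                                                             # than a fresh string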
pytorch_model-00001-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05319fd8f2ca0b076cd179e250c73f7cfd7ac4a1c57525db057bc94690ae58b3
+size 3953224892
pytorch_model-00002-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9aff5a1f04d4e861ea1b08a7e1e61afe6cdbf5e9ba7b1ddeac9eaba74134c31d
+size 4016233683
pytorch_model-00003-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9db1331d9645b8fa62a353c6b6a8620fee3daa6474f297d730dabb9e9bbcee3d
+size 4016233683
pytorch_model-00004-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6595d0f85fcc78f53ea2ff3084801d72e9c29003a627f5f03dc1bee2a9e4a31c
+size 2921472588
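These four `.bin` entries (and `tokenizer.model` below) are Git LFS pointer files: the repository stores only the SHA-256 digest (`oid`) and byte `size` of each shard, while the roughly 14.8 GB of actual weights live in LFS storage. A minimal integrity-check sketch for a downloaded shard, using only the values recorded above:

    import hashlib

    def verify_lfs_shard(path: str, expected_oid: str, expected_size: int) -> bool:
        # Stream the file so a ~4 GB shard never has to fit in memory at once.
        digest = hashlib.sha256()
        size = 0
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
                size += len(chunk)
        return size == expected_size and digest.hexdigest() == expected_oid

    # Values copied from the first shard's pointer file:
    print(verify_lfs_shard(
        "pytorch_model-00001-of-00004.bin",
        "05319fd8f2ca0b076cd179e250c73f7cfd7ac4a1c57525db057bc94690ae58b3",
        3953224892,
    ))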
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,298 @@
+{
+  "metadata": {
+    "total_size": 14772871168
+  },
+  "weight_map": {
+    "lm_head.weight": "pytorch_model-00001-of-00004.bin",
+    "model.embed_tokens.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.15.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.20.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00004.bin",
+    "model.layers.25.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.30.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.input_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00004-of-00004.bin",
+    "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00004.bin",
+    "model.layers.5.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00004.bin",
+    "model.norm.weight": "pytorch_model-00004-of-00004.bin"
+  }
+}
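The index maps every parameter name to the shard that stores it, which is how `from_pretrained` can materialize a tensor without deserializing all four files. A minimal sketch of resolving one weight by hand, using only the files above:

    import json
    import torch

    # Load the shard index and look up which file holds a given parameter.
    with open("pytorch_model.bin.index.json") as f:
        index = json.load(f)

    name = "model.layers.14.mlp.down_proj.weight"
    shard_file = index["weight_map"][name]  # -> "pytorch_model-00003-of-00004.bin"

    # Only this one shard is loaded, not the full 14.8 GB checkpoint.
    state_dict = torch.load(shard_file, map_location="cpu")
    print(state_dict[name].shape)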
special_tokens_map.json ADDED
@@ -0,0 +1,264 @@
+{
+  "additional_special_tokens": [
+    "</s>",
+    "<placeholder_tok_0>",
+    "<placeholder_tok_1>",
+    "<placeholder_tok_2>",
+    "<placeholder_tok_3>",
+    "<placeholder_tok_4>",
+    "<placeholder_tok_5>",
+    "<placeholder_tok_6>",
+    "<placeholder_tok_7>",
+    "<placeholder_tok_8>",
+    "<placeholder_tok_9>",
+    "<placeholder_tok_10>",
+    "<placeholder_tok_11>",
+    "<placeholder_tok_12>",
+    "<placeholder_tok_13>",
+    "<placeholder_tok_14>",
+    "<placeholder_tok_15>",
+    "<placeholder_tok_16>",
+    "<placeholder_tok_17>",
+    "<placeholder_tok_18>",
+    "<placeholder_tok_19>",
+    "<placeholder_tok_20>",
+    "<placeholder_tok_21>",
+    "<placeholder_tok_22>",
+    "<placeholder_tok_23>",
+    "<placeholder_tok_24>",
+    "<placeholder_tok_25>",
+    "<placeholder_tok_26>",
+    "<placeholder_tok_27>",
+    "<placeholder_tok_28>",
+    "<placeholder_tok_29>",
+    "<placeholder_tok_30>",
+    "<placeholder_tok_31>",
+    "<placeholder_tok_32>",
+    "<placeholder_tok_33>",
+    "<placeholder_tok_34>",
+    "<placeholder_tok_35>",
+    "<placeholder_tok_36>",
+    "<placeholder_tok_37>",
+    "<placeholder_tok_38>",
+    "<placeholder_tok_39>",
+    "<placeholder_tok_40>",
+    "<placeholder_tok_41>",
+    "<placeholder_tok_42>",
+    "<placeholder_tok_43>",
+    "<placeholder_tok_44>",
+    "<placeholder_tok_45>",
+    "<placeholder_tok_46>",
+    "<placeholder_tok_47>",
+    "<placeholder_tok_48>",
+    "<placeholder_tok_49>",
+    "<placeholder_tok_50>",
+    "<placeholder_tok_51>",
+    "<placeholder_tok_52>",
+    "<placeholder_tok_53>",
+    "<placeholder_tok_54>",
+    "<placeholder_tok_55>",
+    "<placeholder_tok_56>",
+    "<placeholder_tok_57>",
+    "<placeholder_tok_58>",
+    "<placeholder_tok_59>",
+    "<placeholder_tok_60>",
+    "<placeholder_tok_61>",
+    "<placeholder_tok_62>",
+    "<placeholder_tok_63>",
+    "<placeholder_tok_64>",
+    "<placeholder_tok_65>",
+    "<placeholder_tok_66>",
+    "<placeholder_tok_67>",
+    "<placeholder_tok_68>",
+    "<placeholder_tok_69>",
+    "<placeholder_tok_70>",
+    "<placeholder_tok_71>",
+    "<placeholder_tok_72>",
+    "<placeholder_tok_73>",
+    "<placeholder_tok_74>",
+    "<placeholder_tok_75>",
+    "<placeholder_tok_76>",
+    "<placeholder_tok_77>",
+    "<placeholder_tok_78>",
+    "<placeholder_tok_79>",
+    "<placeholder_tok_80>",
+    "<placeholder_tok_81>",
+    "<placeholder_tok_82>",
+    "<placeholder_tok_83>",
+    "<placeholder_tok_84>",
+    "<placeholder_tok_85>",
+    "<placeholder_tok_86>",
+    "<placeholder_tok_87>",
+    "<placeholder_tok_88>",
+    "<placeholder_tok_89>",
+    "<placeholder_tok_90>",
+    "<placeholder_tok_91>",
+    "<placeholder_tok_92>",
+    "<placeholder_tok_93>",
+    "<placeholder_tok_94>",
+    "<placeholder_tok_95>",
+    "<placeholder_tok_96>",
+    "<placeholder_tok_97>",
+    "<placeholder_tok_98>",
+    "<placeholder_tok_99>",
+    "<placeholder_tok_100>",
+    "<placeholder_tok_101>",
+    "<placeholder_tok_102>",
+    "<placeholder_tok_103>",
+    "<placeholder_tok_104>",
+    "<placeholder_tok_105>",
+    "<placeholder_tok_106>",
+    "<placeholder_tok_107>",
+    "<placeholder_tok_108>",
+    "<placeholder_tok_109>",
+    "<placeholder_tok_110>",
+    "<placeholder_tok_111>",
+    "<placeholder_tok_112>",
+    "<placeholder_tok_113>",
+    "<placeholder_tok_114>",
+    "<placeholder_tok_115>",
+    "<placeholder_tok_116>",
+    "<placeholder_tok_117>",
+    "<placeholder_tok_118>",
+    "<placeholder_tok_119>",
+    "<placeholder_tok_120>",
+    "<placeholder_tok_121>",
+    "<placeholder_tok_122>",
+    "<placeholder_tok_123>",
+    "<placeholder_tok_124>",
+    "<placeholder_tok_125>",
+    "<placeholder_tok_126>",
+    "<placeholder_tok_127>",
+    "<placeholder_tok_128>",
+    "<placeholder_tok_129>",
+    "<placeholder_tok_130>",
+    "<placeholder_tok_131>",
+    "<placeholder_tok_132>",
+    "<placeholder_tok_133>",
+    "<placeholder_tok_134>",
+    "<placeholder_tok_135>",
+    "<placeholder_tok_136>",
+    "<placeholder_tok_137>",
+    "<placeholder_tok_138>",
+    "<placeholder_tok_139>",
+    "<placeholder_tok_140>",
+    "<placeholder_tok_141>",
+    "<placeholder_tok_142>",
+    "<placeholder_tok_143>",
+    "<placeholder_tok_144>",
+    "<placeholder_tok_145>",
+    "<placeholder_tok_146>",
+    "<placeholder_tok_147>",
+    "<placeholder_tok_148>",
+    "<placeholder_tok_149>",
+    "<placeholder_tok_150>",
+    "<placeholder_tok_151>",
+    "<placeholder_tok_152>",
+    "<placeholder_tok_153>",
+    "<placeholder_tok_154>",
+    "<placeholder_tok_155>",
+    "<placeholder_tok_156>",
+    "<placeholder_tok_157>",
+    "<placeholder_tok_158>",
+    "<placeholder_tok_159>",
+    "<placeholder_tok_160>",
+    "<placeholder_tok_161>",
+    "<placeholder_tok_162>",
+    "<placeholder_tok_163>",
+    "<placeholder_tok_164>",
+    "<placeholder_tok_165>",
+    "<placeholder_tok_166>",
+    "<placeholder_tok_167>",
+    "<placeholder_tok_168>",
+    "<placeholder_tok_169>",
+    "<placeholder_tok_170>",
+    "<placeholder_tok_171>",
+    "<placeholder_tok_172>",
+    "<placeholder_tok_173>",
+    "<placeholder_tok_174>",
+    "<placeholder_tok_175>",
+    "<placeholder_tok_176>",
+    "<placeholder_tok_177>",
+    "<placeholder_tok_178>",
+    "<placeholder_tok_179>",
+    "<placeholder_tok_180>",
+    "<placeholder_tok_181>",
+    "<placeholder_tok_182>",
+    "<placeholder_tok_183>",
+    "<placeholder_tok_184>",
+    "<placeholder_tok_185>",
+    "<placeholder_tok_186>",
+    "<placeholder_tok_187>",
+    "<placeholder_tok_188>",
+    "<placeholder_tok_189>",
+    "<placeholder_tok_190>",
+    "<placeholder_tok_191>",
+    "<placeholder_tok_192>",
+    "<placeholder_tok_193>",
+    "<placeholder_tok_194>",
+    "<placeholder_tok_195>",
+    "<placeholder_tok_196>",
+    "<placeholder_tok_197>",
+    "<placeholder_tok_198>",
+    "<placeholder_tok_199>",
+    "<placeholder_tok_200>",
+    "<placeholder_tok_201>",
+    "<placeholder_tok_202>",
+    "<placeholder_tok_203>",
+    "<placeholder_tok_204>",
+    "<placeholder_tok_205>",
+    "<placeholder_tok_206>",
+    "<placeholder_tok_207>",
+    "<placeholder_tok_208>",
+    "<placeholder_tok_209>",
+    "<placeholder_tok_210>",
+    "<placeholder_tok_211>",
+    "<placeholder_tok_212>",
+    "<placeholder_tok_213>",
+    "<placeholder_tok_214>",
+    "<placeholder_tok_215>",
+    "<placeholder_tok_216>",
+    "<placeholder_tok_217>",
+    "<placeholder_tok_218>",
+    "<placeholder_tok_219>",
+    "<placeholder_tok_220>",
+    "<placeholder_tok_221>",
+    "<placeholder_tok_222>",
+    "<placeholder_tok_223>",
+    "<placeholder_tok_224>",
+    "<placeholder_tok_225>",
+    "<placeholder_tok_226>",
+    "<placeholder_tok_227>",
+    "<placeholder_tok_228>",
+    "<placeholder_tok_229>",
+    "<placeholder_tok_230>",
+    "<placeholder_tok_231>",
+    "<placeholder_tok_232>",
+    "<placeholder_tok_233>",
+    "<placeholder_tok_234>",
+    "<placeholder_tok_235>",
+    "<placeholder_tok_236>",
+    "<placeholder_tok_237>",
+    "<placeholder_tok_238>",
+    "<placeholder_tok_239>",
+    "<placeholder_tok_240>",
+    "<placeholder_tok_241>",
+    "<placeholder_tok_242>",
+    "<placeholder_tok_243>",
+    "<placeholder_tok_244>",
+    "<placeholder_tok_245>",
+    "<placeholder_tok_246>",
+    "<placeholder_tok_247>",
+    "<placeholder_tok_248>",
+    "<placeholder_tok_249>",
+    "<placeholder_tok_250>",
+    "<placeholder_tok_251>",
+    "<placeholder_tok_252>",
+    "<placeholder_tok_253>",
+    "<placeholder_tok_254>",
+    "<placeholder_tok_255>"
+  ],
+  "bos_token": "<s>",
+  "eos_token": "<eod>",
+  "pad_token": "<pad>"
+}
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08d0c8316539a853f2fe6e14f51f0df583011dfb078fa08c8b6dc5c15a19a7e6
+size 4719922
tokenizer_config.json ADDED
@@ -0,0 +1,292 @@
+{
+  "num_threads": 224,
+  "split_by_whitespace": true,
+  "model_type": "unigram",
+  "vocab_size": 250680,
+  "character_coverage": 0.9999,
+  "byte_fallback": true,
+  "split_by_number": true,
+  "split_digits": true,
+  "normalization_rule_name": "nfkc",
+  "max_sentence_length": 4096,
+  "shuffle_input_sentence": true,
+  "input_sentence_size": 0,
+  "train_extremely_large_corpus": true,
+  "allow_whitespace_only_pieces": true,
+  "required_chars": "",
+  "remove_extra_whitespaces": false,
+  "user_defined_symbols": [
+    "<s>",
+    "</s>",
+    "<pad>",
+    "<eod>",
+    "<placeholder_tok_0>",
+    "<placeholder_tok_1>",
+    "<placeholder_tok_2>",
+    "<placeholder_tok_3>",
+    "<placeholder_tok_4>",
+    "<placeholder_tok_5>",
+    "<placeholder_tok_6>",
+    "<placeholder_tok_7>",
+    "<placeholder_tok_8>",
+    "<placeholder_tok_9>",
+    "<placeholder_tok_10>",
+    "<placeholder_tok_11>",
+    "<placeholder_tok_12>",
+    "<placeholder_tok_13>",
+    "<placeholder_tok_14>",
+    "<placeholder_tok_15>",
+    "<placeholder_tok_16>",
+    "<placeholder_tok_17>",
+    "<placeholder_tok_18>",
+    "<placeholder_tok_19>",
+    "<placeholder_tok_20>",
+    "<placeholder_tok_21>",
+    "<placeholder_tok_22>",
+    "<placeholder_tok_23>",
+    "<placeholder_tok_24>",
+    "<placeholder_tok_25>",
+    "<placeholder_tok_26>",
+    "<placeholder_tok_27>",
+    "<placeholder_tok_28>",
+    "<placeholder_tok_29>",
+    "<placeholder_tok_30>",
+    "<placeholder_tok_31>",
+    "<placeholder_tok_32>",
+    "<placeholder_tok_33>",
+    "<placeholder_tok_34>",
+    "<placeholder_tok_35>",
+    "<placeholder_tok_36>",
+    "<placeholder_tok_37>",
+    "<placeholder_tok_38>",
+    "<placeholder_tok_39>",
+    "<placeholder_tok_40>",
+    "<placeholder_tok_41>",
+    "<placeholder_tok_42>",
+    "<placeholder_tok_43>",
+    "<placeholder_tok_44>",
+    "<placeholder_tok_45>",
+    "<placeholder_tok_46>",
+    "<placeholder_tok_47>",
+    "<placeholder_tok_48>",
+    "<placeholder_tok_49>",
+    "<placeholder_tok_50>",
+    "<placeholder_tok_51>",
+    "<placeholder_tok_52>",
+    "<placeholder_tok_53>",
+    "<placeholder_tok_54>",
+    "<placeholder_tok_55>",
+    "<placeholder_tok_56>",
+    "<placeholder_tok_57>",
+    "<placeholder_tok_58>",
+    "<placeholder_tok_59>",
+    "<placeholder_tok_60>",
+    "<placeholder_tok_61>",
+    "<placeholder_tok_62>",
+    "<placeholder_tok_63>",
+    "<placeholder_tok_64>",
+    "<placeholder_tok_65>",
+    "<placeholder_tok_66>",
+    "<placeholder_tok_67>",
+    "<placeholder_tok_68>",
+    "<placeholder_tok_69>",
+    "<placeholder_tok_70>",
+    "<placeholder_tok_71>",
+    "<placeholder_tok_72>",
+    "<placeholder_tok_73>",
+    "<placeholder_tok_74>",
+    "<placeholder_tok_75>",
+    "<placeholder_tok_76>",
+    "<placeholder_tok_77>",
+    "<placeholder_tok_78>",
+    "<placeholder_tok_79>",
+    "<placeholder_tok_80>",
+    "<placeholder_tok_81>",
+    "<placeholder_tok_82>",
+    "<placeholder_tok_83>",
+    "<placeholder_tok_84>",
+    "<placeholder_tok_85>",
+    "<placeholder_tok_86>",
+    "<placeholder_tok_87>",
+    "<placeholder_tok_88>",
+    "<placeholder_tok_89>",
+    "<placeholder_tok_90>",
+    "<placeholder_tok_91>",
+    "<placeholder_tok_92>",
+    "<placeholder_tok_93>",
+    "<placeholder_tok_94>",
+    "<placeholder_tok_95>",
+    "<placeholder_tok_96>",
+    "<placeholder_tok_97>",
+    "<placeholder_tok_98>",
+    "<placeholder_tok_99>",
+    "<placeholder_tok_100>",
+    "<placeholder_tok_101>",
+    "<placeholder_tok_102>",
+    "<placeholder_tok_103>",
+    "<placeholder_tok_104>",
+    "<placeholder_tok_105>",
+    "<placeholder_tok_106>",
+    "<placeholder_tok_107>",
+    "<placeholder_tok_108>",
+    "<placeholder_tok_109>",
+    "<placeholder_tok_110>",
+    "<placeholder_tok_111>",
+    "<placeholder_tok_112>",
+    "<placeholder_tok_113>",
+    "<placeholder_tok_114>",
+    "<placeholder_tok_115>",
+    "<placeholder_tok_116>",
+    "<placeholder_tok_117>",
+    "<placeholder_tok_118>",
+    "<placeholder_tok_119>",
+    "<placeholder_tok_120>",
+    "<placeholder_tok_121>",
+    "<placeholder_tok_122>",
+    "<placeholder_tok_123>",
+    "<placeholder_tok_124>",
+    "<placeholder_tok_125>",
+    "<placeholder_tok_126>",
+    "<placeholder_tok_127>",
+    "<placeholder_tok_128>",
+    "<placeholder_tok_129>",
+    "<placeholder_tok_130>",
+    "<placeholder_tok_131>",
+    "<placeholder_tok_132>",
+    "<placeholder_tok_133>",
+    "<placeholder_tok_134>",
+    "<placeholder_tok_135>",
+    "<placeholder_tok_136>",
+    "<placeholder_tok_137>",
+    "<placeholder_tok_138>",
+    "<placeholder_tok_139>",
+    "<placeholder_tok_140>",
+    "<placeholder_tok_141>",
+    "<placeholder_tok_142>",
+    "<placeholder_tok_143>",
+    "<placeholder_tok_144>",
+    "<placeholder_tok_145>",
+    "<placeholder_tok_146>",
+    "<placeholder_tok_147>",
+    "<placeholder_tok_148>",
+    "<placeholder_tok_149>",
+    "<placeholder_tok_150>",
+    "<placeholder_tok_151>",
+    "<placeholder_tok_152>",
+    "<placeholder_tok_153>",
+    "<placeholder_tok_154>",
+    "<placeholder_tok_155>",
+    "<placeholder_tok_156>",
+    "<placeholder_tok_157>",
+    "<placeholder_tok_158>",
+    "<placeholder_tok_159>",
+    "<placeholder_tok_160>",
+    "<placeholder_tok_161>",
+    "<placeholder_tok_162>",
+    "<placeholder_tok_163>",
+    "<placeholder_tok_164>",
+    "<placeholder_tok_165>",
+    "<placeholder_tok_166>",
+    "<placeholder_tok_167>",
+    "<placeholder_tok_168>",
+    "<placeholder_tok_169>",
+    "<placeholder_tok_170>",
+    "<placeholder_tok_171>",
+    "<placeholder_tok_172>",
+    "<placeholder_tok_173>",
+    "<placeholder_tok_174>",
+    "<placeholder_tok_175>",
+    "<placeholder_tok_176>",
+    "<placeholder_tok_177>",
+    "<placeholder_tok_178>",
+    "<placeholder_tok_179>",
+    "<placeholder_tok_180>",
+    "<placeholder_tok_181>",
+    "<placeholder_tok_182>",
+    "<placeholder_tok_183>",
+    "<placeholder_tok_184>",
+    "<placeholder_tok_185>",
+    "<placeholder_tok_186>",
+    "<placeholder_tok_187>",
+    "<placeholder_tok_188>",
+    "<placeholder_tok_189>",
+    "<placeholder_tok_190>",
+    "<placeholder_tok_191>",
+    "<placeholder_tok_192>",
+    "<placeholder_tok_193>",
+    "<placeholder_tok_194>",
+    "<placeholder_tok_195>",
+    "<placeholder_tok_196>",
+    "<placeholder_tok_197>",
+    "<placeholder_tok_198>",
+    "<placeholder_tok_199>",
+    "<placeholder_tok_200>",
+    "<placeholder_tok_201>",
+    "<placeholder_tok_202>",
+    "<placeholder_tok_203>",
+    "<placeholder_tok_204>",
+    "<placeholder_tok_205>",
+    "<placeholder_tok_206>",
+    "<placeholder_tok_207>",
+    "<placeholder_tok_208>",
+    "<placeholder_tok_209>",
+    "<placeholder_tok_210>",
+    "<placeholder_tok_211>",
+    "<placeholder_tok_212>",
+    "<placeholder_tok_213>",
+    "<placeholder_tok_214>",
+    "<placeholder_tok_215>",
+    "<placeholder_tok_216>",
+    "<placeholder_tok_217>",
+    "<placeholder_tok_218>",
+    "<placeholder_tok_219>",
+    "<placeholder_tok_220>",
+    "<placeholder_tok_221>",
+    "<placeholder_tok_222>",
+    "<placeholder_tok_223>",
+    "<placeholder_tok_224>",
+    "<placeholder_tok_225>",
+    "<placeholder_tok_226>",
+    "<placeholder_tok_227>",
+    "<placeholder_tok_228>",
+    "<placeholder_tok_229>",
+    "<placeholder_tok_230>",
+    "<placeholder_tok_231>",
+    "<placeholder_tok_232>",
+    "<placeholder_tok_233>",
+    "<placeholder_tok_234>",
+    "<placeholder_tok_235>",
+    "<placeholder_tok_236>",
+    "<placeholder_tok_237>",
+    "<placeholder_tok_238>",
+    "<placeholder_tok_239>",
+    "<placeholder_tok_240>",
+    "<placeholder_tok_241>",
+    "<placeholder_tok_242>",
+    "<placeholder_tok_243>",
+    "<placeholder_tok_244>",
+    "<placeholder_tok_245>",
+    "<placeholder_tok_246>",
+    "<placeholder_tok_247>",
+    "<placeholder_tok_248>",
+    "<placeholder_tok_249>",
+    "<placeholder_tok_250>",
+    "<placeholder_tok_251>",
+    "<placeholder_tok_252>",
+    "<placeholder_tok_253>",
+    "<placeholder_tok_254>",
+    "<placeholder_tok_255>"
+  ],
+  "datasets_dir": "/home/fhgiais/gptx_ablations/bias_analysis/data/tokenizer/temp/",
+  "save_dir": "/home/fhgiais/gptx_ablations/bias_analysis/tokenizer/24",
+  "text_key": "text",
+  "cache_dir": "/home/fhgiais/gptx_ablations/bias_analysis/tokenizer/24/cache",
+  "library": "sentencepiece",
+  "auto_map": {
+    "AutoTokenizer": [
+      "gptx_tokenizer.SPTokenizer",
+      null
+    ]
+  },
+  "tokenizer_class": "SPTokenizer"
+}
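Most of these fields are not standard `transformers` tokenizer options; they record the SentencePiece training settings the tokenizer was built with (unigram model, byte fallback, NFKC normalization, digits split off). A hedged sketch of the equivalent `sentencepiece` trainer call, assuming a local `corpus.txt` and abbreviating the 260-entry symbol list:

    import sentencepiece as spm

    # Hypothetical re-training call mirroring the settings stored above;
    # "corpus.txt" and the "tokenizer" prefix are assumed local paths.
    spm.SentencePieceTrainer.train(
        input="corpus.txt",
        model_prefix="tokenizer",
        model_type="unigram",
        vocab_size=250680,
        character_coverage=0.9999,
        byte_fallback=True,
        num_threads=224,
        split_by_whitespace=True,
        split_by_number=True,
        split_digits=True,
        normalization_rule_name="nfkc",
        max_sentence_length=4096,
        shuffle_input_sentence=True,
        input_sentence_size=0,
        train_extremely_large_corpus=True,
        allow_whitespace_only_pieces=True,
        remove_extra_whitespaces=False,
        # Abbreviated: the stored config also lists <placeholder_tok_0> .. <placeholder_tok_255>.
        user_defined_symbols=["<s>", "</s>", "<pad>", "<eod>"],
    )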