Upload folder using huggingface_hub
Browse files- chat_template.jinja +21 -9
- config.json +8 -8
- model.safetensors +2 -2
- quant_log.csv +112 -224
- tokenizer.json +2 -2
- tokenizer_config.json +0 -8
chat_template.jinja
CHANGED
@@ -1,9 +1,21 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
'
|
6 |
-
'
|
7 |
-
|
8 |
-
|
9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
{{- bos_token }}
|
3 |
+
{%- for message in messages %}
|
4 |
+
{%- if message['role'] == 'assistant' %}
|
5 |
+
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>' + '
|
6 |
+
' }}
|
7 |
+
{%- generation %}
|
8 |
+
{{- message['content'] + '<|eot_id|>' }}
|
9 |
+
{%- endgeneration %}
|
10 |
+
{{- '
|
11 |
+
' }}
|
12 |
+
{%- else %}
|
13 |
+
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>' + '
|
14 |
+
' + message['content'] + '<|eot_id|>' + '
|
15 |
+
' }}
|
16 |
+
{%- endif %}
|
17 |
+
{%- endfor %}
|
18 |
+
{%- if add_generation_prompt %}
|
19 |
+
{{- '<|start_header_id|>assistant<|end_header_id|>
|
20 |
+
' }}
|
21 |
+
{%- endif %}
|
config.json
CHANGED
@@ -6,16 +6,16 @@
|
|
6 |
"attention_dropout": 0.0,
|
7 |
"bos_token_id": 128000,
|
8 |
"eos_token_id": 128001,
|
9 |
-
"head_dim":
|
10 |
"hidden_act": "silu",
|
11 |
-
"hidden_size":
|
12 |
"initializer_range": 0.02,
|
13 |
-
"intermediate_size":
|
14 |
"max_position_embeddings": 131072,
|
15 |
"mlp_bias": false,
|
16 |
"model_type": "llama",
|
17 |
"num_attention_heads": 32,
|
18 |
-
"num_hidden_layers":
|
19 |
"num_key_value_heads": 8,
|
20 |
"pretraining_tp": 1,
|
21 |
"quantization_config": {
|
@@ -41,16 +41,16 @@
|
|
41 |
},
|
42 |
"rms_norm_eps": 1e-05,
|
43 |
"rope_scaling": {
|
44 |
-
"factor":
|
45 |
"high_freq_factor": 4.0,
|
46 |
"low_freq_factor": 1.0,
|
47 |
"original_max_position_embeddings": 8192,
|
48 |
"rope_type": "llama3"
|
49 |
},
|
50 |
"rope_theta": 500000.0,
|
51 |
-
"tie_word_embeddings":
|
52 |
"torch_dtype": "bfloat16",
|
53 |
"transformers_version": "4.53.1",
|
54 |
-
"use_cache":
|
55 |
-
"vocab_size":
|
56 |
}
|
|
|
6 |
"attention_dropout": 0.0,
|
7 |
"bos_token_id": 128000,
|
8 |
"eos_token_id": 128001,
|
9 |
+
"head_dim": 64,
|
10 |
"hidden_act": "silu",
|
11 |
+
"hidden_size": 2048,
|
12 |
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 8192,
|
14 |
"max_position_embeddings": 131072,
|
15 |
"mlp_bias": false,
|
16 |
"model_type": "llama",
|
17 |
"num_attention_heads": 32,
|
18 |
+
"num_hidden_layers": 16,
|
19 |
"num_key_value_heads": 8,
|
20 |
"pretraining_tp": 1,
|
21 |
"quantization_config": {
|
|
|
41 |
},
|
42 |
"rms_norm_eps": 1e-05,
|
43 |
"rope_scaling": {
|
44 |
+
"factor": 32.0,
|
45 |
"high_freq_factor": 4.0,
|
46 |
"low_freq_factor": 1.0,
|
47 |
"original_max_position_embeddings": 8192,
|
48 |
"rope_type": "llama3"
|
49 |
},
|
50 |
"rope_theta": 500000.0,
|
51 |
+
"tie_word_embeddings": true,
|
52 |
"torch_dtype": "bfloat16",
|
53 |
"transformers_version": "4.53.1",
|
54 |
+
"use_cache": false,
|
55 |
+
"vocab_size": 128256
|
56 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88f0bb257c743062340499ea96abff61e5b9ceb8b077aacaee691c1f6be38494
|
3 |
+
size 787209792
|
quant_log.csv
CHANGED
@@ -1,225 +1,113 @@
|
|
1 |
layer,module,loss,samples,damp,time
|
2 |
-
0,self_attn.k_proj,
|
3 |
-
0,self_attn.v_proj,0.
|
4 |
-
0,self_attn.q_proj,18.
|
5 |
-
0,self_attn.o_proj,0.
|
6 |
-
0,mlp.up_proj,
|
7 |
-
0,mlp.gate_proj,
|
8 |
-
0,mlp.down_proj,0.
|
9 |
-
1,self_attn.k_proj,
|
10 |
-
1,self_attn.v_proj,0.
|
11 |
-
1,self_attn.q_proj,
|
12 |
-
1,self_attn.o_proj,0.
|
13 |
-
1,mlp.up_proj,
|
14 |
-
1,mlp.gate_proj,
|
15 |
-
1,mlp.down_proj,
|
16 |
-
2,self_attn.k_proj,
|
17 |
-
2,self_attn.v_proj,1.
|
18 |
-
2,self_attn.q_proj,
|
19 |
-
2,self_attn.o_proj,0.
|
20 |
-
2,mlp.up_proj,
|
21 |
-
2,mlp.gate_proj,
|
22 |
-
2,mlp.down_proj,0.
|
23 |
-
3,self_attn.k_proj,
|
24 |
-
3,self_attn.v_proj,2.
|
25 |
-
3,self_attn.q_proj,
|
26 |
-
3,self_attn.o_proj,0.
|
27 |
-
3,mlp.up_proj,
|
28 |
-
3,mlp.gate_proj,
|
29 |
-
3,mlp.down_proj,0.
|
30 |
-
4,self_attn.k_proj,
|
31 |
-
4,self_attn.v_proj,2.
|
32 |
-
4,self_attn.q_proj,
|
33 |
-
4,self_attn.o_proj,0.
|
34 |
-
4,mlp.up_proj,
|
35 |
-
4,mlp.gate_proj,
|
36 |
-
4,mlp.down_proj,0.
|
37 |
-
5,self_attn.k_proj,
|
38 |
-
5,self_attn.v_proj,2.
|
39 |
-
5,self_attn.q_proj,
|
40 |
-
5,self_attn.o_proj,0.
|
41 |
-
5,mlp.up_proj,
|
42 |
-
5,mlp.gate_proj,
|
43 |
-
5,mlp.down_proj,0.
|
44 |
-
6,self_attn.k_proj,
|
45 |
-
6,self_attn.v_proj,3.
|
46 |
-
6,self_attn.q_proj,
|
47 |
-
6,self_attn.o_proj,0.
|
48 |
-
6,mlp.up_proj,
|
49 |
-
6,mlp.gate_proj,
|
50 |
-
6,mlp.down_proj,0.
|
51 |
-
7,self_attn.k_proj,
|
52 |
-
7,self_attn.v_proj,
|
53 |
-
7,self_attn.q_proj,
|
54 |
-
7,self_attn.o_proj,0.
|
55 |
-
7,mlp.up_proj,
|
56 |
-
7,mlp.gate_proj,
|
57 |
-
7,mlp.down_proj,0.
|
58 |
-
8,self_attn.k_proj,
|
59 |
-
8,self_attn.v_proj,4.
|
60 |
-
8,self_attn.q_proj,
|
61 |
-
8,self_attn.o_proj,0.
|
62 |
-
8,mlp.up_proj,
|
63 |
-
8,mlp.gate_proj,
|
64 |
-
8,mlp.down_proj,
|
65 |
-
9,self_attn.k_proj,
|
66 |
-
9,self_attn.v_proj,
|
67 |
-
9,self_attn.q_proj,
|
68 |
-
9,self_attn.o_proj,0.
|
69 |
-
9,mlp.up_proj,
|
70 |
-
9,mlp.gate_proj,
|
71 |
-
9,mlp.down_proj,
|
72 |
-
10,self_attn.k_proj,
|
73 |
-
10,self_attn.v_proj,
|
74 |
-
10,self_attn.q_proj,
|
75 |
-
10,self_attn.o_proj,0.
|
76 |
-
10,mlp.up_proj,
|
77 |
-
10,mlp.gate_proj,
|
78 |
-
10,mlp.down_proj,
|
79 |
-
11,self_attn.k_proj,
|
80 |
-
11,self_attn.v_proj,5.
|
81 |
-
11,self_attn.q_proj,
|
82 |
-
11,self_attn.o_proj,0.
|
83 |
-
11,mlp.up_proj,
|
84 |
-
11,mlp.gate_proj,
|
85 |
-
11,mlp.down_proj,
|
86 |
-
12,self_attn.k_proj,
|
87 |
-
12,self_attn.v_proj,5.
|
88 |
-
12,self_attn.q_proj,
|
89 |
-
12,self_attn.o_proj,0.
|
90 |
-
12,mlp.up_proj,
|
91 |
-
12,mlp.gate_proj,
|
92 |
-
12,mlp.down_proj,
|
93 |
-
13,self_attn.k_proj,
|
94 |
-
13,self_attn.v_proj,
|
95 |
-
13,self_attn.q_proj,
|
96 |
-
13,self_attn.o_proj,0.
|
97 |
-
13,mlp.up_proj,
|
98 |
-
13,mlp.gate_proj,
|
99 |
-
13,mlp.down_proj,
|
100 |
-
14,self_attn.k_proj,
|
101 |
-
14,self_attn.v_proj,
|
102 |
-
14,self_attn.q_proj,
|
103 |
-
14,self_attn.o_proj,
|
104 |
-
14,mlp.up_proj,
|
105 |
-
14,mlp.gate_proj,
|
106 |
-
14,mlp.down_proj,
|
107 |
-
15,self_attn.k_proj,
|
108 |
-
15,self_attn.v_proj,
|
109 |
-
15,self_attn.q_proj,
|
110 |
-
15,self_attn.o_proj,
|
111 |
-
15,mlp.up_proj,
|
112 |
-
15,mlp.gate_proj,
|
113 |
-
15,mlp.down_proj,
|
114 |
-
16,self_attn.k_proj,61.05425644,0.01000,0.950
|
115 |
-
16,self_attn.v_proj,6.67351246,0.01000,0.953
|
116 |
-
16,self_attn.q_proj,100.83403015,0.01000,0.961
|
117 |
-
16,self_attn.o_proj,0.69056815,0.01000,0.962
|
118 |
-
16,mlp.up_proj,88.04904938,0.01000,0.977
|
119 |
-
16,mlp.gate_proj,137.32760620,0.01000,0.976
|
120 |
-
16,mlp.down_proj,2.13290906,0.01000,4.007
|
121 |
-
17,self_attn.k_proj,66.14538574,0.01000,0.956
|
122 |
-
17,self_attn.v_proj,7.93916798,0.01000,0.963
|
123 |
-
17,self_attn.q_proj,109.25076294,0.01000,0.973
|
124 |
-
17,self_attn.o_proj,0.67304528,0.01000,0.972
|
125 |
-
17,mlp.up_proj,92.86845398,0.01000,0.984
|
126 |
-
17,mlp.gate_proj,148.43232727,0.01000,0.978
|
127 |
-
17,mlp.down_proj,2.42646503,0.01000,4.064
|
128 |
-
18,self_attn.k_proj,70.76748657,0.01000,0.952
|
129 |
-
18,self_attn.v_proj,7.24831295,0.01000,0.944
|
130 |
-
18,self_attn.q_proj,106.46402740,0.01000,0.962
|
131 |
-
18,self_attn.o_proj,0.36137423,0.01000,0.962
|
132 |
-
18,mlp.up_proj,93.72953796,0.01000,0.983
|
133 |
-
18,mlp.gate_proj,151.20054626,0.01000,0.977
|
134 |
-
18,mlp.down_proj,2.41029239,0.01000,4.024
|
135 |
-
19,self_attn.k_proj,62.90951157,0.01000,0.952
|
136 |
-
19,self_attn.v_proj,8.11475182,0.01000,0.975
|
137 |
-
19,self_attn.q_proj,106.72672272,0.01000,0.960
|
138 |
-
19,self_attn.o_proj,0.45973969,0.01000,0.959
|
139 |
-
19,mlp.up_proj,96.43962860,0.01000,0.983
|
140 |
-
19,mlp.gate_proj,158.58799744,0.01000,0.977
|
141 |
-
19,mlp.down_proj,2.60704017,0.01000,4.036
|
142 |
-
20,self_attn.k_proj,66.16004944,0.01000,0.952
|
143 |
-
20,self_attn.v_proj,8.60017872,0.01000,0.960
|
144 |
-
20,self_attn.q_proj,106.42929077,0.01000,0.975
|
145 |
-
20,self_attn.o_proj,0.42740995,0.01000,0.966
|
146 |
-
20,mlp.up_proj,104.03691864,0.01000,0.986
|
147 |
-
20,mlp.gate_proj,169.39122009,0.01000,2.004
|
148 |
-
20,mlp.down_proj,2.89748049,0.01000,4.059
|
149 |
-
21,self_attn.k_proj,64.93406677,0.01000,0.948
|
150 |
-
21,self_attn.v_proj,9.05480289,0.01000,0.959
|
151 |
-
21,self_attn.q_proj,102.93681335,0.01000,0.966
|
152 |
-
21,self_attn.o_proj,0.57246387,0.01000,0.967
|
153 |
-
21,mlp.up_proj,109.80873108,0.01000,0.985
|
154 |
-
21,mlp.gate_proj,180.10443115,0.01000,0.980
|
155 |
-
21,mlp.down_proj,3.18497372,0.01000,4.002
|
156 |
-
22,self_attn.k_proj,68.19435120,0.01000,0.956
|
157 |
-
22,self_attn.v_proj,10.42789459,0.01000,0.948
|
158 |
-
22,self_attn.q_proj,102.62083435,0.01000,0.959
|
159 |
-
22,self_attn.o_proj,0.82003343,0.01000,0.959
|
160 |
-
22,mlp.up_proj,115.58087921,0.01000,0.992
|
161 |
-
22,mlp.gate_proj,187.88652039,0.01000,0.987
|
162 |
-
22,mlp.down_proj,3.17831731,0.01000,3.990
|
163 |
-
23,self_attn.k_proj,65.93269348,0.01000,0.962
|
164 |
-
23,self_attn.v_proj,11.55161858,0.01000,0.955
|
165 |
-
23,self_attn.q_proj,106.37304688,0.01000,0.966
|
166 |
-
23,self_attn.o_proj,0.79565775,0.01000,0.963
|
167 |
-
23,mlp.up_proj,122.20275116,0.01000,0.984
|
168 |
-
23,mlp.gate_proj,197.79428101,0.01000,0.987
|
169 |
-
23,mlp.down_proj,3.40926170,0.01000,4.092
|
170 |
-
24,self_attn.k_proj,65.81439209,0.01000,0.960
|
171 |
-
24,self_attn.v_proj,14.61132812,0.01000,0.975
|
172 |
-
24,self_attn.q_proj,109.37326050,0.01000,0.963
|
173 |
-
24,self_attn.o_proj,0.96981573,0.01000,0.967
|
174 |
-
24,mlp.up_proj,131.75790405,0.01000,0.978
|
175 |
-
24,mlp.gate_proj,213.20829773,0.01000,0.975
|
176 |
-
24,mlp.down_proj,3.76813459,0.01000,4.002
|
177 |
-
25,self_attn.k_proj,62.32714081,0.01000,0.951
|
178 |
-
25,self_attn.v_proj,15.46690750,0.01000,0.965
|
179 |
-
25,self_attn.q_proj,109.55595398,0.01000,0.976
|
180 |
-
25,self_attn.o_proj,1.38681269,0.01000,0.964
|
181 |
-
25,mlp.up_proj,140.07965088,0.01000,1.005
|
182 |
-
25,mlp.gate_proj,226.35734558,0.01000,0.983
|
183 |
-
25,mlp.down_proj,4.22223282,0.01000,4.021
|
184 |
-
26,self_attn.k_proj,69.03659821,0.01000,0.956
|
185 |
-
26,self_attn.v_proj,14.82448673,0.01000,0.947
|
186 |
-
26,self_attn.q_proj,109.22200775,0.01000,1.008
|
187 |
-
26,self_attn.o_proj,1.21192455,0.01000,0.973
|
188 |
-
26,mlp.up_proj,151.80865479,0.01000,0.987
|
189 |
-
26,mlp.gate_proj,245.63934326,0.01000,0.980
|
190 |
-
26,mlp.down_proj,4.96812248,0.01000,3.998
|
191 |
-
27,self_attn.k_proj,73.81638336,0.01000,0.952
|
192 |
-
27,self_attn.v_proj,21.00315094,0.01000,0.944
|
193 |
-
27,self_attn.q_proj,113.85871887,0.01000,0.963
|
194 |
-
27,self_attn.o_proj,1.86845124,0.01000,0.964
|
195 |
-
27,mlp.up_proj,164.77142334,0.01000,0.989
|
196 |
-
27,mlp.gate_proj,265.11877441,0.01000,0.984
|
197 |
-
27,mlp.down_proj,5.92129993,0.01000,4.004
|
198 |
-
28,self_attn.k_proj,61.08584976,0.01000,0.954
|
199 |
-
28,self_attn.v_proj,19.68507385,0.01000,0.968
|
200 |
-
28,self_attn.q_proj,108.52185822,0.01000,0.962
|
201 |
-
28,self_attn.o_proj,2.66862583,0.01000,0.966
|
202 |
-
28,mlp.up_proj,185.00190735,0.01000,0.984
|
203 |
-
28,mlp.gate_proj,284.06256104,0.01000,0.983
|
204 |
-
28,mlp.down_proj,7.90922022,0.01000,4.070
|
205 |
-
29,self_attn.k_proj,65.70452881,0.01000,0.952
|
206 |
-
29,self_attn.v_proj,23.99268150,0.01000,0.972
|
207 |
-
29,self_attn.q_proj,115.29597473,0.01000,0.973
|
208 |
-
29,self_attn.o_proj,2.71498299,0.01000,0.969
|
209 |
-
29,mlp.up_proj,195.88598633,0.01000,0.988
|
210 |
-
29,mlp.gate_proj,286.53732300,0.01000,0.981
|
211 |
-
29,mlp.down_proj,10.72382736,0.01000,4.052
|
212 |
-
30,self_attn.k_proj,65.22695923,0.01000,0.952
|
213 |
-
30,self_attn.v_proj,32.39274597,0.01000,0.952
|
214 |
-
30,self_attn.q_proj,109.58857727,0.01000,0.975
|
215 |
-
30,self_attn.o_proj,6.09502459,0.01000,0.970
|
216 |
-
30,mlp.up_proj,210.70892334,0.01000,0.988
|
217 |
-
30,mlp.gate_proj,316.56787109,0.01000,0.985
|
218 |
-
30,mlp.down_proj,18.89646912,0.01000,4.010
|
219 |
-
31,self_attn.k_proj,47.18705368,0.01000,0.956
|
220 |
-
31,self_attn.v_proj,20.02719879,0.01000,0.950
|
221 |
-
31,self_attn.q_proj,90.60032654,0.01000,0.971
|
222 |
-
31,self_attn.o_proj,7.11097431,0.01000,0.965
|
223 |
-
31,mlp.up_proj,202.48239136,0.01000,0.984
|
224 |
-
31,mlp.gate_proj,296.04345703,0.01000,0.980
|
225 |
-
31,mlp.down_proj,60.59933090,0.01000,4.000
|
|
|
1 |
layer,module,loss,samples,damp,time
|
2 |
+
0,self_attn.k_proj,8.79510689,0.01000,0.659
|
3 |
+
0,self_attn.v_proj,0.21052508,0.01000,0.466
|
4 |
+
0,self_attn.q_proj,18.25806046,0.01000,0.473
|
5 |
+
0,self_attn.o_proj,0.02193195,0.01000,0.470
|
6 |
+
0,mlp.up_proj,15.79567146,0.01000,0.476
|
7 |
+
0,mlp.gate_proj,20.15350723,0.01000,0.474
|
8 |
+
0,mlp.down_proj,0.15333621,0.01000,2.037
|
9 |
+
1,self_attn.k_proj,18.12412453,0.01000,0.472
|
10 |
+
1,self_attn.v_proj,0.98016936,0.01000,0.471
|
11 |
+
1,self_attn.q_proj,33.08531570,0.01000,0.476
|
12 |
+
1,self_attn.o_proj,0.11857565,0.01000,0.472
|
13 |
+
1,mlp.up_proj,24.87150764,0.01000,0.482
|
14 |
+
1,mlp.gate_proj,34.42536926,0.01000,0.479
|
15 |
+
1,mlp.down_proj,20.08703995,0.01000,1.996
|
16 |
+
2,self_attn.k_proj,28.00672913,0.01000,0.469
|
17 |
+
2,self_attn.v_proj,1.93891215,0.01000,0.468
|
18 |
+
2,self_attn.q_proj,55.34558868,0.01000,0.478
|
19 |
+
2,self_attn.o_proj,0.09205560,0.01000,0.470
|
20 |
+
2,mlp.up_proj,30.34683800,0.01000,0.479
|
21 |
+
2,mlp.gate_proj,49.06833267,0.01000,0.501
|
22 |
+
2,mlp.down_proj,0.42902470,0.01000,2.014
|
23 |
+
3,self_attn.k_proj,21.30574417,0.01000,0.473
|
24 |
+
3,self_attn.v_proj,2.84672379,0.01000,0.469
|
25 |
+
3,self_attn.q_proj,46.99731827,0.01000,0.474
|
26 |
+
3,self_attn.o_proj,0.16826847,0.01000,0.472
|
27 |
+
3,mlp.up_proj,38.18422699,0.01000,0.479
|
28 |
+
3,mlp.gate_proj,76.93240356,0.01000,0.477
|
29 |
+
3,mlp.down_proj,0.58045858,0.01000,1.992
|
30 |
+
4,self_attn.k_proj,24.07534409,0.01000,0.467
|
31 |
+
4,self_attn.v_proj,2.88128042,0.01000,0.468
|
32 |
+
4,self_attn.q_proj,50.19567108,0.01000,0.471
|
33 |
+
4,self_attn.o_proj,0.22788243,0.01000,0.471
|
34 |
+
4,mlp.up_proj,40.49008560,0.01000,0.480
|
35 |
+
4,mlp.gate_proj,88.19657898,0.01000,0.475
|
36 |
+
4,mlp.down_proj,0.64514506,0.01000,1.998
|
37 |
+
5,self_attn.k_proj,35.62877274,0.01000,0.473
|
38 |
+
5,self_attn.v_proj,2.56014800,0.01000,0.469
|
39 |
+
5,self_attn.q_proj,64.17327118,0.01000,0.478
|
40 |
+
5,self_attn.o_proj,0.23322049,0.01000,0.507
|
41 |
+
5,mlp.up_proj,42.68757629,0.01000,0.481
|
42 |
+
5,mlp.gate_proj,79.94951630,0.01000,0.480
|
43 |
+
5,mlp.down_proj,0.71073961,0.01000,2.018
|
44 |
+
6,self_attn.k_proj,32.50390625,0.01000,0.467
|
45 |
+
6,self_attn.v_proj,3.48658395,0.01000,0.467
|
46 |
+
6,self_attn.q_proj,50.43141556,0.01000,0.477
|
47 |
+
6,self_attn.o_proj,0.32279876,0.01000,0.473
|
48 |
+
6,mlp.up_proj,43.27687836,0.01000,0.481
|
49 |
+
6,mlp.gate_proj,79.23511505,0.01000,0.478
|
50 |
+
6,mlp.down_proj,0.72877872,0.01000,2.031
|
51 |
+
7,self_attn.k_proj,33.91743851,0.01000,0.475
|
52 |
+
7,self_attn.v_proj,4.19138622,0.01000,0.469
|
53 |
+
7,self_attn.q_proj,63.09676743,0.01000,0.482
|
54 |
+
7,self_attn.o_proj,0.31201273,0.01000,0.481
|
55 |
+
7,mlp.up_proj,48.48965454,0.01000,0.484
|
56 |
+
7,mlp.gate_proj,79.57895660,0.01000,0.493
|
57 |
+
7,mlp.down_proj,0.84213424,0.01000,1.988
|
58 |
+
8,self_attn.k_proj,42.15065765,0.01000,0.471
|
59 |
+
8,self_attn.v_proj,4.18408966,0.01000,0.469
|
60 |
+
8,self_attn.q_proj,69.34562683,0.01000,0.479
|
61 |
+
8,self_attn.o_proj,0.36488774,0.01000,0.819
|
62 |
+
8,mlp.up_proj,56.41041946,0.01000,0.480
|
63 |
+
8,mlp.gate_proj,89.70135498,0.01000,0.482
|
64 |
+
8,mlp.down_proj,1.05656636,0.01000,1.997
|
65 |
+
9,self_attn.k_proj,35.16567993,0.01000,0.469
|
66 |
+
9,self_attn.v_proj,5.07817173,0.01000,0.469
|
67 |
+
9,self_attn.q_proj,86.08435059,0.01000,0.470
|
68 |
+
9,self_attn.o_proj,0.62493443,0.01000,0.472
|
69 |
+
9,mlp.up_proj,64.02466583,0.01000,0.482
|
70 |
+
9,mlp.gate_proj,104.88742828,0.01000,0.485
|
71 |
+
9,mlp.down_proj,1.33367336,0.01000,2.080
|
72 |
+
10,self_attn.k_proj,43.94557190,0.01000,0.473
|
73 |
+
10,self_attn.v_proj,5.46747303,0.01000,0.471
|
74 |
+
10,self_attn.q_proj,92.83677673,0.01000,0.472
|
75 |
+
10,self_attn.o_proj,0.59127998,0.01000,0.470
|
76 |
+
10,mlp.up_proj,79.12550354,0.01000,0.480
|
77 |
+
10,mlp.gate_proj,125.22569275,0.01000,0.480
|
78 |
+
10,mlp.down_proj,1.87667823,0.01000,2.018
|
79 |
+
11,self_attn.k_proj,51.54523468,0.01000,0.472
|
80 |
+
11,self_attn.v_proj,5.58656693,0.01000,0.466
|
81 |
+
11,self_attn.q_proj,90.90484619,0.01000,0.474
|
82 |
+
11,self_attn.o_proj,0.48921397,0.01000,0.472
|
83 |
+
11,mlp.up_proj,96.23802185,0.01000,0.478
|
84 |
+
11,mlp.gate_proj,149.39210510,0.01000,0.478
|
85 |
+
11,mlp.down_proj,2.70367146,0.01000,2.027
|
86 |
+
12,self_attn.k_proj,49.63491821,0.01000,0.471
|
87 |
+
12,self_attn.v_proj,5.90558290,0.01000,0.466
|
88 |
+
12,self_attn.q_proj,93.67311096,0.01000,0.479
|
89 |
+
12,self_attn.o_proj,0.51877552,0.01000,0.469
|
90 |
+
12,mlp.up_proj,112.33209229,0.01000,0.478
|
91 |
+
12,mlp.gate_proj,165.25785828,0.01000,0.480
|
92 |
+
12,mlp.down_proj,4.04239559,0.01000,2.022
|
93 |
+
13,self_attn.k_proj,52.68299866,0.01000,0.470
|
94 |
+
13,self_attn.v_proj,10.03530312,0.01000,0.470
|
95 |
+
13,self_attn.q_proj,107.74049377,0.01000,0.477
|
96 |
+
13,self_attn.o_proj,0.80708027,0.01000,0.469
|
97 |
+
13,mlp.up_proj,133.44732666,0.01000,0.480
|
98 |
+
13,mlp.gate_proj,179.16419983,0.01000,0.479
|
99 |
+
13,mlp.down_proj,5.91810417,0.01000,2.132
|
100 |
+
14,self_attn.k_proj,49.46713257,0.01000,0.467
|
101 |
+
14,self_attn.v_proj,18.70763016,0.01000,0.465
|
102 |
+
14,self_attn.q_proj,103.55518341,0.01000,0.468
|
103 |
+
14,self_attn.o_proj,1.71897697,0.01000,0.470
|
104 |
+
14,mlp.up_proj,148.42242432,0.01000,0.479
|
105 |
+
14,mlp.gate_proj,214.55715942,0.01000,0.480
|
106 |
+
14,mlp.down_proj,7.16841698,0.01000,2.001
|
107 |
+
15,self_attn.k_proj,46.09174347,0.01000,0.473
|
108 |
+
15,self_attn.v_proj,20.09922600,0.01000,0.469
|
109 |
+
15,self_attn.q_proj,97.27299500,0.01000,0.473
|
110 |
+
15,self_attn.o_proj,4.08784580,0.01000,0.472
|
111 |
+
15,mlp.up_proj,185.49623108,0.01000,0.479
|
112 |
+
15,mlp.gate_proj,249.19700623,0.01000,0.481
|
113 |
+
15,mlp.down_proj,15.86927128,0.01000,2.031
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
3 |
+
size 17209920
|
tokenizer_config.json
CHANGED
@@ -2047,14 +2047,6 @@
|
|
2047 |
"rstrip": false,
|
2048 |
"single_word": false,
|
2049 |
"special": true
|
2050 |
-
},
|
2051 |
-
"128256": {
|
2052 |
-
"content": "<pad>",
|
2053 |
-
"lstrip": false,
|
2054 |
-
"normalized": false,
|
2055 |
-
"rstrip": false,
|
2056 |
-
"single_word": false,
|
2057 |
-
"special": true
|
2058 |
}
|
2059 |
},
|
2060 |
"bos_token": "<|begin_of_text|>",
|
|
|
2047 |
"rstrip": false,
|
2048 |
"single_word": false,
|
2049 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|