Upload folder using huggingface_hub
Browse files- chat_template.jinja +9 -21
- config.json +8 -8
- model.safetensors +2 -2
- quant_log.csv +224 -112
- tokenizer.json +2 -2
- tokenizer_config.json +8 -0
chat_template.jinja
CHANGED
@@ -1,21 +1,9 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
'
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
{{- '
|
11 |
-
' }}
|
12 |
-
{%- else %}
|
13 |
-
{{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>' + '
|
14 |
-
' + message['content'] + '<|eot_id|>' + '
|
15 |
-
' }}
|
16 |
-
{%- endif %}
|
17 |
-
{%- endfor %}
|
18 |
-
{%- if add_generation_prompt %}
|
19 |
-
{{- '<|start_header_id|>assistant<|end_header_id|>
|
20 |
-
' }}
|
21 |
-
{%- endif %}
|
|
|
1 |
+
{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>
|
2 |
+
' + message['content'] + '
|
3 |
+
' }}{% elif message['role'] == 'user' %}{{ '<|user|>
|
4 |
+
' + message['content'] + '
|
5 |
+
' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>
|
6 |
+
' + message['content'] + eos_token + '
|
7 |
+
' }}{% else %}{{ '<|assistant|>
|
8 |
+
' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>
|
9 |
+
' }}{% endif %}{% endfor %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.json
CHANGED
@@ -6,16 +6,16 @@
|
|
6 |
"attention_dropout": 0.0,
|
7 |
"bos_token_id": 128000,
|
8 |
"eos_token_id": 128001,
|
9 |
-
"head_dim":
|
10 |
"hidden_act": "silu",
|
11 |
-
"hidden_size":
|
12 |
"initializer_range": 0.02,
|
13 |
-
"intermediate_size":
|
14 |
"max_position_embeddings": 131072,
|
15 |
"mlp_bias": false,
|
16 |
"model_type": "llama",
|
17 |
"num_attention_heads": 32,
|
18 |
-
"num_hidden_layers":
|
19 |
"num_key_value_heads": 8,
|
20 |
"pretraining_tp": 1,
|
21 |
"quantization_config": {
|
@@ -41,16 +41,16 @@
|
|
41 |
},
|
42 |
"rms_norm_eps": 1e-05,
|
43 |
"rope_scaling": {
|
44 |
-
"factor":
|
45 |
"high_freq_factor": 4.0,
|
46 |
"low_freq_factor": 1.0,
|
47 |
"original_max_position_embeddings": 8192,
|
48 |
"rope_type": "llama3"
|
49 |
},
|
50 |
"rope_theta": 500000.0,
|
51 |
-
"tie_word_embeddings":
|
52 |
"torch_dtype": "bfloat16",
|
53 |
"transformers_version": "4.53.1",
|
54 |
-
"use_cache":
|
55 |
-
"vocab_size":
|
56 |
}
|
|
|
6 |
"attention_dropout": 0.0,
|
7 |
"bos_token_id": 128000,
|
8 |
"eos_token_id": 128001,
|
9 |
+
"head_dim": 128,
|
10 |
"hidden_act": "silu",
|
11 |
+
"hidden_size": 4096,
|
12 |
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 14336,
|
14 |
"max_position_embeddings": 131072,
|
15 |
"mlp_bias": false,
|
16 |
"model_type": "llama",
|
17 |
"num_attention_heads": 32,
|
18 |
+
"num_hidden_layers": 32,
|
19 |
"num_key_value_heads": 8,
|
20 |
"pretraining_tp": 1,
|
21 |
"quantization_config": {
|
|
|
41 |
},
|
42 |
"rms_norm_eps": 1e-05,
|
43 |
"rope_scaling": {
|
44 |
+
"factor": 8.0,
|
45 |
"high_freq_factor": 4.0,
|
46 |
"low_freq_factor": 1.0,
|
47 |
"original_max_position_embeddings": 8192,
|
48 |
"rope_type": "llama3"
|
49 |
},
|
50 |
"rope_theta": 500000.0,
|
51 |
+
"tie_word_embeddings": false,
|
52 |
"torch_dtype": "bfloat16",
|
53 |
"transformers_version": "4.53.1",
|
54 |
+
"use_cache": true,
|
55 |
+
"vocab_size": 128264
|
56 |
}
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:db35c00ca313f38797f303234a579e81dd4d7d4c17fa8321045e975c99ee13ff
|
3 |
+
size 3974612224
|
quant_log.csv
CHANGED
@@ -1,113 +1,225 @@
|
|
1 |
layer,module,loss,samples,damp,time
|
2 |
-
0,self_attn.k_proj,
|
3 |
-
0,self_attn.v_proj,0.
|
4 |
-
0,self_attn.q_proj,18.
|
5 |
-
0,self_attn.o_proj,0.
|
6 |
-
0,mlp.up_proj,
|
7 |
-
0,mlp.gate_proj,
|
8 |
-
0,mlp.down_proj,0.
|
9 |
-
1,self_attn.k_proj,
|
10 |
-
1,self_attn.v_proj,0.
|
11 |
-
1,self_attn.q_proj,
|
12 |
-
1,self_attn.o_proj,0.
|
13 |
-
1,mlp.up_proj,
|
14 |
-
1,mlp.gate_proj,
|
15 |
-
1,mlp.down_proj,
|
16 |
-
2,self_attn.k_proj,
|
17 |
-
2,self_attn.v_proj,1.
|
18 |
-
2,self_attn.q_proj,
|
19 |
-
2,self_attn.o_proj,0.
|
20 |
-
2,mlp.up_proj,
|
21 |
-
2,mlp.gate_proj,
|
22 |
-
2,mlp.down_proj,0.
|
23 |
-
3,self_attn.k_proj,
|
24 |
-
3,self_attn.v_proj,2.
|
25 |
-
3,self_attn.q_proj,
|
26 |
-
3,self_attn.o_proj,0.
|
27 |
-
3,mlp.up_proj,
|
28 |
-
3,mlp.gate_proj,
|
29 |
-
3,mlp.down_proj,0.
|
30 |
-
4,self_attn.k_proj,
|
31 |
-
4,self_attn.v_proj,2.
|
32 |
-
4,self_attn.q_proj,
|
33 |
-
4,self_attn.o_proj,0.
|
34 |
-
4,mlp.up_proj,
|
35 |
-
4,mlp.gate_proj,
|
36 |
-
4,mlp.down_proj,0.
|
37 |
-
5,self_attn.k_proj,
|
38 |
-
5,self_attn.v_proj,2.
|
39 |
-
5,self_attn.q_proj,
|
40 |
-
5,self_attn.o_proj,0.
|
41 |
-
5,mlp.up_proj,
|
42 |
-
5,mlp.gate_proj,
|
43 |
-
5,mlp.down_proj,0.
|
44 |
-
6,self_attn.k_proj,
|
45 |
-
6,self_attn.v_proj,3.
|
46 |
-
6,self_attn.q_proj,
|
47 |
-
6,self_attn.o_proj,0.
|
48 |
-
6,mlp.up_proj,
|
49 |
-
6,mlp.gate_proj,
|
50 |
-
6,mlp.down_proj,0.
|
51 |
-
7,self_attn.k_proj,
|
52 |
-
7,self_attn.v_proj,
|
53 |
-
7,self_attn.q_proj,
|
54 |
-
7,self_attn.o_proj,0.
|
55 |
-
7,mlp.up_proj,
|
56 |
-
7,mlp.gate_proj,
|
57 |
-
7,mlp.down_proj,0.
|
58 |
-
8,self_attn.k_proj,
|
59 |
-
8,self_attn.v_proj,4.
|
60 |
-
8,self_attn.q_proj,
|
61 |
-
8,self_attn.o_proj,0.
|
62 |
-
8,mlp.up_proj,
|
63 |
-
8,mlp.gate_proj,
|
64 |
-
8,mlp.down_proj,
|
65 |
-
9,self_attn.k_proj,
|
66 |
-
9,self_attn.v_proj,
|
67 |
-
9,self_attn.q_proj,
|
68 |
-
9,self_attn.o_proj,0.
|
69 |
-
9,mlp.up_proj,
|
70 |
-
9,mlp.gate_proj,
|
71 |
-
9,mlp.down_proj,
|
72 |
-
10,self_attn.k_proj,
|
73 |
-
10,self_attn.v_proj,
|
74 |
-
10,self_attn.q_proj,
|
75 |
-
10,self_attn.o_proj,0.
|
76 |
-
10,mlp.up_proj,
|
77 |
-
10,mlp.gate_proj,
|
78 |
-
10,mlp.down_proj,
|
79 |
-
11,self_attn.k_proj,
|
80 |
-
11,self_attn.v_proj,5.
|
81 |
-
11,self_attn.q_proj,
|
82 |
-
11,self_attn.o_proj,0.
|
83 |
-
11,mlp.up_proj,
|
84 |
-
11,mlp.gate_proj,
|
85 |
-
11,mlp.down_proj,
|
86 |
-
12,self_attn.k_proj,
|
87 |
-
12,self_attn.v_proj,5.
|
88 |
-
12,self_attn.q_proj,
|
89 |
-
12,self_attn.o_proj,0.
|
90 |
-
12,mlp.up_proj,
|
91 |
-
12,mlp.gate_proj,
|
92 |
-
12,mlp.down_proj,
|
93 |
-
13,self_attn.k_proj,
|
94 |
-
13,self_attn.v_proj,
|
95 |
-
13,self_attn.q_proj,
|
96 |
-
13,self_attn.o_proj,0.
|
97 |
-
13,mlp.up_proj,
|
98 |
-
13,mlp.gate_proj,
|
99 |
-
13,mlp.down_proj,
|
100 |
-
14,self_attn.k_proj,
|
101 |
-
14,self_attn.v_proj,
|
102 |
-
14,self_attn.q_proj,
|
103 |
-
14,self_attn.o_proj,
|
104 |
-
14,mlp.up_proj,
|
105 |
-
14,mlp.gate_proj,
|
106 |
-
14,mlp.down_proj,
|
107 |
-
15,self_attn.k_proj,
|
108 |
-
15,self_attn.v_proj,
|
109 |
-
15,self_attn.q_proj,
|
110 |
-
15,self_attn.o_proj,
|
111 |
-
15,mlp.up_proj,
|
112 |
-
15,mlp.gate_proj,
|
113 |
-
15,mlp.down_proj,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
layer,module,loss,samples,damp,time
|
2 |
+
0,self_attn.k_proj,11.50315285,0.01000,1.200
|
3 |
+
0,self_attn.v_proj,0.08434783,0.01000,0.942
|
4 |
+
0,self_attn.q_proj,18.04431343,0.01000,0.974
|
5 |
+
0,self_attn.o_proj,0.00746359,0.01000,0.954
|
6 |
+
0,mlp.up_proj,6.94379950,0.01000,0.970
|
7 |
+
0,mlp.gate_proj,8.43739128,0.01000,0.971
|
8 |
+
0,mlp.down_proj,0.03982410,0.01000,4.106
|
9 |
+
1,self_attn.k_proj,9.79508400,0.01000,0.958
|
10 |
+
1,self_attn.v_proj,0.30334821,0.01000,0.964
|
11 |
+
1,self_attn.q_proj,17.00190735,0.01000,0.977
|
12 |
+
1,self_attn.o_proj,0.02521426,0.01000,0.967
|
13 |
+
1,mlp.up_proj,14.35952663,0.01000,0.979
|
14 |
+
1,mlp.gate_proj,17.07050514,0.01000,0.983
|
15 |
+
1,mlp.down_proj,9.23333359,0.01000,4.086
|
16 |
+
2,self_attn.k_proj,30.43294144,0.01000,0.950
|
17 |
+
2,self_attn.v_proj,1.15037918,0.01000,0.970
|
18 |
+
2,self_attn.q_proj,45.58643723,0.01000,0.964
|
19 |
+
2,self_attn.o_proj,0.03051591,0.01000,0.967
|
20 |
+
2,mlp.up_proj,18.03928757,0.01000,0.982
|
21 |
+
2,mlp.gate_proj,22.90150261,0.01000,0.980
|
22 |
+
2,mlp.down_proj,0.10555533,0.01000,4.078
|
23 |
+
3,self_attn.k_proj,27.10592270,0.01000,1.535
|
24 |
+
3,self_attn.v_proj,2.30466604,0.01000,0.947
|
25 |
+
3,self_attn.q_proj,47.55756378,0.01000,0.963
|
26 |
+
3,self_attn.o_proj,0.05661678,0.01000,0.964
|
27 |
+
3,mlp.up_proj,27.88656998,0.01000,0.980
|
28 |
+
3,mlp.gate_proj,40.16275024,0.01000,0.975
|
29 |
+
3,mlp.down_proj,0.23143937,0.01000,4.022
|
30 |
+
4,self_attn.k_proj,27.37422943,0.01000,0.948
|
31 |
+
4,self_attn.v_proj,2.49580097,0.01000,0.970
|
32 |
+
4,self_attn.q_proj,47.32018280,0.01000,0.967
|
33 |
+
4,self_attn.o_proj,0.11866941,0.01000,0.966
|
34 |
+
4,mlp.up_proj,34.95981979,0.01000,0.982
|
35 |
+
4,mlp.gate_proj,57.44453812,0.01000,0.976
|
36 |
+
4,mlp.down_proj,0.39879456,0.01000,4.011
|
37 |
+
5,self_attn.k_proj,44.15615082,0.01000,0.947
|
38 |
+
5,self_attn.v_proj,2.45797062,0.01000,0.943
|
39 |
+
5,self_attn.q_proj,69.22374725,0.01000,0.957
|
40 |
+
5,self_attn.o_proj,0.13216227,0.01000,0.961
|
41 |
+
5,mlp.up_proj,44.49318695,0.01000,0.983
|
42 |
+
5,mlp.gate_proj,71.99307251,0.01000,0.977
|
43 |
+
5,mlp.down_proj,0.57819533,0.01000,4.072
|
44 |
+
6,self_attn.k_proj,39.04605484,0.01000,0.945
|
45 |
+
6,self_attn.v_proj,3.02949667,0.01000,0.941
|
46 |
+
6,self_attn.q_proj,68.57389832,0.01000,0.968
|
47 |
+
6,self_attn.o_proj,0.20356959,0.01000,0.961
|
48 |
+
6,mlp.up_proj,51.34634399,0.01000,0.985
|
49 |
+
6,mlp.gate_proj,83.72372437,0.01000,0.980
|
50 |
+
6,mlp.down_proj,0.74748993,0.01000,3.988
|
51 |
+
7,self_attn.k_proj,41.18905640,0.01000,0.951
|
52 |
+
7,self_attn.v_proj,3.36715841,0.01000,0.944
|
53 |
+
7,self_attn.q_proj,67.75445557,0.01000,1.042
|
54 |
+
7,self_attn.o_proj,0.25776845,0.01000,0.961
|
55 |
+
7,mlp.up_proj,54.56769562,0.01000,0.981
|
56 |
+
7,mlp.gate_proj,83.44071960,0.01000,0.979
|
57 |
+
7,mlp.down_proj,0.84187621,0.01000,4.054
|
58 |
+
8,self_attn.k_proj,51.15142441,0.01000,0.957
|
59 |
+
8,self_attn.v_proj,4.34867573,0.01000,0.976
|
60 |
+
8,self_attn.q_proj,82.86448669,0.01000,0.967
|
61 |
+
8,self_attn.o_proj,0.30111048,0.01000,0.960
|
62 |
+
8,mlp.up_proj,57.04904175,0.01000,0.985
|
63 |
+
8,mlp.gate_proj,88.49418640,0.01000,0.980
|
64 |
+
8,mlp.down_proj,0.88172758,0.01000,4.088
|
65 |
+
9,self_attn.k_proj,50.66983795,0.01000,0.950
|
66 |
+
9,self_attn.v_proj,6.26596260,0.01000,1.005
|
67 |
+
9,self_attn.q_proj,84.79583740,0.01000,0.964
|
68 |
+
9,self_attn.o_proj,0.39540437,0.01000,0.961
|
69 |
+
9,mlp.up_proj,59.31522369,0.01000,1.601
|
70 |
+
9,mlp.gate_proj,92.27600861,0.01000,0.976
|
71 |
+
9,mlp.down_proj,0.90846139,0.01000,4.001
|
72 |
+
10,self_attn.k_proj,60.17391205,0.01000,0.953
|
73 |
+
10,self_attn.v_proj,4.89551830,0.01000,0.994
|
74 |
+
10,self_attn.q_proj,98.11512756,0.01000,0.967
|
75 |
+
10,self_attn.o_proj,0.34281918,0.01000,0.960
|
76 |
+
10,mlp.up_proj,62.74011230,0.01000,0.982
|
77 |
+
10,mlp.gate_proj,90.72794342,0.01000,0.980
|
78 |
+
10,mlp.down_proj,0.96149510,0.01000,3.990
|
79 |
+
11,self_attn.k_proj,55.94424438,0.01000,0.952
|
80 |
+
11,self_attn.v_proj,5.04172277,0.01000,0.943
|
81 |
+
11,self_attn.q_proj,86.99108887,0.01000,0.971
|
82 |
+
11,self_attn.o_proj,0.33490360,0.01000,0.991
|
83 |
+
11,mlp.up_proj,65.00627899,0.01000,0.994
|
84 |
+
11,mlp.gate_proj,90.79618835,0.01000,0.993
|
85 |
+
11,mlp.down_proj,1.04935539,0.01000,4.030
|
86 |
+
12,self_attn.k_proj,42.39485168,0.01000,0.950
|
87 |
+
12,self_attn.v_proj,5.80718994,0.01000,0.952
|
88 |
+
12,self_attn.q_proj,74.00858307,0.01000,0.970
|
89 |
+
12,self_attn.o_proj,0.50482625,0.01000,0.958
|
90 |
+
12,mlp.up_proj,66.45251465,0.01000,0.981
|
91 |
+
12,mlp.gate_proj,87.68470764,0.01000,0.979
|
92 |
+
12,mlp.down_proj,1.16635084,0.01000,4.013
|
93 |
+
13,self_attn.k_proj,66.97894287,0.01000,0.957
|
94 |
+
13,self_attn.v_proj,6.70645237,0.01000,0.947
|
95 |
+
13,self_attn.q_proj,102.37200928,0.01000,0.969
|
96 |
+
13,self_attn.o_proj,0.47240034,0.01000,0.969
|
97 |
+
13,mlp.up_proj,72.47052765,0.01000,0.991
|
98 |
+
13,mlp.gate_proj,95.45094299,0.01000,0.989
|
99 |
+
13,mlp.down_proj,1.29506612,0.01000,4.084
|
100 |
+
14,self_attn.k_proj,65.34709930,0.01000,0.953
|
101 |
+
14,self_attn.v_proj,6.54276228,0.01000,0.978
|
102 |
+
14,self_attn.q_proj,95.75817871,0.01000,0.963
|
103 |
+
14,self_attn.o_proj,0.56570506,0.01000,0.988
|
104 |
+
14,mlp.up_proj,77.22135162,0.01000,0.983
|
105 |
+
14,mlp.gate_proj,107.80537415,0.01000,0.976
|
106 |
+
14,mlp.down_proj,1.62657499,0.01000,4.082
|
107 |
+
15,self_attn.k_proj,60.99359131,0.01000,0.948
|
108 |
+
15,self_attn.v_proj,7.83463526,0.01000,1.000
|
109 |
+
15,self_attn.q_proj,114.51682281,0.01000,1.005
|
110 |
+
15,self_attn.o_proj,0.98620689,0.01000,0.966
|
111 |
+
15,mlp.up_proj,82.91996765,0.01000,0.985
|
112 |
+
15,mlp.gate_proj,122.00700378,0.01000,0.991
|
113 |
+
15,mlp.down_proj,1.85101867,0.01000,4.006
|
114 |
+
16,self_attn.k_proj,61.05425644,0.01000,0.950
|
115 |
+
16,self_attn.v_proj,6.67351246,0.01000,0.953
|
116 |
+
16,self_attn.q_proj,100.83403015,0.01000,0.961
|
117 |
+
16,self_attn.o_proj,0.69056815,0.01000,0.962
|
118 |
+
16,mlp.up_proj,88.04904938,0.01000,0.977
|
119 |
+
16,mlp.gate_proj,137.32760620,0.01000,0.976
|
120 |
+
16,mlp.down_proj,2.13290906,0.01000,4.007
|
121 |
+
17,self_attn.k_proj,66.14538574,0.01000,0.956
|
122 |
+
17,self_attn.v_proj,7.93916798,0.01000,0.963
|
123 |
+
17,self_attn.q_proj,109.25076294,0.01000,0.973
|
124 |
+
17,self_attn.o_proj,0.67304528,0.01000,0.972
|
125 |
+
17,mlp.up_proj,92.86845398,0.01000,0.984
|
126 |
+
17,mlp.gate_proj,148.43232727,0.01000,0.978
|
127 |
+
17,mlp.down_proj,2.42646503,0.01000,4.064
|
128 |
+
18,self_attn.k_proj,70.76748657,0.01000,0.952
|
129 |
+
18,self_attn.v_proj,7.24831295,0.01000,0.944
|
130 |
+
18,self_attn.q_proj,106.46402740,0.01000,0.962
|
131 |
+
18,self_attn.o_proj,0.36137423,0.01000,0.962
|
132 |
+
18,mlp.up_proj,93.72953796,0.01000,0.983
|
133 |
+
18,mlp.gate_proj,151.20054626,0.01000,0.977
|
134 |
+
18,mlp.down_proj,2.41029239,0.01000,4.024
|
135 |
+
19,self_attn.k_proj,62.90951157,0.01000,0.952
|
136 |
+
19,self_attn.v_proj,8.11475182,0.01000,0.975
|
137 |
+
19,self_attn.q_proj,106.72672272,0.01000,0.960
|
138 |
+
19,self_attn.o_proj,0.45973969,0.01000,0.959
|
139 |
+
19,mlp.up_proj,96.43962860,0.01000,0.983
|
140 |
+
19,mlp.gate_proj,158.58799744,0.01000,0.977
|
141 |
+
19,mlp.down_proj,2.60704017,0.01000,4.036
|
142 |
+
20,self_attn.k_proj,66.16004944,0.01000,0.952
|
143 |
+
20,self_attn.v_proj,8.60017872,0.01000,0.960
|
144 |
+
20,self_attn.q_proj,106.42929077,0.01000,0.975
|
145 |
+
20,self_attn.o_proj,0.42740995,0.01000,0.966
|
146 |
+
20,mlp.up_proj,104.03691864,0.01000,0.986
|
147 |
+
20,mlp.gate_proj,169.39122009,0.01000,2.004
|
148 |
+
20,mlp.down_proj,2.89748049,0.01000,4.059
|
149 |
+
21,self_attn.k_proj,64.93406677,0.01000,0.948
|
150 |
+
21,self_attn.v_proj,9.05480289,0.01000,0.959
|
151 |
+
21,self_attn.q_proj,102.93681335,0.01000,0.966
|
152 |
+
21,self_attn.o_proj,0.57246387,0.01000,0.967
|
153 |
+
21,mlp.up_proj,109.80873108,0.01000,0.985
|
154 |
+
21,mlp.gate_proj,180.10443115,0.01000,0.980
|
155 |
+
21,mlp.down_proj,3.18497372,0.01000,4.002
|
156 |
+
22,self_attn.k_proj,68.19435120,0.01000,0.956
|
157 |
+
22,self_attn.v_proj,10.42789459,0.01000,0.948
|
158 |
+
22,self_attn.q_proj,102.62083435,0.01000,0.959
|
159 |
+
22,self_attn.o_proj,0.82003343,0.01000,0.959
|
160 |
+
22,mlp.up_proj,115.58087921,0.01000,0.992
|
161 |
+
22,mlp.gate_proj,187.88652039,0.01000,0.987
|
162 |
+
22,mlp.down_proj,3.17831731,0.01000,3.990
|
163 |
+
23,self_attn.k_proj,65.93269348,0.01000,0.962
|
164 |
+
23,self_attn.v_proj,11.55161858,0.01000,0.955
|
165 |
+
23,self_attn.q_proj,106.37304688,0.01000,0.966
|
166 |
+
23,self_attn.o_proj,0.79565775,0.01000,0.963
|
167 |
+
23,mlp.up_proj,122.20275116,0.01000,0.984
|
168 |
+
23,mlp.gate_proj,197.79428101,0.01000,0.987
|
169 |
+
23,mlp.down_proj,3.40926170,0.01000,4.092
|
170 |
+
24,self_attn.k_proj,65.81439209,0.01000,0.960
|
171 |
+
24,self_attn.v_proj,14.61132812,0.01000,0.975
|
172 |
+
24,self_attn.q_proj,109.37326050,0.01000,0.963
|
173 |
+
24,self_attn.o_proj,0.96981573,0.01000,0.967
|
174 |
+
24,mlp.up_proj,131.75790405,0.01000,0.978
|
175 |
+
24,mlp.gate_proj,213.20829773,0.01000,0.975
|
176 |
+
24,mlp.down_proj,3.76813459,0.01000,4.002
|
177 |
+
25,self_attn.k_proj,62.32714081,0.01000,0.951
|
178 |
+
25,self_attn.v_proj,15.46690750,0.01000,0.965
|
179 |
+
25,self_attn.q_proj,109.55595398,0.01000,0.976
|
180 |
+
25,self_attn.o_proj,1.38681269,0.01000,0.964
|
181 |
+
25,mlp.up_proj,140.07965088,0.01000,1.005
|
182 |
+
25,mlp.gate_proj,226.35734558,0.01000,0.983
|
183 |
+
25,mlp.down_proj,4.22223282,0.01000,4.021
|
184 |
+
26,self_attn.k_proj,69.03659821,0.01000,0.956
|
185 |
+
26,self_attn.v_proj,14.82448673,0.01000,0.947
|
186 |
+
26,self_attn.q_proj,109.22200775,0.01000,1.008
|
187 |
+
26,self_attn.o_proj,1.21192455,0.01000,0.973
|
188 |
+
26,mlp.up_proj,151.80865479,0.01000,0.987
|
189 |
+
26,mlp.gate_proj,245.63934326,0.01000,0.980
|
190 |
+
26,mlp.down_proj,4.96812248,0.01000,3.998
|
191 |
+
27,self_attn.k_proj,73.81638336,0.01000,0.952
|
192 |
+
27,self_attn.v_proj,21.00315094,0.01000,0.944
|
193 |
+
27,self_attn.q_proj,113.85871887,0.01000,0.963
|
194 |
+
27,self_attn.o_proj,1.86845124,0.01000,0.964
|
195 |
+
27,mlp.up_proj,164.77142334,0.01000,0.989
|
196 |
+
27,mlp.gate_proj,265.11877441,0.01000,0.984
|
197 |
+
27,mlp.down_proj,5.92129993,0.01000,4.004
|
198 |
+
28,self_attn.k_proj,61.08584976,0.01000,0.954
|
199 |
+
28,self_attn.v_proj,19.68507385,0.01000,0.968
|
200 |
+
28,self_attn.q_proj,108.52185822,0.01000,0.962
|
201 |
+
28,self_attn.o_proj,2.66862583,0.01000,0.966
|
202 |
+
28,mlp.up_proj,185.00190735,0.01000,0.984
|
203 |
+
28,mlp.gate_proj,284.06256104,0.01000,0.983
|
204 |
+
28,mlp.down_proj,7.90922022,0.01000,4.070
|
205 |
+
29,self_attn.k_proj,65.70452881,0.01000,0.952
|
206 |
+
29,self_attn.v_proj,23.99268150,0.01000,0.972
|
207 |
+
29,self_attn.q_proj,115.29597473,0.01000,0.973
|
208 |
+
29,self_attn.o_proj,2.71498299,0.01000,0.969
|
209 |
+
29,mlp.up_proj,195.88598633,0.01000,0.988
|
210 |
+
29,mlp.gate_proj,286.53732300,0.01000,0.981
|
211 |
+
29,mlp.down_proj,10.72382736,0.01000,4.052
|
212 |
+
30,self_attn.k_proj,65.22695923,0.01000,0.952
|
213 |
+
30,self_attn.v_proj,32.39274597,0.01000,0.952
|
214 |
+
30,self_attn.q_proj,109.58857727,0.01000,0.975
|
215 |
+
30,self_attn.o_proj,6.09502459,0.01000,0.970
|
216 |
+
30,mlp.up_proj,210.70892334,0.01000,0.988
|
217 |
+
30,mlp.gate_proj,316.56787109,0.01000,0.985
|
218 |
+
30,mlp.down_proj,18.89646912,0.01000,4.010
|
219 |
+
31,self_attn.k_proj,47.18705368,0.01000,0.956
|
220 |
+
31,self_attn.v_proj,20.02719879,0.01000,0.950
|
221 |
+
31,self_attn.q_proj,90.60032654,0.01000,0.971
|
222 |
+
31,self_attn.o_proj,7.11097431,0.01000,0.965
|
223 |
+
31,mlp.up_proj,202.48239136,0.01000,0.984
|
224 |
+
31,mlp.gate_proj,296.04345703,0.01000,0.980
|
225 |
+
31,mlp.down_proj,60.59933090,0.01000,4.000
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9400df98529060210393c40f08cb127f7c0df584338b3fbfdba8cf82a33c1ade
|
3 |
+
size 17210102
|
tokenizer_config.json
CHANGED
@@ -2047,6 +2047,14 @@
|
|
2047 |
"rstrip": false,
|
2048 |
"single_word": false,
|
2049 |
"special": true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2050 |
}
|
2051 |
},
|
2052 |
"bos_token": "<|begin_of_text|>",
|
|
|
2047 |
"rstrip": false,
|
2048 |
"single_word": false,
|
2049 |
"special": true
|
2050 |
+
},
|
2051 |
+
"128256": {
|
2052 |
+
"content": "<pad>",
|
2053 |
+
"lstrip": false,
|
2054 |
+
"normalized": false,
|
2055 |
+
"rstrip": false,
|
2056 |
+
"single_word": false,
|
2057 |
+
"special": true
|
2058 |
}
|
2059 |
},
|
2060 |
"bos_token": "<|begin_of_text|>",
|