Matt300209 committed on
Commit
c6e9021
·
verified ·
1 Parent(s): de430e1

Upload folder using huggingface_hub

Browse files
chat_template.jinja CHANGED
@@ -1,21 +1,9 @@
1
-
2
- {{- bos_token }}
3
- {%- for message in messages %}
4
- {%- if message['role'] == 'assistant' %}
5
- {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>' + '
6
- ' }}
7
- {%- generation %}
8
- {{- message['content'] + '<|eot_id|>' }}
9
- {%- endgeneration %}
10
- {{- '
11
- ' }}
12
- {%- else %}
13
- {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>' + '
14
- ' + message['content'] + '<|eot_id|>' + '
15
- ' }}
16
- {%- endif %}
17
- {%- endfor %}
18
- {%- if add_generation_prompt %}
19
- {{- '<|start_header_id|>assistant<|end_header_id|>
20
- ' }}
21
- {%- endif %}
 
1
+ {% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>
2
+ ' + message['content'] + '
3
+ ' }}{% elif message['role'] == 'user' %}{{ '<|user|>
4
+ ' + message['content'] + '
5
+ ' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>
6
+ ' + message['content'] + eos_token + '
7
+ ' }}{% else %}{{ '<|assistant|>
8
+ ' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>
9
+ ' }}{% endif %}{% endfor %}
 
 
 
 
 
 
 
 
 
 
 
 
config.json CHANGED
@@ -6,16 +6,16 @@
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 128000,
8
  "eos_token_id": 128001,
9
- "head_dim": 64,
10
  "hidden_act": "silu",
11
- "hidden_size": 2048,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 8192,
14
  "max_position_embeddings": 131072,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
  "num_attention_heads": 32,
18
- "num_hidden_layers": 16,
19
  "num_key_value_heads": 8,
20
  "pretraining_tp": 1,
21
  "quantization_config": {
@@ -41,16 +41,16 @@
41
  },
42
  "rms_norm_eps": 1e-05,
43
  "rope_scaling": {
44
- "factor": 32.0,
45
  "high_freq_factor": 4.0,
46
  "low_freq_factor": 1.0,
47
  "original_max_position_embeddings": 8192,
48
  "rope_type": "llama3"
49
  },
50
  "rope_theta": 500000.0,
51
- "tie_word_embeddings": true,
52
  "torch_dtype": "bfloat16",
53
  "transformers_version": "4.53.1",
54
- "use_cache": false,
55
- "vocab_size": 128256
56
  }
 
6
  "attention_dropout": 0.0,
7
  "bos_token_id": 128000,
8
  "eos_token_id": 128001,
9
+ "head_dim": 128,
10
  "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 14336,
14
  "max_position_embeddings": 131072,
15
  "mlp_bias": false,
16
  "model_type": "llama",
17
  "num_attention_heads": 32,
18
+ "num_hidden_layers": 32,
19
  "num_key_value_heads": 8,
20
  "pretraining_tp": 1,
21
  "quantization_config": {
 
41
  },
42
  "rms_norm_eps": 1e-05,
43
  "rope_scaling": {
44
+ "factor": 8.0,
45
  "high_freq_factor": 4.0,
46
  "low_freq_factor": 1.0,
47
  "original_max_position_embeddings": 8192,
48
  "rope_type": "llama3"
49
  },
50
  "rope_theta": 500000.0,
51
+ "tie_word_embeddings": false,
52
  "torch_dtype": "bfloat16",
53
  "transformers_version": "4.53.1",
54
+ "use_cache": true,
55
+ "vocab_size": 128264
56
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88f0bb257c743062340499ea96abff61e5b9ceb8b077aacaee691c1f6be38494
3
- size 787209792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db35c00ca313f38797f303234a579e81dd4d7d4c17fa8321045e975c99ee13ff
3
+ size 3974612224
quant_log.csv CHANGED
@@ -1,113 +1,225 @@
1
  layer,module,loss,samples,damp,time
2
- 0,self_attn.k_proj,8.79510689,0.01000,0.659
3
- 0,self_attn.v_proj,0.21052508,0.01000,0.466
4
- 0,self_attn.q_proj,18.25806046,0.01000,0.473
5
- 0,self_attn.o_proj,0.02193195,0.01000,0.470
6
- 0,mlp.up_proj,15.79567146,0.01000,0.476
7
- 0,mlp.gate_proj,20.15350723,0.01000,0.474
8
- 0,mlp.down_proj,0.15333621,0.01000,2.037
9
- 1,self_attn.k_proj,18.12412453,0.01000,0.472
10
- 1,self_attn.v_proj,0.98016936,0.01000,0.471
11
- 1,self_attn.q_proj,33.08531570,0.01000,0.476
12
- 1,self_attn.o_proj,0.11857565,0.01000,0.472
13
- 1,mlp.up_proj,24.87150764,0.01000,0.482
14
- 1,mlp.gate_proj,34.42536926,0.01000,0.479
15
- 1,mlp.down_proj,20.08703995,0.01000,1.996
16
- 2,self_attn.k_proj,28.00672913,0.01000,0.469
17
- 2,self_attn.v_proj,1.93891215,0.01000,0.468
18
- 2,self_attn.q_proj,55.34558868,0.01000,0.478
19
- 2,self_attn.o_proj,0.09205560,0.01000,0.470
20
- 2,mlp.up_proj,30.34683800,0.01000,0.479
21
- 2,mlp.gate_proj,49.06833267,0.01000,0.501
22
- 2,mlp.down_proj,0.42902470,0.01000,2.014
23
- 3,self_attn.k_proj,21.30574417,0.01000,0.473
24
- 3,self_attn.v_proj,2.84672379,0.01000,0.469
25
- 3,self_attn.q_proj,46.99731827,0.01000,0.474
26
- 3,self_attn.o_proj,0.16826847,0.01000,0.472
27
- 3,mlp.up_proj,38.18422699,0.01000,0.479
28
- 3,mlp.gate_proj,76.93240356,0.01000,0.477
29
- 3,mlp.down_proj,0.58045858,0.01000,1.992
30
- 4,self_attn.k_proj,24.07534409,0.01000,0.467
31
- 4,self_attn.v_proj,2.88128042,0.01000,0.468
32
- 4,self_attn.q_proj,50.19567108,0.01000,0.471
33
- 4,self_attn.o_proj,0.22788243,0.01000,0.471
34
- 4,mlp.up_proj,40.49008560,0.01000,0.480
35
- 4,mlp.gate_proj,88.19657898,0.01000,0.475
36
- 4,mlp.down_proj,0.64514506,0.01000,1.998
37
- 5,self_attn.k_proj,35.62877274,0.01000,0.473
38
- 5,self_attn.v_proj,2.56014800,0.01000,0.469
39
- 5,self_attn.q_proj,64.17327118,0.01000,0.478
40
- 5,self_attn.o_proj,0.23322049,0.01000,0.507
41
- 5,mlp.up_proj,42.68757629,0.01000,0.481
42
- 5,mlp.gate_proj,79.94951630,0.01000,0.480
43
- 5,mlp.down_proj,0.71073961,0.01000,2.018
44
- 6,self_attn.k_proj,32.50390625,0.01000,0.467
45
- 6,self_attn.v_proj,3.48658395,0.01000,0.467
46
- 6,self_attn.q_proj,50.43141556,0.01000,0.477
47
- 6,self_attn.o_proj,0.32279876,0.01000,0.473
48
- 6,mlp.up_proj,43.27687836,0.01000,0.481
49
- 6,mlp.gate_proj,79.23511505,0.01000,0.478
50
- 6,mlp.down_proj,0.72877872,0.01000,2.031
51
- 7,self_attn.k_proj,33.91743851,0.01000,0.475
52
- 7,self_attn.v_proj,4.19138622,0.01000,0.469
53
- 7,self_attn.q_proj,63.09676743,0.01000,0.482
54
- 7,self_attn.o_proj,0.31201273,0.01000,0.481
55
- 7,mlp.up_proj,48.48965454,0.01000,0.484
56
- 7,mlp.gate_proj,79.57895660,0.01000,0.493
57
- 7,mlp.down_proj,0.84213424,0.01000,1.988
58
- 8,self_attn.k_proj,42.15065765,0.01000,0.471
59
- 8,self_attn.v_proj,4.18408966,0.01000,0.469
60
- 8,self_attn.q_proj,69.34562683,0.01000,0.479
61
- 8,self_attn.o_proj,0.36488774,0.01000,0.819
62
- 8,mlp.up_proj,56.41041946,0.01000,0.480
63
- 8,mlp.gate_proj,89.70135498,0.01000,0.482
64
- 8,mlp.down_proj,1.05656636,0.01000,1.997
65
- 9,self_attn.k_proj,35.16567993,0.01000,0.469
66
- 9,self_attn.v_proj,5.07817173,0.01000,0.469
67
- 9,self_attn.q_proj,86.08435059,0.01000,0.470
68
- 9,self_attn.o_proj,0.62493443,0.01000,0.472
69
- 9,mlp.up_proj,64.02466583,0.01000,0.482
70
- 9,mlp.gate_proj,104.88742828,0.01000,0.485
71
- 9,mlp.down_proj,1.33367336,0.01000,2.080
72
- 10,self_attn.k_proj,43.94557190,0.01000,0.473
73
- 10,self_attn.v_proj,5.46747303,0.01000,0.471
74
- 10,self_attn.q_proj,92.83677673,0.01000,0.472
75
- 10,self_attn.o_proj,0.59127998,0.01000,0.470
76
- 10,mlp.up_proj,79.12550354,0.01000,0.480
77
- 10,mlp.gate_proj,125.22569275,0.01000,0.480
78
- 10,mlp.down_proj,1.87667823,0.01000,2.018
79
- 11,self_attn.k_proj,51.54523468,0.01000,0.472
80
- 11,self_attn.v_proj,5.58656693,0.01000,0.466
81
- 11,self_attn.q_proj,90.90484619,0.01000,0.474
82
- 11,self_attn.o_proj,0.48921397,0.01000,0.472
83
- 11,mlp.up_proj,96.23802185,0.01000,0.478
84
- 11,mlp.gate_proj,149.39210510,0.01000,0.478
85
- 11,mlp.down_proj,2.70367146,0.01000,2.027
86
- 12,self_attn.k_proj,49.63491821,0.01000,0.471
87
- 12,self_attn.v_proj,5.90558290,0.01000,0.466
88
- 12,self_attn.q_proj,93.67311096,0.01000,0.479
89
- 12,self_attn.o_proj,0.51877552,0.01000,0.469
90
- 12,mlp.up_proj,112.33209229,0.01000,0.478
91
- 12,mlp.gate_proj,165.25785828,0.01000,0.480
92
- 12,mlp.down_proj,4.04239559,0.01000,2.022
93
- 13,self_attn.k_proj,52.68299866,0.01000,0.470
94
- 13,self_attn.v_proj,10.03530312,0.01000,0.470
95
- 13,self_attn.q_proj,107.74049377,0.01000,0.477
96
- 13,self_attn.o_proj,0.80708027,0.01000,0.469
97
- 13,mlp.up_proj,133.44732666,0.01000,0.480
98
- 13,mlp.gate_proj,179.16419983,0.01000,0.479
99
- 13,mlp.down_proj,5.91810417,0.01000,2.132
100
- 14,self_attn.k_proj,49.46713257,0.01000,0.467
101
- 14,self_attn.v_proj,18.70763016,0.01000,0.465
102
- 14,self_attn.q_proj,103.55518341,0.01000,0.468
103
- 14,self_attn.o_proj,1.71897697,0.01000,0.470
104
- 14,mlp.up_proj,148.42242432,0.01000,0.479
105
- 14,mlp.gate_proj,214.55715942,0.01000,0.480
106
- 14,mlp.down_proj,7.16841698,0.01000,2.001
107
- 15,self_attn.k_proj,46.09174347,0.01000,0.473
108
- 15,self_attn.v_proj,20.09922600,0.01000,0.469
109
- 15,self_attn.q_proj,97.27299500,0.01000,0.473
110
- 15,self_attn.o_proj,4.08784580,0.01000,0.472
111
- 15,mlp.up_proj,185.49623108,0.01000,0.479
112
- 15,mlp.gate_proj,249.19700623,0.01000,0.481
113
- 15,mlp.down_proj,15.86927128,0.01000,2.031
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  layer,module,loss,samples,damp,time
2
+ 0,self_attn.k_proj,11.50315285,0.01000,1.200
3
+ 0,self_attn.v_proj,0.08434783,0.01000,0.942
4
+ 0,self_attn.q_proj,18.04431343,0.01000,0.974
5
+ 0,self_attn.o_proj,0.00746359,0.01000,0.954
6
+ 0,mlp.up_proj,6.94379950,0.01000,0.970
7
+ 0,mlp.gate_proj,8.43739128,0.01000,0.971
8
+ 0,mlp.down_proj,0.03982410,0.01000,4.106
9
+ 1,self_attn.k_proj,9.79508400,0.01000,0.958
10
+ 1,self_attn.v_proj,0.30334821,0.01000,0.964
11
+ 1,self_attn.q_proj,17.00190735,0.01000,0.977
12
+ 1,self_attn.o_proj,0.02521426,0.01000,0.967
13
+ 1,mlp.up_proj,14.35952663,0.01000,0.979
14
+ 1,mlp.gate_proj,17.07050514,0.01000,0.983
15
+ 1,mlp.down_proj,9.23333359,0.01000,4.086
16
+ 2,self_attn.k_proj,30.43294144,0.01000,0.950
17
+ 2,self_attn.v_proj,1.15037918,0.01000,0.970
18
+ 2,self_attn.q_proj,45.58643723,0.01000,0.964
19
+ 2,self_attn.o_proj,0.03051591,0.01000,0.967
20
+ 2,mlp.up_proj,18.03928757,0.01000,0.982
21
+ 2,mlp.gate_proj,22.90150261,0.01000,0.980
22
+ 2,mlp.down_proj,0.10555533,0.01000,4.078
23
+ 3,self_attn.k_proj,27.10592270,0.01000,1.535
24
+ 3,self_attn.v_proj,2.30466604,0.01000,0.947
25
+ 3,self_attn.q_proj,47.55756378,0.01000,0.963
26
+ 3,self_attn.o_proj,0.05661678,0.01000,0.964
27
+ 3,mlp.up_proj,27.88656998,0.01000,0.980
28
+ 3,mlp.gate_proj,40.16275024,0.01000,0.975
29
+ 3,mlp.down_proj,0.23143937,0.01000,4.022
30
+ 4,self_attn.k_proj,27.37422943,0.01000,0.948
31
+ 4,self_attn.v_proj,2.49580097,0.01000,0.970
32
+ 4,self_attn.q_proj,47.32018280,0.01000,0.967
33
+ 4,self_attn.o_proj,0.11866941,0.01000,0.966
34
+ 4,mlp.up_proj,34.95981979,0.01000,0.982
35
+ 4,mlp.gate_proj,57.44453812,0.01000,0.976
36
+ 4,mlp.down_proj,0.39879456,0.01000,4.011
37
+ 5,self_attn.k_proj,44.15615082,0.01000,0.947
38
+ 5,self_attn.v_proj,2.45797062,0.01000,0.943
39
+ 5,self_attn.q_proj,69.22374725,0.01000,0.957
40
+ 5,self_attn.o_proj,0.13216227,0.01000,0.961
41
+ 5,mlp.up_proj,44.49318695,0.01000,0.983
42
+ 5,mlp.gate_proj,71.99307251,0.01000,0.977
43
+ 5,mlp.down_proj,0.57819533,0.01000,4.072
44
+ 6,self_attn.k_proj,39.04605484,0.01000,0.945
45
+ 6,self_attn.v_proj,3.02949667,0.01000,0.941
46
+ 6,self_attn.q_proj,68.57389832,0.01000,0.968
47
+ 6,self_attn.o_proj,0.20356959,0.01000,0.961
48
+ 6,mlp.up_proj,51.34634399,0.01000,0.985
49
+ 6,mlp.gate_proj,83.72372437,0.01000,0.980
50
+ 6,mlp.down_proj,0.74748993,0.01000,3.988
51
+ 7,self_attn.k_proj,41.18905640,0.01000,0.951
52
+ 7,self_attn.v_proj,3.36715841,0.01000,0.944
53
+ 7,self_attn.q_proj,67.75445557,0.01000,1.042
54
+ 7,self_attn.o_proj,0.25776845,0.01000,0.961
55
+ 7,mlp.up_proj,54.56769562,0.01000,0.981
56
+ 7,mlp.gate_proj,83.44071960,0.01000,0.979
57
+ 7,mlp.down_proj,0.84187621,0.01000,4.054
58
+ 8,self_attn.k_proj,51.15142441,0.01000,0.957
59
+ 8,self_attn.v_proj,4.34867573,0.01000,0.976
60
+ 8,self_attn.q_proj,82.86448669,0.01000,0.967
61
+ 8,self_attn.o_proj,0.30111048,0.01000,0.960
62
+ 8,mlp.up_proj,57.04904175,0.01000,0.985
63
+ 8,mlp.gate_proj,88.49418640,0.01000,0.980
64
+ 8,mlp.down_proj,0.88172758,0.01000,4.088
65
+ 9,self_attn.k_proj,50.66983795,0.01000,0.950
66
+ 9,self_attn.v_proj,6.26596260,0.01000,1.005
67
+ 9,self_attn.q_proj,84.79583740,0.01000,0.964
68
+ 9,self_attn.o_proj,0.39540437,0.01000,0.961
69
+ 9,mlp.up_proj,59.31522369,0.01000,1.601
70
+ 9,mlp.gate_proj,92.27600861,0.01000,0.976
71
+ 9,mlp.down_proj,0.90846139,0.01000,4.001
72
+ 10,self_attn.k_proj,60.17391205,0.01000,0.953
73
+ 10,self_attn.v_proj,4.89551830,0.01000,0.994
74
+ 10,self_attn.q_proj,98.11512756,0.01000,0.967
75
+ 10,self_attn.o_proj,0.34281918,0.01000,0.960
76
+ 10,mlp.up_proj,62.74011230,0.01000,0.982
77
+ 10,mlp.gate_proj,90.72794342,0.01000,0.980
78
+ 10,mlp.down_proj,0.96149510,0.01000,3.990
79
+ 11,self_attn.k_proj,55.94424438,0.01000,0.952
80
+ 11,self_attn.v_proj,5.04172277,0.01000,0.943
81
+ 11,self_attn.q_proj,86.99108887,0.01000,0.971
82
+ 11,self_attn.o_proj,0.33490360,0.01000,0.991
83
+ 11,mlp.up_proj,65.00627899,0.01000,0.994
84
+ 11,mlp.gate_proj,90.79618835,0.01000,0.993
85
+ 11,mlp.down_proj,1.04935539,0.01000,4.030
86
+ 12,self_attn.k_proj,42.39485168,0.01000,0.950
87
+ 12,self_attn.v_proj,5.80718994,0.01000,0.952
88
+ 12,self_attn.q_proj,74.00858307,0.01000,0.970
89
+ 12,self_attn.o_proj,0.50482625,0.01000,0.958
90
+ 12,mlp.up_proj,66.45251465,0.01000,0.981
91
+ 12,mlp.gate_proj,87.68470764,0.01000,0.979
92
+ 12,mlp.down_proj,1.16635084,0.01000,4.013
93
+ 13,self_attn.k_proj,66.97894287,0.01000,0.957
94
+ 13,self_attn.v_proj,6.70645237,0.01000,0.947
95
+ 13,self_attn.q_proj,102.37200928,0.01000,0.969
96
+ 13,self_attn.o_proj,0.47240034,0.01000,0.969
97
+ 13,mlp.up_proj,72.47052765,0.01000,0.991
98
+ 13,mlp.gate_proj,95.45094299,0.01000,0.989
99
+ 13,mlp.down_proj,1.29506612,0.01000,4.084
100
+ 14,self_attn.k_proj,65.34709930,0.01000,0.953
101
+ 14,self_attn.v_proj,6.54276228,0.01000,0.978
102
+ 14,self_attn.q_proj,95.75817871,0.01000,0.963
103
+ 14,self_attn.o_proj,0.56570506,0.01000,0.988
104
+ 14,mlp.up_proj,77.22135162,0.01000,0.983
105
+ 14,mlp.gate_proj,107.80537415,0.01000,0.976
106
+ 14,mlp.down_proj,1.62657499,0.01000,4.082
107
+ 15,self_attn.k_proj,60.99359131,0.01000,0.948
108
+ 15,self_attn.v_proj,7.83463526,0.01000,1.000
109
+ 15,self_attn.q_proj,114.51682281,0.01000,1.005
110
+ 15,self_attn.o_proj,0.98620689,0.01000,0.966
111
+ 15,mlp.up_proj,82.91996765,0.01000,0.985
112
+ 15,mlp.gate_proj,122.00700378,0.01000,0.991
113
+ 15,mlp.down_proj,1.85101867,0.01000,4.006
114
+ 16,self_attn.k_proj,61.05425644,0.01000,0.950
115
+ 16,self_attn.v_proj,6.67351246,0.01000,0.953
116
+ 16,self_attn.q_proj,100.83403015,0.01000,0.961
117
+ 16,self_attn.o_proj,0.69056815,0.01000,0.962
118
+ 16,mlp.up_proj,88.04904938,0.01000,0.977
119
+ 16,mlp.gate_proj,137.32760620,0.01000,0.976
120
+ 16,mlp.down_proj,2.13290906,0.01000,4.007
121
+ 17,self_attn.k_proj,66.14538574,0.01000,0.956
122
+ 17,self_attn.v_proj,7.93916798,0.01000,0.963
123
+ 17,self_attn.q_proj,109.25076294,0.01000,0.973
124
+ 17,self_attn.o_proj,0.67304528,0.01000,0.972
125
+ 17,mlp.up_proj,92.86845398,0.01000,0.984
126
+ 17,mlp.gate_proj,148.43232727,0.01000,0.978
127
+ 17,mlp.down_proj,2.42646503,0.01000,4.064
128
+ 18,self_attn.k_proj,70.76748657,0.01000,0.952
129
+ 18,self_attn.v_proj,7.24831295,0.01000,0.944
130
+ 18,self_attn.q_proj,106.46402740,0.01000,0.962
131
+ 18,self_attn.o_proj,0.36137423,0.01000,0.962
132
+ 18,mlp.up_proj,93.72953796,0.01000,0.983
133
+ 18,mlp.gate_proj,151.20054626,0.01000,0.977
134
+ 18,mlp.down_proj,2.41029239,0.01000,4.024
135
+ 19,self_attn.k_proj,62.90951157,0.01000,0.952
136
+ 19,self_attn.v_proj,8.11475182,0.01000,0.975
137
+ 19,self_attn.q_proj,106.72672272,0.01000,0.960
138
+ 19,self_attn.o_proj,0.45973969,0.01000,0.959
139
+ 19,mlp.up_proj,96.43962860,0.01000,0.983
140
+ 19,mlp.gate_proj,158.58799744,0.01000,0.977
141
+ 19,mlp.down_proj,2.60704017,0.01000,4.036
142
+ 20,self_attn.k_proj,66.16004944,0.01000,0.952
143
+ 20,self_attn.v_proj,8.60017872,0.01000,0.960
144
+ 20,self_attn.q_proj,106.42929077,0.01000,0.975
145
+ 20,self_attn.o_proj,0.42740995,0.01000,0.966
146
+ 20,mlp.up_proj,104.03691864,0.01000,0.986
147
+ 20,mlp.gate_proj,169.39122009,0.01000,2.004
148
+ 20,mlp.down_proj,2.89748049,0.01000,4.059
149
+ 21,self_attn.k_proj,64.93406677,0.01000,0.948
150
+ 21,self_attn.v_proj,9.05480289,0.01000,0.959
151
+ 21,self_attn.q_proj,102.93681335,0.01000,0.966
152
+ 21,self_attn.o_proj,0.57246387,0.01000,0.967
153
+ 21,mlp.up_proj,109.80873108,0.01000,0.985
154
+ 21,mlp.gate_proj,180.10443115,0.01000,0.980
155
+ 21,mlp.down_proj,3.18497372,0.01000,4.002
156
+ 22,self_attn.k_proj,68.19435120,0.01000,0.956
157
+ 22,self_attn.v_proj,10.42789459,0.01000,0.948
158
+ 22,self_attn.q_proj,102.62083435,0.01000,0.959
159
+ 22,self_attn.o_proj,0.82003343,0.01000,0.959
160
+ 22,mlp.up_proj,115.58087921,0.01000,0.992
161
+ 22,mlp.gate_proj,187.88652039,0.01000,0.987
162
+ 22,mlp.down_proj,3.17831731,0.01000,3.990
163
+ 23,self_attn.k_proj,65.93269348,0.01000,0.962
164
+ 23,self_attn.v_proj,11.55161858,0.01000,0.955
165
+ 23,self_attn.q_proj,106.37304688,0.01000,0.966
166
+ 23,self_attn.o_proj,0.79565775,0.01000,0.963
167
+ 23,mlp.up_proj,122.20275116,0.01000,0.984
168
+ 23,mlp.gate_proj,197.79428101,0.01000,0.987
169
+ 23,mlp.down_proj,3.40926170,0.01000,4.092
170
+ 24,self_attn.k_proj,65.81439209,0.01000,0.960
171
+ 24,self_attn.v_proj,14.61132812,0.01000,0.975
172
+ 24,self_attn.q_proj,109.37326050,0.01000,0.963
173
+ 24,self_attn.o_proj,0.96981573,0.01000,0.967
174
+ 24,mlp.up_proj,131.75790405,0.01000,0.978
175
+ 24,mlp.gate_proj,213.20829773,0.01000,0.975
176
+ 24,mlp.down_proj,3.76813459,0.01000,4.002
177
+ 25,self_attn.k_proj,62.32714081,0.01000,0.951
178
+ 25,self_attn.v_proj,15.46690750,0.01000,0.965
179
+ 25,self_attn.q_proj,109.55595398,0.01000,0.976
180
+ 25,self_attn.o_proj,1.38681269,0.01000,0.964
181
+ 25,mlp.up_proj,140.07965088,0.01000,1.005
182
+ 25,mlp.gate_proj,226.35734558,0.01000,0.983
183
+ 25,mlp.down_proj,4.22223282,0.01000,4.021
184
+ 26,self_attn.k_proj,69.03659821,0.01000,0.956
185
+ 26,self_attn.v_proj,14.82448673,0.01000,0.947
186
+ 26,self_attn.q_proj,109.22200775,0.01000,1.008
187
+ 26,self_attn.o_proj,1.21192455,0.01000,0.973
188
+ 26,mlp.up_proj,151.80865479,0.01000,0.987
189
+ 26,mlp.gate_proj,245.63934326,0.01000,0.980
190
+ 26,mlp.down_proj,4.96812248,0.01000,3.998
191
+ 27,self_attn.k_proj,73.81638336,0.01000,0.952
192
+ 27,self_attn.v_proj,21.00315094,0.01000,0.944
193
+ 27,self_attn.q_proj,113.85871887,0.01000,0.963
194
+ 27,self_attn.o_proj,1.86845124,0.01000,0.964
195
+ 27,mlp.up_proj,164.77142334,0.01000,0.989
196
+ 27,mlp.gate_proj,265.11877441,0.01000,0.984
197
+ 27,mlp.down_proj,5.92129993,0.01000,4.004
198
+ 28,self_attn.k_proj,61.08584976,0.01000,0.954
199
+ 28,self_attn.v_proj,19.68507385,0.01000,0.968
200
+ 28,self_attn.q_proj,108.52185822,0.01000,0.962
201
+ 28,self_attn.o_proj,2.66862583,0.01000,0.966
202
+ 28,mlp.up_proj,185.00190735,0.01000,0.984
203
+ 28,mlp.gate_proj,284.06256104,0.01000,0.983
204
+ 28,mlp.down_proj,7.90922022,0.01000,4.070
205
+ 29,self_attn.k_proj,65.70452881,0.01000,0.952
206
+ 29,self_attn.v_proj,23.99268150,0.01000,0.972
207
+ 29,self_attn.q_proj,115.29597473,0.01000,0.973
208
+ 29,self_attn.o_proj,2.71498299,0.01000,0.969
209
+ 29,mlp.up_proj,195.88598633,0.01000,0.988
210
+ 29,mlp.gate_proj,286.53732300,0.01000,0.981
211
+ 29,mlp.down_proj,10.72382736,0.01000,4.052
212
+ 30,self_attn.k_proj,65.22695923,0.01000,0.952
213
+ 30,self_attn.v_proj,32.39274597,0.01000,0.952
214
+ 30,self_attn.q_proj,109.58857727,0.01000,0.975
215
+ 30,self_attn.o_proj,6.09502459,0.01000,0.970
216
+ 30,mlp.up_proj,210.70892334,0.01000,0.988
217
+ 30,mlp.gate_proj,316.56787109,0.01000,0.985
218
+ 30,mlp.down_proj,18.89646912,0.01000,4.010
219
+ 31,self_attn.k_proj,47.18705368,0.01000,0.956
220
+ 31,self_attn.v_proj,20.02719879,0.01000,0.950
221
+ 31,self_attn.q_proj,90.60032654,0.01000,0.971
222
+ 31,self_attn.o_proj,7.11097431,0.01000,0.965
223
+ 31,mlp.up_proj,202.48239136,0.01000,0.984
224
+ 31,mlp.gate_proj,296.04345703,0.01000,0.980
225
+ 31,mlp.down_proj,60.59933090,0.01000,4.000
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
3
- size 17209920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9400df98529060210393c40f08cb127f7c0df584338b3fbfdba8cf82a33c1ade
3
+ size 17210102
tokenizer_config.json CHANGED
@@ -2047,6 +2047,14 @@
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
 
 
 
 
 
 
 
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
 
2047
  "rstrip": false,
2048
  "single_word": false,
2049
  "special": true
2050
+ },
2051
+ "128256": {
2052
+ "content": "<pad>",
2053
+ "lstrip": false,
2054
+ "normalized": false,
2055
+ "rstrip": false,
2056
+ "single_word": false,
2057
+ "special": true
2058
  }
2059
  },
2060
  "bos_token": "<|begin_of_text|>",