zx-modelcloud committed
Commit 370a9d9 · verified · 1 Parent(s): d784d77

Add files using upload-large-folder tool

Files changed (5):
  1. config.json +13 -7
  2. generation_config.json +12 -0
  3. model.safetensors +2 -2
  4. quant_log.csv +162 -15
  5. quantize_config.json +10 -4
config.json CHANGED
@@ -1,5 +1,4 @@
  {
- "_attn_implementation_autoset": true,
  "_name_or_path": "/monster/data/model/Qwen1.5-1.8B-Chat",
  "architectures": [
  "Qwen2ForCausalLM"
@@ -22,10 +21,17 @@
  "checkpoint_format": "gptq",
  "desc_act": true,
  "dynamic": {
- "-:model\\.layers\\.([2-9]\\d*|[1-9]\\d+)\\..*": {},
- ".*\\.1\\..*": {
+ "-:model\\.layers\\.2\\..*": {},
+ ".*\\.1\\..*k_proj.*": {
+ "bits": 8,
+ "group_size": 32
+ },
+ ".*\\.1\\..*q_proj.*": {
+ "bits": 8,
+ "group_size": 32
+ },
+ ".*\\.1\\..*v_proj.*": {
  "bits": 8,
- "desc_act": false,
  "group_size": 32
  },
  "lm_head": {
@@ -43,7 +49,7 @@
  "damp_percent": 0.01,
  "mse": 0.0,
  "quantizer": [
- "gptqmodel:1.8.2"
+ "gptqmodel:2.0.0-dev"
  ],
  "static_groups": false,
  "true_sequential": true,
@@ -56,10 +62,10 @@
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
- "sliding_window": null,
+ "sliding_window": 32768,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
- "transformers_version": "4.48.2",
+ "transformers_version": "4.49.0",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
generation_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+ "bos_token_id": 151643,
+ "do_sample": true,
+ "eos_token_id": [
+ 151645,
+ 151643
+ ],
+ "pad_token_id": 151643,
+ "repetition_penalty": 1.1,
+ "top_p": 0.8,
+ "transformers_version": "4.49.0"
+ }
```
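generation_config.json is new in this commit; transformers' `generate()` picks these sampling defaults up automatically when the model is loaded from the repo. A quick way to confirm them (the local path is a placeholder):

```python
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("./Qwen1.5-1.8B-Chat-GPTQ")  # placeholder path
print(gen.do_sample, gen.top_p, gen.repetition_penalty)  # expected: True 0.8 1.1
print(gen.eos_token_id)                                  # expected: [151645, 151643]
```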
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ad0b688e3a726e4663da6916d11e188fa2767a4a95ea3f07b562f704cf8ec8ef
- size 3271099656
+ oid sha256:a52f3fb6998f54cad9cce6d11982ac4010b2666584a4c4bf53d6c55e9590aeb7
+ size 1677862584
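The weights live in Git LFS, so the diff only touches the pointer; the checkpoint itself shrinks from roughly 3.27 GB to 1.68 GB, consistent with quantizing all layers except layer 2 instead of leaving layers 2 and above unquantized. A downloaded file can be checked against the new pointer's oid:

```python
# Integrity check: hash the downloaded file and compare to the LFS oid above.
import hashlib

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:          # placeholder local path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
print(h.hexdigest() == "a52f3fb6998f54cad9cce6d11982ac4010b2666584a4c4bf53d6c55e9590aeb7")
```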
quant_log.csv CHANGED
@@ -1,16 +1,163 @@
  layer,module,loss,damp,time
- 0,self_attn.k_proj,0.31380,0.01000,0.476
- 0,self_attn.v_proj,0.00746,0.01000,0.329
- 0,self_attn.q_proj,0.42213,0.01000,0.314
- 0,self_attn.o_proj,0.00029,0.01000,0.438
- 0,mlp.up_proj,1.62644,0.01000,0.363
- 0,mlp.gate_proj,1.55393,0.01000,0.355
- 0,mlp.down_proj,0.03734,0.01000,0.889
- 1,self_attn.k_proj,0.00605,0.01000,0.334
- 1,self_attn.v_proj,0.00082,0.01000,0.330
- 1,self_attn.q_proj,0.00601,0.01000,0.332
- 1,self_attn.o_proj,0.00002,0.01000,0.331
- 1,mlp.up_proj,0.00466,0.01000,0.374
- 1,mlp.gate_proj,0.00529,0.01000,0.374
- 1,mlp.down_proj,0.00024,0.01000,0.889
- 24,lm_head,31.60924,0.01000,5.797
+ 0,self_attn.k_proj,1.52977,0.01000,0.419
+ 0,self_attn.v_proj,0.04002,0.01000,0.248
+ 0,self_attn.q_proj,2.05718,0.01000,0.239
+ 0,self_attn.o_proj,0.00179,0.01000,0.248
+ 0,mlp.up_proj,9.05834,0.01000,0.258
+ 0,mlp.gate_proj,8.63545,0.01000,0.244
+ 0,mlp.down_proj,0.19796,0.01000,0.723
+ 1,self_attn.k_proj,0.03884,0.01000,0.267
+ 1,self_attn.v_proj,0.00530,0.01000,0.253
+ 1,self_attn.q_proj,0.03862,0.01000,0.253
+ 1,self_attn.o_proj,0.04960,0.01000,0.250
+ 1,mlp.up_proj,12.92030,0.01000,0.251
+ 1,mlp.gate_proj,14.63847,0.01000,0.241
+ 1,mlp.down_proj,0.32817,0.01000,0.687
+ 3,self_attn.k_proj,62.73893,0.01000,0.254
+ 3,self_attn.v_proj,13.23958,0.01000,0.307
+ 3,self_attn.q_proj,57.28278,0.01000,0.260
+ 3,self_attn.o_proj,0.13773,0.01000,0.245
+ 3,mlp.up_proj,21.15522,0.01000,0.251
+ 3,mlp.gate_proj,24.74686,0.01000,0.243
+ 3,mlp.down_proj,0.53724,0.01000,0.727
+ 4,self_attn.k_proj,68.30590,0.01000,0.263
+ 4,self_attn.v_proj,15.82255,0.01000,0.247
+ 4,self_attn.q_proj,63.01257,0.01000,0.259
+ 4,self_attn.o_proj,0.22000,0.01000,0.242
+ 4,mlp.up_proj,28.74498,0.01000,0.253
+ 4,mlp.gate_proj,33.93200,0.01000,0.245
+ 4,mlp.down_proj,0.65790,0.01000,0.692
+ 5,self_attn.k_proj,60.91286,0.01000,0.271
+ 5,self_attn.v_proj,14.59481,0.01000,0.232
+ 5,self_attn.q_proj,56.24625,0.01000,0.232
+ 5,self_attn.o_proj,0.32792,0.01000,0.248
+ 5,mlp.up_proj,38.43754,0.01000,0.266
+ 5,mlp.gate_proj,44.36505,0.01000,0.245
+ 5,mlp.down_proj,1.13535,0.01000,0.690
+ 6,self_attn.k_proj,60.38514,0.01000,0.246
+ 6,self_attn.v_proj,16.79150,0.01000,0.234
+ 6,self_attn.q_proj,57.52007,0.01000,0.232
+ 6,self_attn.o_proj,0.55901,0.01000,0.293
+ 6,mlp.up_proj,46.22112,0.01000,0.321
+ 6,mlp.gate_proj,52.69062,0.01000,0.237
+ 6,mlp.down_proj,12.85980,0.01000,0.794
+ 7,self_attn.k_proj,68.91588,0.01000,0.244
+ 7,self_attn.v_proj,25.70186,0.01000,0.234
+ 7,self_attn.q_proj,74.04494,0.01000,0.336
+ 7,self_attn.o_proj,0.60068,0.01000,0.296
+ 7,mlp.up_proj,51.54780,0.01000,0.239
+ 7,mlp.gate_proj,56.22544,0.01000,0.232
+ 7,mlp.down_proj,2.15107,0.01000,0.670
+ 8,self_attn.k_proj,63.73604,0.01000,0.250
+ 8,self_attn.v_proj,24.49372,0.01000,0.232
+ 8,self_attn.q_proj,65.13046,0.01000,0.231
+ 8,self_attn.o_proj,0.90025,0.01000,0.237
+ 8,mlp.up_proj,54.74615,0.01000,0.302
+ 8,mlp.gate_proj,58.73801,0.01000,0.246
+ 8,mlp.down_proj,2.61607,0.01000,0.706
+ 9,self_attn.k_proj,68.32961,0.01000,0.322
+ 9,self_attn.v_proj,29.13957,0.01000,0.270
+ 9,self_attn.q_proj,68.92210,0.01000,0.238
+ 9,self_attn.o_proj,1.26286,0.01000,0.245
+ 9,mlp.up_proj,56.97021,0.01000,0.245
+ 9,mlp.gate_proj,57.39296,0.01000,0.230
+ 9,mlp.down_proj,2.88838,0.01000,0.670
+ 10,self_attn.k_proj,77.47882,0.01000,0.247
+ 10,self_attn.v_proj,37.03654,0.01000,0.242
+ 10,self_attn.q_proj,82.30052,0.01000,0.276
+ 10,self_attn.o_proj,1.59611,0.01000,0.258
+ 10,mlp.up_proj,60.05450,0.01000,0.261
+ 10,mlp.gate_proj,63.74242,0.01000,0.233
+ 10,mlp.down_proj,3.57756,0.01000,0.664
+ 11,self_attn.k_proj,76.88316,0.01000,0.302
+ 11,self_attn.v_proj,42.78612,0.01000,0.267
+ 11,self_attn.q_proj,81.16360,0.01000,0.301
+ 11,self_attn.o_proj,1.50892,0.01000,0.335
+ 11,mlp.up_proj,63.83591,0.01000,0.341
+ 11,mlp.gate_proj,65.37469,0.01000,0.296
+ 11,mlp.down_proj,4.61587,0.01000,0.771
+ 12,self_attn.k_proj,77.84145,0.01000,0.249
+ 12,self_attn.v_proj,43.29803,0.01000,0.257
+ 12,self_attn.q_proj,81.49866,0.01000,0.287
+ 12,self_attn.o_proj,2.36211,0.01000,0.259
+ 12,mlp.up_proj,67.92334,0.01000,0.292
+ 12,mlp.gate_proj,68.87761,0.01000,0.303
+ 12,mlp.down_proj,5.67846,0.01000,0.746
+ 13,self_attn.k_proj,78.39484,0.01000,0.267
+ 13,self_attn.v_proj,49.21562,0.01000,0.263
+ 13,self_attn.q_proj,78.76001,0.01000,0.272
+ 13,self_attn.o_proj,2.40954,0.01000,0.254
+ 13,mlp.up_proj,72.61395,0.01000,0.246
+ 13,mlp.gate_proj,72.00835,0.01000,0.236
+ 13,mlp.down_proj,7.55164,0.01000,0.774
+ 14,self_attn.k_proj,85.13994,0.01000,0.242
+ 14,self_attn.v_proj,65.58170,0.01000,0.230
+ 14,self_attn.q_proj,91.41071,0.01000,0.237
+ 14,self_attn.o_proj,3.37462,0.01000,0.240
+ 14,mlp.up_proj,83.58630,0.01000,0.242
+ 14,mlp.gate_proj,80.19814,0.01000,0.232
+ 14,mlp.down_proj,10.20866,0.01000,0.662
+ 15,self_attn.k_proj,82.94417,0.01000,0.244
+ 15,self_attn.v_proj,70.66344,0.01000,0.256
+ 15,self_attn.q_proj,86.81380,0.01000,0.230
+ 15,self_attn.o_proj,3.95524,0.01000,0.236
+ 15,mlp.up_proj,99.12436,0.01000,0.240
+ 15,mlp.gate_proj,92.91805,0.01000,0.286
+ 15,mlp.down_proj,13.83353,0.01000,0.701
+ 16,self_attn.k_proj,88.87656,0.01000,0.244
+ 16,self_attn.v_proj,75.56125,0.01000,0.243
+ 16,self_attn.q_proj,93.78792,0.01000,0.231
+ 16,self_attn.o_proj,3.89123,0.01000,0.239
+ 16,mlp.up_proj,116.95462,0.01000,0.241
+ 16,mlp.gate_proj,109.16769,0.01000,0.233
+ 16,mlp.down_proj,18.61803,0.01000,0.664
+ 17,self_attn.k_proj,88.49626,0.01000,0.244
+ 17,self_attn.v_proj,98.59828,0.01000,0.233
+ 17,self_attn.q_proj,97.10524,0.01000,0.231
+ 17,self_attn.o_proj,5.03318,0.01000,0.238
+ 17,mlp.up_proj,134.24248,0.01000,0.278
+ 17,mlp.gate_proj,125.89964,0.01000,0.247
+ 17,mlp.down_proj,22.90651,0.01000,0.655
+ 18,self_attn.k_proj,91.61066,0.01000,0.253
+ 18,self_attn.v_proj,120.96928,0.01000,0.253
+ 18,self_attn.q_proj,99.53651,0.01000,0.232
+ 18,self_attn.o_proj,8.69388,0.01000,0.236
+ 18,mlp.up_proj,156.05095,0.01000,0.240
+ 18,mlp.gate_proj,139.47502,0.01000,0.232
+ 18,mlp.down_proj,30.87194,0.01000,0.660
+ 19,self_attn.k_proj,96.57993,0.01000,0.241
+ 19,self_attn.v_proj,132.85014,0.01000,0.230
+ 19,self_attn.q_proj,105.32343,0.01000,0.230
+ 19,self_attn.o_proj,9.85187,0.01000,0.270
+ 19,mlp.up_proj,173.96132,0.01000,0.243
+ 19,mlp.gate_proj,152.68323,0.01000,0.232
+ 19,mlp.down_proj,42.00521,0.01000,0.674
+ 20,self_attn.k_proj,101.74275,0.01000,0.276
+ 20,self_attn.v_proj,147.70975,0.01000,0.231
+ 20,self_attn.q_proj,111.28523,0.01000,0.239
+ 20,self_attn.o_proj,8.89061,0.01000,0.244
+ 20,mlp.up_proj,188.34740,0.01000,0.242
+ 20,mlp.gate_proj,161.44333,0.01000,0.234
+ 20,mlp.down_proj,49.47644,0.01000,0.674
+ 21,self_attn.k_proj,104.71509,0.01000,0.254
+ 21,self_attn.v_proj,158.71223,0.01000,0.254
+ 21,self_attn.q_proj,115.99524,0.01000,0.265
+ 21,self_attn.o_proj,12.59802,0.01000,0.241
+ 21,mlp.up_proj,213.42955,0.01000,0.251
+ 21,mlp.gate_proj,180.52798,0.01000,0.282
+ 21,mlp.down_proj,63.05851,0.01000,0.709
+ 22,self_attn.k_proj,96.23979,0.01000,0.243
+ 22,self_attn.v_proj,150.83650,0.01000,0.230
+ 22,self_attn.q_proj,103.43996,0.01000,0.231
+ 22,self_attn.o_proj,18.46274,0.01000,0.235
+ 22,mlp.up_proj,230.47380,0.01000,0.239
+ 22,mlp.gate_proj,198.01071,0.01000,0.232
+ 22,mlp.down_proj,76.59824,0.01000,0.658
+ 23,self_attn.k_proj,99.04884,0.01000,0.241
+ 23,self_attn.v_proj,149.65315,0.01000,0.230
+ 23,self_attn.q_proj,97.98630,0.01000,0.258
+ 23,self_attn.o_proj,30.47421,0.01000,0.298
+ 23,mlp.up_proj,256.14508,0.01000,0.253
+ 23,mlp.gate_proj,221.90912,0.01000,0.232
+ 23,mlp.down_proj,158.18503,0.01000,0.668
+ 24,lm_head,132.93503,0.01000,3.311
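The rewritten log now covers every transformer layer rather than just layers 0-1 plus lm_head. Layer 2 never appears, matching the `-:model\.layers\.2\..*` skip rule, and per-module loss climbs steadily with depth. A small pandas sketch (the filename is assumed) for eyeballing the log:

```python
import pandas as pd

log = pd.read_csv("quant_log.csv")
# Mean GPTQ loss per layer; layer 2 is absent because it was skipped.
print(log.groupby("layer")["loss"].mean().round(2))
# lm_head is quantized last and logged as its own row.
print(log[log["module"] == "lm_head"])
```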
quantize_config.json CHANGED
@@ -1,10 +1,16 @@
  {
  "bits": 4,
  "dynamic": {
- "-:model\\.layers\\.([2-9]\\d*|[1-9]\\d+)\\..*": {},
- ".*\\.1\\..*": {
+ ".*\\.1\\..*q_proj.*": {
+ "bits": 8,
+ "group_size": 32
+ },
+ ".*\\.1\\..*k_proj.*": {
+ "bits": 8,
+ "group_size": 32
+ },
+ ".*\\.1\\..*v_proj.*": {
  "bits": 8,
- "desc_act": false,
  "group_size": 32
  },
  "lm_head": {
@@ -24,7 +30,7 @@
  "pack_dtype": "int32",
  "meta": {
  "quantizer": [
- "gptqmodel:1.8.2"
+ "gptqmodel:2.0.0-dev"
  ],
  "uri": "https://github.com/modelcloud/gptqmodel",
  "damp_percent": 0.01,