Upload 7 files
Browse files- .gitattributes +1 -0
- config.json +51 -0
- model.safetensors +3 -0
- quant_log.csv +449 -0
- quantize_config.json +21 -0
- special_tokens_map.json +23 -0
- tokenizer.json +3 -0
- tokenizer_config.json +195 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_attn_implementation_autoset": true,
|
3 |
+
"_name_or_path": "/home/mmv/.cache/huggingface/hub/models--FuseAI--FuseO1-DeepSeekR1-QwQ-SkyT1-Flash-32B-Preview/snapshots/60aac57f283c2aa50091efcc317801a961ca72dd",
|
4 |
+
"architectures": [
|
5 |
+
"Qwen2ForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 151643,
|
9 |
+
"eos_token_id": 151643,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 5120,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 27648,
|
14 |
+
"max_position_embeddings": 131072,
|
15 |
+
"max_window_layers": 64,
|
16 |
+
"model_type": "qwen2",
|
17 |
+
"num_attention_heads": 40,
|
18 |
+
"num_hidden_layers": 64,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"quantization_config": {
|
21 |
+
"bits": 4,
|
22 |
+
"checkpoint_format": "gptq",
|
23 |
+
"desc_act": true,
|
24 |
+
"dynamic": null,
|
25 |
+
"group_size": 32,
|
26 |
+
"lm_head": false,
|
27 |
+
"meta": {
|
28 |
+
"damp_auto_increment": 0.0015,
|
29 |
+
"damp_percent": 0.1,
|
30 |
+
"mse": 0.0,
|
31 |
+
"quantizer": [
|
32 |
+
"gptqmodel:1.7.3"
|
33 |
+
],
|
34 |
+
"static_groups": false,
|
35 |
+
"true_sequential": true,
|
36 |
+
"uri": "https://github.com/modelcloud/gptqmodel"
|
37 |
+
},
|
38 |
+
"quant_method": "gptq",
|
39 |
+
"sym": true
|
40 |
+
},
|
41 |
+
"rms_norm_eps": 1e-05,
|
42 |
+
"rope_scaling": null,
|
43 |
+
"rope_theta": 1000000.0,
|
44 |
+
"sliding_window": null,
|
45 |
+
"tie_word_embeddings": false,
|
46 |
+
"torch_dtype": "bfloat16",
|
47 |
+
"transformers_version": "4.48.1",
|
48 |
+
"use_cache": false,
|
49 |
+
"use_sliding_window": false,
|
50 |
+
"vocab_size": 152064
|
51 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd8f6726f87f31d9ac8fc5fab15eda1406054b05244b0f2fd07251e2bf4d2cb0
|
3 |
+
size 21172443232
|
quant_log.csv
ADDED
@@ -0,0 +1,449 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
layer,module,loss,damp,time
|
2 |
+
0,self_attn.k_proj,0.21932,0.10000,1.539
|
3 |
+
0,self_attn.v_proj,0.06990,0.10000,1.339
|
4 |
+
0,self_attn.q_proj,0.63353,0.10000,1.497
|
5 |
+
0,self_attn.o_proj,1.53644,0.10000,1.496
|
6 |
+
0,mlp.up_proj,0.48136,0.10000,2.423
|
7 |
+
0,mlp.gate_proj,0.52939,0.10000,2.411
|
8 |
+
0,mlp.down_proj,0.60040,0.10000,11.206
|
9 |
+
1,self_attn.k_proj,0.00747,0.10000,1.339
|
10 |
+
1,self_attn.v_proj,0.00383,0.10000,1.341
|
11 |
+
1,self_attn.q_proj,0.02565,0.10000,1.502
|
12 |
+
1,self_attn.o_proj,0.01501,0.10000,1.503
|
13 |
+
1,mlp.up_proj,2.40089,0.10000,2.445
|
14 |
+
1,mlp.gate_proj,6.17636,0.10000,2.421
|
15 |
+
1,mlp.down_proj,0.07097,0.10000,11.123
|
16 |
+
2,self_attn.k_proj,0.03215,0.10000,1.347
|
17 |
+
2,self_attn.v_proj,0.01367,0.10000,1.346
|
18 |
+
2,self_attn.q_proj,0.09015,0.10000,1.512
|
19 |
+
2,self_attn.o_proj,0.09266,0.10000,1.487
|
20 |
+
2,mlp.up_proj,5.20606,0.10000,2.403
|
21 |
+
2,mlp.gate_proj,10.58370,0.10000,2.400
|
22 |
+
2,mlp.down_proj,0.36352,0.10000,11.242
|
23 |
+
3,self_attn.k_proj,0.20204,0.10000,1.358
|
24 |
+
3,self_attn.v_proj,0.07010,0.10000,1.345
|
25 |
+
3,self_attn.q_proj,0.58422,0.10000,1.520
|
26 |
+
3,self_attn.o_proj,0.32670,0.10000,1.508
|
27 |
+
3,mlp.up_proj,6.34731,0.10000,2.424
|
28 |
+
3,mlp.gate_proj,13.98404,0.10000,2.424
|
29 |
+
3,mlp.down_proj,0.86336,0.10000,11.249
|
30 |
+
4,self_attn.k_proj,0.19773,0.10000,1.336
|
31 |
+
4,self_attn.v_proj,0.10230,0.10000,1.326
|
32 |
+
4,self_attn.q_proj,0.60618,0.10000,1.496
|
33 |
+
4,self_attn.o_proj,0.63086,0.10000,1.490
|
34 |
+
4,mlp.up_proj,16.42926,0.10000,2.387
|
35 |
+
4,mlp.gate_proj,32.29819,0.10000,2.408
|
36 |
+
4,mlp.down_proj,587.46689,0.10000,11.247
|
37 |
+
5,self_attn.k_proj,0.72354,0.10000,1.348
|
38 |
+
5,self_attn.v_proj,0.42462,0.10000,1.334
|
39 |
+
5,self_attn.q_proj,2.40297,0.10000,1.496
|
40 |
+
5,self_attn.o_proj,0.73659,0.10000,1.501
|
41 |
+
5,mlp.up_proj,26.50791,0.10000,2.416
|
42 |
+
5,mlp.gate_proj,49.79549,0.10000,2.393
|
43 |
+
5,mlp.down_proj,522.29541,0.10000,11.290
|
44 |
+
6,self_attn.k_proj,0.66616,0.10000,1.346
|
45 |
+
6,self_attn.v_proj,0.49659,0.10000,1.339
|
46 |
+
6,self_attn.q_proj,2.35563,0.10000,1.503
|
47 |
+
6,self_attn.o_proj,0.58956,0.10000,1.506
|
48 |
+
6,mlp.up_proj,42.32227,0.10000,2.401
|
49 |
+
6,mlp.gate_proj,75.13210,0.10000,2.409
|
50 |
+
6,mlp.down_proj,31.55573,0.10000,11.224
|
51 |
+
7,self_attn.k_proj,0.85074,0.10000,1.335
|
52 |
+
7,self_attn.v_proj,0.69796,0.10000,1.331
|
53 |
+
7,self_attn.q_proj,3.08233,0.10000,1.500
|
54 |
+
7,self_attn.o_proj,0.63950,0.10000,1.500
|
55 |
+
7,mlp.up_proj,49.91836,0.10000,2.395
|
56 |
+
7,mlp.gate_proj,89.78850,0.10000,2.408
|
57 |
+
7,mlp.down_proj,2.12929,0.10000,11.077
|
58 |
+
8,self_attn.k_proj,1.07405,0.10000,1.363
|
59 |
+
8,self_attn.v_proj,0.64949,0.10000,1.328
|
60 |
+
8,self_attn.q_proj,3.66730,0.10000,1.483
|
61 |
+
8,self_attn.o_proj,0.57933,0.10000,1.486
|
62 |
+
8,mlp.up_proj,30.98035,0.10000,2.376
|
63 |
+
8,mlp.gate_proj,54.68026,0.10000,2.375
|
64 |
+
8,mlp.down_proj,2.39296,0.10000,11.024
|
65 |
+
9,self_attn.k_proj,0.85622,0.10000,1.303
|
66 |
+
9,self_attn.v_proj,0.70850,0.10000,1.290
|
67 |
+
9,self_attn.q_proj,3.05020,0.10000,1.439
|
68 |
+
9,self_attn.o_proj,0.97835,0.10000,1.446
|
69 |
+
9,mlp.up_proj,12.53627,0.10000,2.276
|
70 |
+
9,mlp.gate_proj,13.44056,0.10000,2.272
|
71 |
+
9,mlp.down_proj,2.59464,0.10000,10.920
|
72 |
+
10,self_attn.k_proj,1.16653,0.10000,1.312
|
73 |
+
10,self_attn.v_proj,0.90903,0.10000,1.360
|
74 |
+
10,self_attn.q_proj,4.31524,0.10000,1.433
|
75 |
+
10,self_attn.o_proj,0.95127,0.10000,1.505
|
76 |
+
10,mlp.up_proj,14.21657,0.10000,2.277
|
77 |
+
10,mlp.gate_proj,15.31685,0.10000,2.270
|
78 |
+
10,mlp.down_proj,3.00897,0.10000,10.969
|
79 |
+
11,self_attn.k_proj,0.94783,0.10000,1.298
|
80 |
+
11,self_attn.v_proj,0.68951,0.10000,1.316
|
81 |
+
11,self_attn.q_proj,3.46378,0.10000,1.445
|
82 |
+
11,self_attn.o_proj,1.90921,0.10000,1.447
|
83 |
+
11,mlp.up_proj,19.40327,0.10000,2.280
|
84 |
+
11,mlp.gate_proj,23.92704,0.10000,2.285
|
85 |
+
11,mlp.down_proj,2.97715,0.10000,10.946
|
86 |
+
12,self_attn.k_proj,1.08602,0.10000,1.291
|
87 |
+
12,self_attn.v_proj,0.74431,0.10000,1.290
|
88 |
+
12,self_attn.q_proj,4.01792,0.10000,1.438
|
89 |
+
12,self_attn.o_proj,1.80497,0.10000,1.432
|
90 |
+
12,mlp.up_proj,16.91254,0.10000,2.273
|
91 |
+
12,mlp.gate_proj,18.04272,0.10000,2.274
|
92 |
+
12,mlp.down_proj,3.48199,0.10000,11.011
|
93 |
+
13,self_attn.k_proj,1.22753,0.10000,1.290
|
94 |
+
13,self_attn.v_proj,0.89010,0.10000,1.307
|
95 |
+
13,self_attn.q_proj,4.36018,0.10000,1.439
|
96 |
+
13,self_attn.o_proj,1.75457,0.10000,1.439
|
97 |
+
13,mlp.up_proj,19.40314,0.10000,2.292
|
98 |
+
13,mlp.gate_proj,20.87128,0.10000,2.309
|
99 |
+
13,mlp.down_proj,3.92058,0.10000,11.225
|
100 |
+
14,self_attn.k_proj,1.47310,0.10000,1.327
|
101 |
+
14,self_attn.v_proj,0.97991,0.10000,1.304
|
102 |
+
14,self_attn.q_proj,5.31070,0.10000,1.433
|
103 |
+
14,self_attn.o_proj,1.86601,0.10000,1.513
|
104 |
+
14,mlp.up_proj,20.50224,0.10000,2.349
|
105 |
+
14,mlp.gate_proj,22.18833,0.10000,2.281
|
106 |
+
14,mlp.down_proj,4.27567,0.10000,11.369
|
107 |
+
15,self_attn.k_proj,1.24926,0.10000,1.348
|
108 |
+
15,self_attn.v_proj,0.96484,0.10000,1.370
|
109 |
+
15,self_attn.q_proj,4.50831,0.10000,1.576
|
110 |
+
15,self_attn.o_proj,2.08441,0.10000,1.527
|
111 |
+
15,mlp.up_proj,21.55517,0.10000,2.360
|
112 |
+
15,mlp.gate_proj,24.37054,0.10000,2.485
|
113 |
+
15,mlp.down_proj,4.66921,0.10000,11.144
|
114 |
+
16,self_attn.k_proj,1.20586,0.10000,1.367
|
115 |
+
16,self_attn.v_proj,0.74171,0.10000,1.377
|
116 |
+
16,self_attn.q_proj,4.09036,0.10000,1.525
|
117 |
+
16,self_attn.o_proj,1.66565,0.10000,1.456
|
118 |
+
16,mlp.up_proj,20.58864,0.10000,2.293
|
119 |
+
16,mlp.gate_proj,22.13742,0.10000,2.317
|
120 |
+
16,mlp.down_proj,4.32864,0.10000,11.065
|
121 |
+
17,self_attn.k_proj,1.55755,0.10000,1.392
|
122 |
+
17,self_attn.v_proj,0.92695,0.10000,1.403
|
123 |
+
17,self_attn.q_proj,5.36392,0.10000,1.487
|
124 |
+
17,self_attn.o_proj,1.68428,0.10000,1.561
|
125 |
+
17,mlp.up_proj,20.66038,0.10000,2.341
|
126 |
+
17,mlp.gate_proj,21.92830,0.10000,2.327
|
127 |
+
17,mlp.down_proj,4.24783,0.10000,11.208
|
128 |
+
18,self_attn.k_proj,1.60531,0.10000,1.404
|
129 |
+
18,self_attn.v_proj,0.96575,0.10000,1.319
|
130 |
+
18,self_attn.q_proj,5.49226,0.10000,1.459
|
131 |
+
18,self_attn.o_proj,1.48581,0.10000,1.522
|
132 |
+
18,mlp.up_proj,20.34096,0.10000,2.379
|
133 |
+
18,mlp.gate_proj,21.43189,0.10000,2.457
|
134 |
+
18,mlp.down_proj,4.20860,0.10000,10.987
|
135 |
+
19,self_attn.k_proj,1.38301,0.10000,1.424
|
136 |
+
19,self_attn.v_proj,0.94416,0.10000,1.473
|
137 |
+
19,self_attn.q_proj,5.01696,0.10000,1.593
|
138 |
+
19,self_attn.o_proj,1.21167,0.10000,1.650
|
139 |
+
19,mlp.up_proj,20.60382,0.10000,2.310
|
140 |
+
19,mlp.gate_proj,21.65518,0.10000,2.306
|
141 |
+
19,mlp.down_proj,4.24532,0.10000,11.253
|
142 |
+
20,self_attn.k_proj,1.67529,0.10000,1.375
|
143 |
+
20,self_attn.v_proj,0.97326,0.10000,1.394
|
144 |
+
20,self_attn.q_proj,5.80866,0.10000,1.638
|
145 |
+
20,self_attn.o_proj,2.01039,0.10000,1.520
|
146 |
+
20,mlp.up_proj,20.08991,0.10000,2.333
|
147 |
+
20,mlp.gate_proj,20.81839,0.10000,2.396
|
148 |
+
20,mlp.down_proj,4.39435,0.10000,11.599
|
149 |
+
21,self_attn.k_proj,1.37911,0.10000,1.429
|
150 |
+
21,self_attn.v_proj,0.84209,0.10000,1.401
|
151 |
+
21,self_attn.q_proj,4.83675,0.10000,1.528
|
152 |
+
21,self_attn.o_proj,1.99481,0.10000,1.546
|
153 |
+
21,mlp.up_proj,19.99206,0.10000,2.361
|
154 |
+
21,mlp.gate_proj,20.76938,0.10000,2.369
|
155 |
+
21,mlp.down_proj,4.42359,0.10000,11.515
|
156 |
+
22,self_attn.k_proj,1.50769,0.10000,1.295
|
157 |
+
22,self_attn.v_proj,1.15672,0.10000,1.275
|
158 |
+
22,self_attn.q_proj,5.48060,0.10000,1.425
|
159 |
+
22,self_attn.o_proj,2.49873,0.10000,1.456
|
160 |
+
22,mlp.up_proj,21.26843,0.10000,2.309
|
161 |
+
22,mlp.gate_proj,22.03465,0.10000,2.334
|
162 |
+
22,mlp.down_proj,4.79118,0.10000,10.968
|
163 |
+
23,self_attn.k_proj,1.44571,0.10000,1.375
|
164 |
+
23,self_attn.v_proj,1.18315,0.10000,1.384
|
165 |
+
23,self_attn.q_proj,5.38706,0.10000,1.494
|
166 |
+
23,self_attn.o_proj,2.65820,0.10000,1.488
|
167 |
+
23,mlp.up_proj,22.39951,0.10000,2.343
|
168 |
+
23,mlp.gate_proj,23.49767,0.10000,2.310
|
169 |
+
23,mlp.down_proj,5.14678,0.10000,11.171
|
170 |
+
24,self_attn.k_proj,1.88726,0.10000,1.306
|
171 |
+
24,self_attn.v_proj,1.25504,0.10000,1.299
|
172 |
+
24,self_attn.q_proj,6.79697,0.10000,1.547
|
173 |
+
24,self_attn.o_proj,2.37069,0.10000,1.465
|
174 |
+
24,mlp.up_proj,23.00431,0.10000,2.300
|
175 |
+
24,mlp.gate_proj,23.90416,0.10000,2.303
|
176 |
+
24,mlp.down_proj,5.33596,0.10000,11.077
|
177 |
+
25,self_attn.k_proj,2.08859,0.10000,1.484
|
178 |
+
25,self_attn.v_proj,1.52274,0.10000,1.291
|
179 |
+
25,self_attn.q_proj,7.71944,0.10000,1.448
|
180 |
+
25,self_attn.o_proj,2.59465,0.10000,1.461
|
181 |
+
25,mlp.up_proj,23.82350,0.10000,2.295
|
182 |
+
25,mlp.gate_proj,24.36376,0.10000,2.270
|
183 |
+
25,mlp.down_proj,6.14879,0.10000,10.939
|
184 |
+
26,self_attn.k_proj,1.85201,0.10000,1.303
|
185 |
+
26,self_attn.v_proj,1.08654,0.10000,1.297
|
186 |
+
26,self_attn.q_proj,6.72261,0.10000,1.435
|
187 |
+
26,self_attn.o_proj,2.70091,0.10000,1.458
|
188 |
+
26,mlp.up_proj,24.94679,0.10000,2.327
|
189 |
+
26,mlp.gate_proj,25.21166,0.10000,2.314
|
190 |
+
26,mlp.down_proj,6.65333,0.10000,10.951
|
191 |
+
27,self_attn.k_proj,1.80303,0.10000,1.292
|
192 |
+
27,self_attn.v_proj,1.11704,0.10000,1.286
|
193 |
+
27,self_attn.q_proj,6.16689,0.10000,1.433
|
194 |
+
27,self_attn.o_proj,4.27886,0.10000,1.454
|
195 |
+
27,mlp.up_proj,25.90349,0.10000,2.314
|
196 |
+
27,mlp.gate_proj,26.13162,0.10000,2.334
|
197 |
+
27,mlp.down_proj,7.54102,0.10000,10.965
|
198 |
+
28,self_attn.k_proj,1.69771,0.10000,1.298
|
199 |
+
28,self_attn.v_proj,1.60528,0.10000,1.290
|
200 |
+
28,self_attn.q_proj,6.53194,0.10000,1.439
|
201 |
+
28,self_attn.o_proj,4.20230,0.10000,1.486
|
202 |
+
28,mlp.up_proj,28.05067,0.10000,2.303
|
203 |
+
28,mlp.gate_proj,28.02932,0.10000,2.279
|
204 |
+
28,mlp.down_proj,8.23658,0.10000,10.941
|
205 |
+
29,self_attn.k_proj,2.46457,0.10000,1.294
|
206 |
+
29,self_attn.v_proj,1.97819,0.10000,1.292
|
207 |
+
29,self_attn.q_proj,8.90249,0.10000,1.499
|
208 |
+
29,self_attn.o_proj,5.20760,0.10000,1.455
|
209 |
+
29,mlp.up_proj,30.33147,0.10000,2.388
|
210 |
+
29,mlp.gate_proj,30.10080,0.10000,2.323
|
211 |
+
29,mlp.down_proj,9.00467,0.10000,11.021
|
212 |
+
30,self_attn.k_proj,2.05785,0.10000,1.306
|
213 |
+
30,self_attn.v_proj,1.95388,0.10000,1.304
|
214 |
+
30,self_attn.q_proj,7.90785,0.10000,1.451
|
215 |
+
30,self_attn.o_proj,6.99206,0.10000,1.456
|
216 |
+
30,mlp.up_proj,32.67397,0.10000,2.312
|
217 |
+
30,mlp.gate_proj,32.30062,0.10000,2.301
|
218 |
+
30,mlp.down_proj,10.09964,0.10000,10.901
|
219 |
+
31,self_attn.k_proj,2.18536,0.10000,1.306
|
220 |
+
31,self_attn.v_proj,1.97777,0.10000,1.307
|
221 |
+
31,self_attn.q_proj,8.18490,0.10000,1.453
|
222 |
+
31,self_attn.o_proj,4.57464,0.10000,1.462
|
223 |
+
31,mlp.up_proj,35.59037,0.10000,2.315
|
224 |
+
31,mlp.gate_proj,35.25336,0.10000,2.303
|
225 |
+
31,mlp.down_proj,10.93689,0.10000,11.025
|
226 |
+
32,self_attn.k_proj,2.10946,0.10000,1.316
|
227 |
+
32,self_attn.v_proj,1.52154,0.10000,1.301
|
228 |
+
32,self_attn.q_proj,7.29806,0.10000,1.456
|
229 |
+
32,self_attn.o_proj,4.84362,0.10000,1.458
|
230 |
+
32,mlp.up_proj,39.63444,0.10000,2.325
|
231 |
+
32,mlp.gate_proj,41.30604,0.10000,2.348
|
232 |
+
32,mlp.down_proj,10.75069,0.10000,10.995
|
233 |
+
33,self_attn.k_proj,2.25656,0.10000,1.310
|
234 |
+
33,self_attn.v_proj,1.63852,0.10000,1.295
|
235 |
+
33,self_attn.q_proj,8.25086,0.10000,1.445
|
236 |
+
33,self_attn.o_proj,4.94513,0.10000,1.472
|
237 |
+
33,mlp.up_proj,37.36087,0.10000,2.379
|
238 |
+
33,mlp.gate_proj,38.17615,0.10000,2.323
|
239 |
+
33,mlp.down_proj,10.10525,0.10000,11.033
|
240 |
+
34,self_attn.k_proj,2.18897,0.10000,1.315
|
241 |
+
34,self_attn.v_proj,1.79533,0.10000,1.352
|
242 |
+
34,self_attn.q_proj,8.13230,0.10000,1.478
|
243 |
+
34,self_attn.o_proj,6.20360,0.10000,1.480
|
244 |
+
34,mlp.up_proj,36.49163,0.10000,2.371
|
245 |
+
34,mlp.gate_proj,36.59039,0.10000,2.332
|
246 |
+
34,mlp.down_proj,10.11572,0.10000,11.043
|
247 |
+
35,self_attn.k_proj,2.29760,0.10000,1.313
|
248 |
+
35,self_attn.v_proj,1.81589,0.10000,1.341
|
249 |
+
35,self_attn.q_proj,9.18513,0.10000,1.500
|
250 |
+
35,self_attn.o_proj,4.96898,0.10000,1.458
|
251 |
+
35,mlp.up_proj,37.04196,0.10000,2.301
|
252 |
+
35,mlp.gate_proj,36.97671,0.10000,2.305
|
253 |
+
35,mlp.down_proj,10.21576,0.10000,10.956
|
254 |
+
36,self_attn.k_proj,2.35134,0.10000,1.306
|
255 |
+
36,self_attn.v_proj,1.76898,0.10000,1.294
|
256 |
+
36,self_attn.q_proj,8.95750,0.10000,1.437
|
257 |
+
36,self_attn.o_proj,6.40625,0.10000,1.454
|
258 |
+
36,mlp.up_proj,35.14434,0.10000,2.283
|
259 |
+
36,mlp.gate_proj,33.75306,0.10000,2.299
|
260 |
+
36,mlp.down_proj,10.16060,0.10000,10.978
|
261 |
+
37,self_attn.k_proj,2.09853,0.10000,1.304
|
262 |
+
37,self_attn.v_proj,1.62890,0.10000,1.298
|
263 |
+
37,self_attn.q_proj,7.87804,0.10000,1.439
|
264 |
+
37,self_attn.o_proj,5.14967,0.10000,1.454
|
265 |
+
37,mlp.up_proj,34.78158,0.10000,2.320
|
266 |
+
37,mlp.gate_proj,33.21568,0.10000,2.294
|
267 |
+
37,mlp.down_proj,9.65291,0.10000,10.972
|
268 |
+
38,self_attn.k_proj,2.25152,0.10000,1.308
|
269 |
+
38,self_attn.v_proj,2.25329,0.10000,1.299
|
270 |
+
38,self_attn.q_proj,8.57306,0.10000,1.440
|
271 |
+
38,self_attn.o_proj,5.87199,0.10000,1.461
|
272 |
+
38,mlp.up_proj,35.01609,0.10000,2.377
|
273 |
+
38,mlp.gate_proj,33.53525,0.10000,2.297
|
274 |
+
38,mlp.down_proj,10.37327,0.10000,10.891
|
275 |
+
39,self_attn.k_proj,2.06724,0.10000,1.294
|
276 |
+
39,self_attn.v_proj,2.29155,0.10000,1.294
|
277 |
+
39,self_attn.q_proj,8.40444,0.10000,1.458
|
278 |
+
39,self_attn.o_proj,6.23639,0.10000,1.498
|
279 |
+
39,mlp.up_proj,35.45480,0.10000,2.320
|
280 |
+
39,mlp.gate_proj,35.16239,0.10000,2.309
|
281 |
+
39,mlp.down_proj,10.17532,0.10000,11.038
|
282 |
+
40,self_attn.k_proj,2.55298,0.10000,1.353
|
283 |
+
40,self_attn.v_proj,2.03756,0.10000,1.308
|
284 |
+
40,self_attn.q_proj,9.32236,0.10000,1.463
|
285 |
+
40,self_attn.o_proj,7.15637,0.10000,1.471
|
286 |
+
40,mlp.up_proj,34.27767,0.10000,2.325
|
287 |
+
40,mlp.gate_proj,33.71945,0.10000,2.319
|
288 |
+
40,mlp.down_proj,9.93580,0.10000,11.038
|
289 |
+
41,self_attn.k_proj,2.58491,0.10000,1.303
|
290 |
+
41,self_attn.v_proj,2.53599,0.10000,1.292
|
291 |
+
41,self_attn.q_proj,10.17238,0.10000,1.443
|
292 |
+
41,self_attn.o_proj,7.04449,0.10000,1.450
|
293 |
+
41,mlp.up_proj,35.32753,0.10000,2.307
|
294 |
+
41,mlp.gate_proj,33.75855,0.10000,2.297
|
295 |
+
41,mlp.down_proj,11.40161,0.10000,10.972
|
296 |
+
42,self_attn.k_proj,2.35824,0.10000,1.298
|
297 |
+
42,self_attn.v_proj,1.72195,0.10000,1.305
|
298 |
+
42,self_attn.q_proj,9.03940,0.10000,1.455
|
299 |
+
42,self_attn.o_proj,6.03776,0.10000,1.460
|
300 |
+
42,mlp.up_proj,37.64269,0.10000,2.303
|
301 |
+
42,mlp.gate_proj,35.07131,0.10000,2.304
|
302 |
+
42,mlp.down_proj,13.06790,0.10000,10.952
|
303 |
+
43,self_attn.k_proj,2.28732,0.10000,1.300
|
304 |
+
43,self_attn.v_proj,1.95555,0.10000,1.298
|
305 |
+
43,self_attn.q_proj,8.47664,0.10000,1.451
|
306 |
+
43,self_attn.o_proj,8.52545,0.10000,1.460
|
307 |
+
43,mlp.up_proj,39.51856,0.10000,2.327
|
308 |
+
43,mlp.gate_proj,36.65776,0.10000,2.308
|
309 |
+
43,mlp.down_proj,16.02491,0.10000,11.016
|
310 |
+
44,self_attn.k_proj,1.93263,0.10000,1.304
|
311 |
+
44,self_attn.v_proj,2.67497,0.10000,1.292
|
312 |
+
44,self_attn.q_proj,8.30756,0.10000,1.441
|
313 |
+
44,self_attn.o_proj,9.71007,0.10000,1.448
|
314 |
+
44,mlp.up_proj,39.93449,0.10000,2.340
|
315 |
+
44,mlp.gate_proj,36.77854,0.10000,2.293
|
316 |
+
44,mlp.down_proj,16.62127,0.10000,11.042
|
317 |
+
45,self_attn.k_proj,2.33465,0.10000,1.299
|
318 |
+
45,self_attn.v_proj,2.81778,0.10000,1.294
|
319 |
+
45,self_attn.q_proj,9.41949,0.10000,1.457
|
320 |
+
45,self_attn.o_proj,12.23259,0.10000,1.454
|
321 |
+
45,mlp.up_proj,41.66721,0.10000,2.273
|
322 |
+
45,mlp.gate_proj,38.44549,0.10000,2.279
|
323 |
+
45,mlp.down_proj,18.68768,0.10000,10.922
|
324 |
+
46,self_attn.k_proj,2.13504,0.10000,1.302
|
325 |
+
46,self_attn.v_proj,3.00420,0.10000,1.302
|
326 |
+
46,self_attn.q_proj,8.96905,0.10000,1.438
|
327 |
+
46,self_attn.o_proj,14.64149,0.10000,1.455
|
328 |
+
46,mlp.up_proj,44.77886,0.10000,2.297
|
329 |
+
46,mlp.gate_proj,41.66031,0.10000,2.286
|
330 |
+
46,mlp.down_proj,22.01645,0.10000,10.905
|
331 |
+
47,self_attn.k_proj,2.24537,0.10000,1.301
|
332 |
+
47,self_attn.v_proj,2.84377,0.10000,1.297
|
333 |
+
47,self_attn.q_proj,9.31574,0.10000,1.445
|
334 |
+
47,self_attn.o_proj,10.03240,0.10000,1.456
|
335 |
+
47,mlp.up_proj,50.59407,0.10000,2.298
|
336 |
+
47,mlp.gate_proj,47.86652,0.10000,2.293
|
337 |
+
47,mlp.down_proj,26.41652,0.10000,10.946
|
338 |
+
48,self_attn.k_proj,2.44420,0.10000,1.303
|
339 |
+
48,self_attn.v_proj,3.79191,0.10000,1.305
|
340 |
+
48,self_attn.q_proj,10.23924,0.10000,1.456
|
341 |
+
48,self_attn.o_proj,12.53692,0.10000,1.577
|
342 |
+
48,mlp.up_proj,55.46630,0.10000,2.330
|
343 |
+
48,mlp.gate_proj,53.09388,0.10000,2.330
|
344 |
+
48,mlp.down_proj,30.89529,0.10000,10.949
|
345 |
+
49,self_attn.k_proj,2.52239,0.10000,1.298
|
346 |
+
49,self_attn.v_proj,3.65270,0.10000,1.292
|
347 |
+
49,self_attn.q_proj,10.91024,0.10000,1.455
|
348 |
+
49,self_attn.o_proj,14.26922,0.10000,1.465
|
349 |
+
49,mlp.up_proj,66.36527,0.10000,2.305
|
350 |
+
49,mlp.gate_proj,64.04491,0.10000,2.290
|
351 |
+
49,mlp.down_proj,43.17512,0.10000,10.985
|
352 |
+
50,self_attn.k_proj,2.66782,0.10000,1.305
|
353 |
+
50,self_attn.v_proj,4.38692,0.10000,1.287
|
354 |
+
50,self_attn.q_proj,11.92033,0.10000,1.446
|
355 |
+
50,self_attn.o_proj,11.55887,0.10000,1.456
|
356 |
+
50,mlp.up_proj,76.91088,0.10000,2.291
|
357 |
+
50,mlp.gate_proj,76.20550,0.10000,2.302
|
358 |
+
50,mlp.down_proj,49.31875,0.10000,10.959
|
359 |
+
51,self_attn.k_proj,2.71374,0.10000,1.308
|
360 |
+
51,self_attn.v_proj,3.84296,0.10000,1.306
|
361 |
+
51,self_attn.q_proj,11.35158,0.10000,1.445
|
362 |
+
51,self_attn.o_proj,21.91198,0.10000,1.449
|
363 |
+
51,mlp.up_proj,85.67807,0.10000,2.291
|
364 |
+
51,mlp.gate_proj,86.28110,0.10000,2.314
|
365 |
+
51,mlp.down_proj,57.13311,0.10000,10.955
|
366 |
+
52,self_attn.k_proj,2.95962,0.10000,1.308
|
367 |
+
52,self_attn.v_proj,6.29919,0.10000,1.293
|
368 |
+
52,self_attn.q_proj,13.32253,0.10000,1.445
|
369 |
+
52,self_attn.o_proj,17.50098,0.10000,1.452
|
370 |
+
52,mlp.up_proj,93.98927,0.10000,2.319
|
371 |
+
52,mlp.gate_proj,94.03600,0.10000,2.299
|
372 |
+
52,mlp.down_proj,66.89360,0.10000,10.940
|
373 |
+
53,self_attn.k_proj,3.41568,0.10000,1.307
|
374 |
+
53,self_attn.v_proj,6.68261,0.10000,1.297
|
375 |
+
53,self_attn.q_proj,14.38581,0.10000,1.444
|
376 |
+
53,self_attn.o_proj,20.21694,0.10000,1.452
|
377 |
+
53,mlp.up_proj,104.72342,0.10000,2.307
|
378 |
+
53,mlp.gate_proj,105.06948,0.10000,2.329
|
379 |
+
53,mlp.down_proj,75.09118,0.10000,10.924
|
380 |
+
54,self_attn.k_proj,3.36520,0.10000,1.305
|
381 |
+
54,self_attn.v_proj,6.15906,0.10000,1.284
|
382 |
+
54,self_attn.q_proj,14.19391,0.10000,1.433
|
383 |
+
54,self_attn.o_proj,17.05255,0.10000,1.437
|
384 |
+
54,mlp.up_proj,116.01665,0.10000,2.269
|
385 |
+
54,mlp.gate_proj,115.23258,0.10000,2.277
|
386 |
+
54,mlp.down_proj,81.21964,0.10000,10.906
|
387 |
+
55,self_attn.k_proj,3.17986,0.10000,1.309
|
388 |
+
55,self_attn.v_proj,6.37354,0.10000,1.322
|
389 |
+
55,self_attn.q_proj,14.36818,0.10000,1.448
|
390 |
+
55,self_attn.o_proj,23.59741,0.10000,1.465
|
391 |
+
55,mlp.up_proj,127.39666,0.10000,2.319
|
392 |
+
55,mlp.gate_proj,124.96896,0.10000,2.297
|
393 |
+
55,mlp.down_proj,95.18001,0.10000,10.959
|
394 |
+
56,self_attn.k_proj,3.51710,0.10000,1.301
|
395 |
+
56,self_attn.v_proj,8.92079,0.10000,1.299
|
396 |
+
56,self_attn.q_proj,15.57063,0.10000,1.439
|
397 |
+
56,self_attn.o_proj,18.93305,0.10000,1.445
|
398 |
+
56,mlp.up_proj,137.50682,0.10000,2.292
|
399 |
+
56,mlp.gate_proj,133.82163,0.10000,2.294
|
400 |
+
56,mlp.down_proj,104.22985,0.10000,10.929
|
401 |
+
57,self_attn.k_proj,3.39507,0.10000,1.297
|
402 |
+
57,self_attn.v_proj,9.25974,0.10000,1.283
|
403 |
+
57,self_attn.q_proj,16.10579,0.10000,1.430
|
404 |
+
57,self_attn.o_proj,14.65038,0.10000,1.442
|
405 |
+
57,mlp.up_proj,146.27147,0.10000,2.271
|
406 |
+
57,mlp.gate_proj,140.22373,0.10000,2.270
|
407 |
+
57,mlp.down_proj,114.18199,0.10000,11.026
|
408 |
+
58,self_attn.k_proj,3.59921,0.10000,1.293
|
409 |
+
58,self_attn.v_proj,10.66743,0.10000,1.291
|
410 |
+
58,self_attn.q_proj,15.81635,0.10000,1.438
|
411 |
+
58,self_attn.o_proj,15.86379,0.10000,1.510
|
412 |
+
58,mlp.up_proj,158.08565,0.10000,2.279
|
413 |
+
58,mlp.gate_proj,149.25287,0.10000,2.345
|
414 |
+
58,mlp.down_proj,137.70841,0.10000,10.935
|
415 |
+
59,self_attn.k_proj,3.67538,0.10000,1.294
|
416 |
+
59,self_attn.v_proj,14.46169,0.10000,1.289
|
417 |
+
59,self_attn.q_proj,18.15292,0.10000,1.437
|
418 |
+
59,self_attn.o_proj,37.87932,0.10000,1.452
|
419 |
+
59,mlp.up_proj,175.38797,0.10000,2.328
|
420 |
+
59,mlp.gate_proj,162.44526,0.10000,2.331
|
421 |
+
59,mlp.down_proj,179.52118,0.10000,10.903
|
422 |
+
60,self_attn.k_proj,3.14580,0.10000,1.292
|
423 |
+
60,self_attn.v_proj,15.64041,0.10000,1.286
|
424 |
+
60,self_attn.q_proj,17.17400,0.10000,1.446
|
425 |
+
60,self_attn.o_proj,47.29679,0.10000,1.456
|
426 |
+
60,mlp.up_proj,191.12323,0.10000,2.287
|
427 |
+
60,mlp.gate_proj,173.92417,0.10000,2.289
|
428 |
+
60,mlp.down_proj,381.57719,0.10000,10.930
|
429 |
+
61,self_attn.k_proj,3.22560,0.10000,1.294
|
430 |
+
61,self_attn.v_proj,19.75692,0.10000,1.285
|
431 |
+
61,self_attn.q_proj,17.35482,0.10000,1.435
|
432 |
+
61,self_attn.o_proj,57.58032,0.10000,1.433
|
433 |
+
61,mlp.up_proj,205.42150,0.10000,2.284
|
434 |
+
61,mlp.gate_proj,187.30222,0.10000,2.273
|
435 |
+
61,mlp.down_proj,321.43789,0.10000,10.971
|
436 |
+
62,self_attn.k_proj,3.10699,0.10000,1.294
|
437 |
+
62,self_attn.v_proj,22.59203,0.10000,1.292
|
438 |
+
62,self_attn.q_proj,17.16475,0.10000,1.436
|
439 |
+
62,self_attn.o_proj,82.21398,0.10000,1.428
|
440 |
+
62,mlp.up_proj,205.27777,0.10000,2.263
|
441 |
+
62,mlp.gate_proj,191.64138,0.10000,2.267
|
442 |
+
62,mlp.down_proj,544.66563,0.10000,10.971
|
443 |
+
63,self_attn.k_proj,2.77712,0.10000,1.279
|
444 |
+
63,self_attn.v_proj,14.33761,0.10000,1.287
|
445 |
+
63,self_attn.q_proj,12.67621,0.10000,1.430
|
446 |
+
63,self_attn.o_proj,35.30698,0.10000,1.494
|
447 |
+
63,mlp.up_proj,221.10228,0.10000,2.296
|
448 |
+
63,mlp.gate_proj,213.36907,0.10000,2.307
|
449 |
+
63,mlp.down_proj,877.11320,0.10000,10.990
|
quantize_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 4,
|
3 |
+
"dynamic": null,
|
4 |
+
"group_size": 32,
|
5 |
+
"desc_act": true,
|
6 |
+
"sym": true,
|
7 |
+
"lm_head": false,
|
8 |
+
"quant_method": "gptq",
|
9 |
+
"checkpoint_format": "gptq",
|
10 |
+
"meta": {
|
11 |
+
"quantizer": [
|
12 |
+
"gptqmodel:1.7.3"
|
13 |
+
],
|
14 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
15 |
+
"damp_percent": 0.1,
|
16 |
+
"damp_auto_increment": 0.0015,
|
17 |
+
"static_groups": false,
|
18 |
+
"true_sequential": true,
|
19 |
+
"mse": 0.0
|
20 |
+
}
|
21 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|begin▁of▁sentence|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|end▁of▁sentence|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "<|end▁of▁sentence|>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df4e7ca41f3f7f64a5b6945b3bf69d8b620334fdde07a1e8932f522775798602
|
3 |
+
size 11422185
|
tokenizer_config.json
ADDED
@@ -0,0 +1,195 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"add_prefix_space": null,
|
5 |
+
"added_tokens_decoder": {
|
6 |
+
"151643": {
|
7 |
+
"content": "<|end▁of▁sentence|>",
|
8 |
+
"lstrip": false,
|
9 |
+
"normalized": false,
|
10 |
+
"rstrip": false,
|
11 |
+
"single_word": false,
|
12 |
+
"special": true
|
13 |
+
},
|
14 |
+
"151644": {
|
15 |
+
"content": "<|User|>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false,
|
20 |
+
"special": false
|
21 |
+
},
|
22 |
+
"151645": {
|
23 |
+
"content": "<|Assistant|>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": false,
|
27 |
+
"single_word": false,
|
28 |
+
"special": false
|
29 |
+
},
|
30 |
+
"151646": {
|
31 |
+
"content": "<|begin▁of▁sentence|>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false,
|
36 |
+
"special": true
|
37 |
+
},
|
38 |
+
"151647": {
|
39 |
+
"content": "<|EOT|>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false,
|
44 |
+
"special": false
|
45 |
+
},
|
46 |
+
"151648": {
|
47 |
+
"content": "<think>",
|
48 |
+
"lstrip": false,
|
49 |
+
"normalized": false,
|
50 |
+
"rstrip": false,
|
51 |
+
"single_word": false,
|
52 |
+
"special": false
|
53 |
+
},
|
54 |
+
"151649": {
|
55 |
+
"content": "</think>",
|
56 |
+
"lstrip": false,
|
57 |
+
"normalized": false,
|
58 |
+
"rstrip": false,
|
59 |
+
"single_word": false,
|
60 |
+
"special": false
|
61 |
+
},
|
62 |
+
"151650": {
|
63 |
+
"content": "<|quad_start|>",
|
64 |
+
"lstrip": false,
|
65 |
+
"normalized": false,
|
66 |
+
"rstrip": false,
|
67 |
+
"single_word": false,
|
68 |
+
"special": true
|
69 |
+
},
|
70 |
+
"151651": {
|
71 |
+
"content": "<|quad_end|>",
|
72 |
+
"lstrip": false,
|
73 |
+
"normalized": false,
|
74 |
+
"rstrip": false,
|
75 |
+
"single_word": false,
|
76 |
+
"special": true
|
77 |
+
},
|
78 |
+
"151652": {
|
79 |
+
"content": "<|vision_start|>",
|
80 |
+
"lstrip": false,
|
81 |
+
"normalized": false,
|
82 |
+
"rstrip": false,
|
83 |
+
"single_word": false,
|
84 |
+
"special": true
|
85 |
+
},
|
86 |
+
"151653": {
|
87 |
+
"content": "<|vision_end|>",
|
88 |
+
"lstrip": false,
|
89 |
+
"normalized": false,
|
90 |
+
"rstrip": false,
|
91 |
+
"single_word": false,
|
92 |
+
"special": true
|
93 |
+
},
|
94 |
+
"151654": {
|
95 |
+
"content": "<|vision_pad|>",
|
96 |
+
"lstrip": false,
|
97 |
+
"normalized": false,
|
98 |
+
"rstrip": false,
|
99 |
+
"single_word": false,
|
100 |
+
"special": true
|
101 |
+
},
|
102 |
+
"151655": {
|
103 |
+
"content": "<|image_pad|>",
|
104 |
+
"lstrip": false,
|
105 |
+
"normalized": false,
|
106 |
+
"rstrip": false,
|
107 |
+
"single_word": false,
|
108 |
+
"special": true
|
109 |
+
},
|
110 |
+
"151656": {
|
111 |
+
"content": "<|video_pad|>",
|
112 |
+
"lstrip": false,
|
113 |
+
"normalized": false,
|
114 |
+
"rstrip": false,
|
115 |
+
"single_word": false,
|
116 |
+
"special": true
|
117 |
+
},
|
118 |
+
"151657": {
|
119 |
+
"content": "<tool_call>",
|
120 |
+
"lstrip": false,
|
121 |
+
"normalized": false,
|
122 |
+
"rstrip": false,
|
123 |
+
"single_word": false,
|
124 |
+
"special": false
|
125 |
+
},
|
126 |
+
"151658": {
|
127 |
+
"content": "</tool_call>",
|
128 |
+
"lstrip": false,
|
129 |
+
"normalized": false,
|
130 |
+
"rstrip": false,
|
131 |
+
"single_word": false,
|
132 |
+
"special": false
|
133 |
+
},
|
134 |
+
"151659": {
|
135 |
+
"content": "<|fim_prefix|>",
|
136 |
+
"lstrip": false,
|
137 |
+
"normalized": false,
|
138 |
+
"rstrip": false,
|
139 |
+
"single_word": false,
|
140 |
+
"special": false
|
141 |
+
},
|
142 |
+
"151660": {
|
143 |
+
"content": "<|fim_middle|>",
|
144 |
+
"lstrip": false,
|
145 |
+
"normalized": false,
|
146 |
+
"rstrip": false,
|
147 |
+
"single_word": false,
|
148 |
+
"special": false
|
149 |
+
},
|
150 |
+
"151661": {
|
151 |
+
"content": "<|fim_suffix|>",
|
152 |
+
"lstrip": false,
|
153 |
+
"normalized": false,
|
154 |
+
"rstrip": false,
|
155 |
+
"single_word": false,
|
156 |
+
"special": false
|
157 |
+
},
|
158 |
+
"151662": {
|
159 |
+
"content": "<|fim_pad|>",
|
160 |
+
"lstrip": false,
|
161 |
+
"normalized": false,
|
162 |
+
"rstrip": false,
|
163 |
+
"single_word": false,
|
164 |
+
"special": false
|
165 |
+
},
|
166 |
+
"151663": {
|
167 |
+
"content": "<|repo_name|>",
|
168 |
+
"lstrip": false,
|
169 |
+
"normalized": false,
|
170 |
+
"rstrip": false,
|
171 |
+
"single_word": false,
|
172 |
+
"special": false
|
173 |
+
},
|
174 |
+
"151664": {
|
175 |
+
"content": "<|file_sep|>",
|
176 |
+
"lstrip": false,
|
177 |
+
"normalized": false,
|
178 |
+
"rstrip": false,
|
179 |
+
"single_word": false,
|
180 |
+
"special": false
|
181 |
+
}
|
182 |
+
},
|
183 |
+
"bos_token": "<|begin▁of▁sentence|>",
|
184 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin���>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
185 |
+
"clean_up_tokenization_spaces": false,
|
186 |
+
"eos_token": "<|end▁of▁sentence|>",
|
187 |
+
"extra_special_tokens": {},
|
188 |
+
"legacy": true,
|
189 |
+
"model_max_length": 16384,
|
190 |
+
"pad_token": "<|end▁of▁sentence|>",
|
191 |
+
"sp_model_kwargs": {},
|
192 |
+
"tokenizer_class": "LlamaTokenizer",
|
193 |
+
"unk_token": null,
|
194 |
+
"use_default_system_prompt": false
|
195 |
+
}
|