Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- config.json +60 -0
- generation_config.json +10 -0
- model-00001-of-00042.safetensors +3 -0
- model-00002-of-00042.safetensors +3 -0
- model-00003-of-00042.safetensors +3 -0
- model-00004-of-00042.safetensors +3 -0
- model-00005-of-00042.safetensors +3 -0
- model-00006-of-00042.safetensors +3 -0
- model-00007-of-00042.safetensors +3 -0
- model-00008-of-00042.safetensors +3 -0
- model-00009-of-00042.safetensors +3 -0
- model-00010-of-00042.safetensors +3 -0
- model-00011-of-00042.safetensors +3 -0
- model-00012-of-00042.safetensors +3 -0
- model-00013-of-00042.safetensors +3 -0
- model-00014-of-00042.safetensors +3 -0
- model-00015-of-00042.safetensors +3 -0
- model-00016-of-00042.safetensors +3 -0
- model-00017-of-00042.safetensors +3 -0
- model-00018-of-00042.safetensors +3 -0
- model-00019-of-00042.safetensors +3 -0
- model-00020-of-00042.safetensors +3 -0
- model-00021-of-00042.safetensors +3 -0
- model-00022-of-00042.safetensors +3 -0
- model-00023-of-00042.safetensors +3 -0
- model-00024-of-00042.safetensors +3 -0
- model-00025-of-00042.safetensors +3 -0
- model-00026-of-00042.safetensors +3 -0
- model-00027-of-00042.safetensors +3 -0
- model-00028-of-00042.safetensors +3 -0
- model-00029-of-00042.safetensors +3 -0
- model-00030-of-00042.safetensors +3 -0
- model-00031-of-00042.safetensors +3 -0
- model-00032-of-00042.safetensors +3 -0
- model-00033-of-00042.safetensors +3 -0
- model-00034-of-00042.safetensors +3 -0
- model-00035-of-00042.safetensors +3 -0
- model-00036-of-00042.safetensors +3 -0
- model-00037-of-00042.safetensors +3 -0
- model-00038-of-00042.safetensors +3 -0
- model-00039-of-00042.safetensors +3 -0
- model-00040-of-00042.safetensors +3 -0
- model-00041-of-00042.safetensors +3 -0
- model-00042-of-00042.safetensors +3 -0
- model.safetensors.index.json +0 -0
- quant_log.csv +561 -0
- quantize_config.json +21 -0
- special_tokens_map.json +17 -0
- tokenizer.json +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
config.json
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"LlamaForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 128000,
|
8 |
+
"eos_token_id": [
|
9 |
+
128001,
|
10 |
+
128008,
|
11 |
+
128009
|
12 |
+
],
|
13 |
+
"head_dim": 128,
|
14 |
+
"hidden_act": "silu",
|
15 |
+
"hidden_size": 8192,
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"intermediate_size": 28672,
|
18 |
+
"max_position_embeddings": 131072,
|
19 |
+
"mlp_bias": false,
|
20 |
+
"model_type": "llama",
|
21 |
+
"num_attention_heads": 64,
|
22 |
+
"num_hidden_layers": 80,
|
23 |
+
"num_key_value_heads": 8,
|
24 |
+
"pretraining_tp": 1,
|
25 |
+
"quantization_config": {
|
26 |
+
"bits": 8,
|
27 |
+
"checkpoint_format": "gptq",
|
28 |
+
"desc_act": false,
|
29 |
+
"group_size": 64,
|
30 |
+
"lm_head": false,
|
31 |
+
"meta": {
|
32 |
+
"damp_auto_increment": 0.0025,
|
33 |
+
"damp_percent": 0.01,
|
34 |
+
"mse": 0.0,
|
35 |
+
"quantizer": [
|
36 |
+
"gptqmodel:2.3.0-dev"
|
37 |
+
],
|
38 |
+
"static_groups": false,
|
39 |
+
"true_sequential": true,
|
40 |
+
"uri": "https://github.com/modelcloud/gptqmodel"
|
41 |
+
},
|
42 |
+
"pack_dtype": "int32",
|
43 |
+
"quant_method": "gptq",
|
44 |
+
"sym": true
|
45 |
+
},
|
46 |
+
"rms_norm_eps": 1e-05,
|
47 |
+
"rope_scaling": {
|
48 |
+
"factor": 8.0,
|
49 |
+
"high_freq_factor": 4.0,
|
50 |
+
"low_freq_factor": 1.0,
|
51 |
+
"original_max_position_embeddings": 8192,
|
52 |
+
"rope_type": "llama3"
|
53 |
+
},
|
54 |
+
"rope_theta": 500000.0,
|
55 |
+
"tie_word_embeddings": false,
|
56 |
+
"torch_dtype": "float16",
|
57 |
+
"transformers_version": "4.51.0",
|
58 |
+
"use_cache": true,
|
59 |
+
"vocab_size": 128256
|
60 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 128000,
|
4 |
+
"eos_token_id": [
|
5 |
+
128001,
|
6 |
+
128008,
|
7 |
+
128009
|
8 |
+
],
|
9 |
+
"transformers_version": "4.51.0"
|
10 |
+
}
|
model-00001-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5658746570d1e963ac2479d83b7e7c7106f0e6cd2cb26ca991ffca4aebdcd331
|
3 |
+
size 2101346440
|
model-00002-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc84d2046b3f4734cd3ea9eeeb16bc12156ceee0b69fac8ab748a6f7f82d055d
|
3 |
+
size 1950392712
|
model-00003-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a0c9fd2bd04441088653b871c7dc84490f7e6bf924ae9de842c65014f4e871c
|
3 |
+
size 1792187000
|
model-00004-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b8ad25ba1ed43f66612fd2a4be537faff99bc53cb3f9ebde5f60e2c7c4914db
|
3 |
+
size 1792187000
|
model-00005-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42988a731ae7ffa2bc7112cc5c53b884ddd31bc24dfaa2d5dd0fda41226095c3
|
3 |
+
size 1792187000
|
model-00006-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c62a85bf6ec0928096ae722cc20e99d6a7d5e67895a13761196804860fe54970
|
3 |
+
size 1792186976
|
model-00007-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2cc392cb43da7d67780085cc124fff8148e96d1751420af534747bbe36e498f
|
3 |
+
size 1792187064
|
model-00008-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e62b2f744d2d78009ece528c8c5b56681c8588f1c9f5fe6092e806eb11c56f1
|
3 |
+
size 1792187064
|
model-00009-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:30050354c791655881a40e6d9866db4c63b0ae10208348fda308ee198f1238a6
|
3 |
+
size 1792187064
|
model-00010-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b13fed693c2f445afdf307b4d823cf5d4d81044ba18ea62560d65852d7e95aaa
|
3 |
+
size 1792187064
|
model-00011-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:702be1f0dbcc98a7b17793e05a948447c005a6fca9cfc7253316520ce27b8095
|
3 |
+
size 1792187064
|
model-00012-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:97c062ec3402178fe0ec8c2728b8424952706e7f06ea543fa0a7a9da80800ddf
|
3 |
+
size 1792187064
|
model-00013-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f141bdbd8d439661e925647f7f5f6dc69c3e2906fd09f5f0d6c7eaea4e3881cc
|
3 |
+
size 1792187064
|
model-00014-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f9050cd231af2b832935c44374ea0bc041053283bc8dc1f401b223c83bc7582
|
3 |
+
size 1792187064
|
model-00015-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e81b3ee6506bd2d880d6825864dbe5e69bcabab77aa6249e83ff009940babed3
|
3 |
+
size 1792187064
|
model-00016-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16a7f00e8e3b8c40b7ef77643d9f0649ccaabed3d1c6c840d7acbb59e74b9cdb
|
3 |
+
size 1792187064
|
model-00017-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:68a81d8089794b70758976beb1dd9e5ff8af6c05ef8d087cae08e9e282184164
|
3 |
+
size 1792187064
|
model-00018-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f5fb720a763cf9100270a3ac6a5068f1d27e97af27ff110f50c6ea67fff924c9
|
3 |
+
size 1792187064
|
model-00019-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe747532479e49e28575e704b2845fcae2c86a4c6cc0d5e07a0c07b4d3efbb2d
|
3 |
+
size 1792187064
|
model-00020-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d80a217d80c553018fba13dd363fe074ebae967af2228830568e3c6f075b9427
|
3 |
+
size 1792187064
|
model-00021-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6b844def6030b7036805d46b53a15e17178bc867b17b0991217c7ba8b682974
|
3 |
+
size 1792187064
|
model-00022-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6a65365f7e6145022730f5aafde19b476717f9942e7942cb14db8900a0ce1d4
|
3 |
+
size 1792187064
|
model-00023-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14534f5943683674e1066926490b5d2303fb85a0a31032cf758f19b4a1931679
|
3 |
+
size 1792187064
|
model-00024-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62e2ecf7cccb9829f6ad20e5cebdb7ed3a48ce2857490bf028dbdb708f5844de
|
3 |
+
size 1792187064
|
model-00025-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd6179776d1ac79d2f3e8aa9b83a08d5925ed64756b582f440ba501c53546c61
|
3 |
+
size 1792187064
|
model-00026-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:897d3c05980f6a2a9a37b024301b62767cc879c469fa6b20efdb5b2c3143ffbe
|
3 |
+
size 1792187064
|
model-00027-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8b0bcf7f33c9df139b268ebec6479892a287043692189e90419eb70735ea08b
|
3 |
+
size 1792187064
|
model-00028-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2aa962bb58a978b1ea2099d4715679e708504553381419c0b3fbdaecf6b8c9d
|
3 |
+
size 1792187064
|
model-00029-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a6e67ee247e8d2656612955ba6ef92878b4c7e3efa680d4da1004e3de4640276
|
3 |
+
size 1792187064
|
model-00030-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:10cce36a3999d235826f1951963c88fdce02f2ac3b4f70c24360194deb907f8c
|
3 |
+
size 1792187064
|
model-00031-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6841c81e90aa925b4d96f73433d9a18ab1575f4705a81877edd3919786551761
|
3 |
+
size 1792187064
|
model-00032-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fdc97c2009c4a25e3447d28a3090cf714bad090c449d6c8fa2a4ae35fa3ce98
|
3 |
+
size 1792187064
|
model-00033-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4119f96f17e6d562d6e6ab521b36af688887750e1ad20a1c2ec772adaae595ed
|
3 |
+
size 1792187064
|
model-00034-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:018fcad36a0e6760c7a097f16298c9318f175bf2636b73d4aad95a071a41ff9e
|
3 |
+
size 1792187064
|
model-00035-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:229d527216b85a824ce39d47bf7f8b460393643f01d84466d2ebc01b174af2d6
|
3 |
+
size 1792187064
|
model-00036-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ec500937fd2a5181e55a8d91077d66d71043319fbd3316a6d01d002eb7994a3
|
3 |
+
size 1792187064
|
model-00037-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c93c035588681576a904c5cd5126dba54796aead536e3fff5f5448e629b3886e
|
3 |
+
size 1792187064
|
model-00038-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:14806702518b2b6254a60ac505e0ee763fe6fae6c787333bd9d3db4c26f2d301
|
3 |
+
size 1792187064
|
model-00039-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee2fc3acee82b9b94907c3a3244453ad8afed2f0a3ac5c020fcf1f602d9f0b6a
|
3 |
+
size 1792187064
|
model-00040-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2496b9e0a1e51ab57b1f5ee78c522a35e913077edefeb6beb295f06d3b32fc86
|
3 |
+
size 1792187064
|
model-00041-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b738dfcc403cf12f55ecf283c9fc6cfd4d7bf2dd057b3670d00cb0bfadd0e38f
|
3 |
+
size 2101346432
|
model-00042-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bd79b6fa05097208ce92ea26b0347c5de86d994d2c99726c2b42246a75e81a0
|
3 |
+
size 1633997792
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
quant_log.csv
ADDED
@@ -0,0 +1,561 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
layer,module,loss,samples,damp,time
|
2 |
+
0,self_attn.k_proj,0.00117016,0.01000,3.044
|
3 |
+
0,self_attn.v_proj,0.00083932,0.01000,2.320
|
4 |
+
0,self_attn.q_proj,0.00348741,0.01000,2.274
|
5 |
+
0,self_attn.o_proj,0.00000004,0.01000,2.433
|
6 |
+
0,mlp.up_proj,0.00398841,0.01000,2.557
|
7 |
+
0,mlp.gate_proj,0.00399996,0.01000,2.605
|
8 |
+
0,mlp.down_proj,0.00000087,0.01000,14.234
|
9 |
+
1,self_attn.k_proj,0.00034051,0.01000,2.149
|
10 |
+
1,self_attn.v_proj,0.00080600,0.01000,2.172
|
11 |
+
1,self_attn.q_proj,0.00102862,0.01000,2.330
|
12 |
+
1,self_attn.o_proj,0.00000026,0.01000,2.368
|
13 |
+
1,mlp.up_proj,0.00015353,0.01000,2.613
|
14 |
+
1,mlp.gate_proj,0.00016610,0.01000,2.605
|
15 |
+
1,mlp.down_proj,0.00000039,0.01000,14.415
|
16 |
+
2,self_attn.k_proj,0.00006711,0.01000,2.129
|
17 |
+
2,self_attn.v_proj,0.00003058,0.01000,2.511
|
18 |
+
2,self_attn.q_proj,0.00029320,0.01000,2.476
|
19 |
+
2,self_attn.o_proj,0.00000039,0.01000,2.351
|
20 |
+
2,mlp.up_proj,0.00049619,0.01000,2.547
|
21 |
+
2,mlp.gate_proj,0.00049583,0.01000,2.558
|
22 |
+
2,mlp.down_proj,0.00000069,0.01000,14.316
|
23 |
+
3,self_attn.k_proj,0.00086632,0.01000,2.249
|
24 |
+
3,self_attn.v_proj,0.00043339,0.01000,2.236
|
25 |
+
3,self_attn.q_proj,0.00241460,0.01000,2.266
|
26 |
+
3,self_attn.o_proj,0.00000035,0.01000,2.336
|
27 |
+
3,mlp.up_proj,0.00149223,0.01000,2.769
|
28 |
+
3,mlp.gate_proj,0.00160987,0.01000,2.844
|
29 |
+
3,mlp.down_proj,0.00001598,0.01000,14.606
|
30 |
+
4,self_attn.k_proj,0.00015258,0.01000,2.121
|
31 |
+
4,self_attn.v_proj,0.00003290,0.01000,2.126
|
32 |
+
4,self_attn.q_proj,0.00044938,0.01000,2.257
|
33 |
+
4,self_attn.o_proj,0.00000027,0.01000,2.351
|
34 |
+
4,mlp.up_proj,0.00095811,0.01000,2.547
|
35 |
+
4,mlp.gate_proj,0.00098845,0.01000,2.547
|
36 |
+
4,mlp.down_proj,0.00000120,0.01000,14.399
|
37 |
+
5,self_attn.k_proj,0.00059744,0.01000,2.210
|
38 |
+
5,self_attn.v_proj,0.00027041,0.01000,2.305
|
39 |
+
5,self_attn.q_proj,0.00084680,0.01000,2.465
|
40 |
+
5,self_attn.o_proj,0.00000034,0.01000,2.245
|
41 |
+
5,mlp.up_proj,0.01062924,0.01000,2.603
|
42 |
+
5,mlp.gate_proj,0.01088028,0.01000,2.564
|
43 |
+
5,mlp.down_proj,0.00000169,0.01000,14.509
|
44 |
+
6,self_attn.k_proj,0.00035254,0.01000,2.238
|
45 |
+
6,self_attn.v_proj,0.00005993,0.01000,2.148
|
46 |
+
6,self_attn.q_proj,0.00101282,0.01000,2.252
|
47 |
+
6,self_attn.o_proj,0.00000050,0.01000,2.241
|
48 |
+
6,mlp.up_proj,0.00067450,0.01000,2.609
|
49 |
+
6,mlp.gate_proj,0.00070612,0.01000,2.606
|
50 |
+
6,mlp.down_proj,0.00000217,0.01000,14.286
|
51 |
+
7,self_attn.k_proj,0.00041241,0.01000,2.138
|
52 |
+
7,self_attn.v_proj,0.00008458,0.01000,2.151
|
53 |
+
7,self_attn.q_proj,0.00121471,0.01000,2.259
|
54 |
+
7,self_attn.o_proj,0.00000038,0.01000,2.285
|
55 |
+
7,mlp.up_proj,0.00062014,0.01000,2.659
|
56 |
+
7,mlp.gate_proj,0.00065432,0.01000,2.592
|
57 |
+
7,mlp.down_proj,0.00000287,0.01000,14.399
|
58 |
+
8,self_attn.k_proj,0.00025489,0.01000,2.245
|
59 |
+
8,self_attn.v_proj,0.00004794,0.01000,2.226
|
60 |
+
8,self_attn.q_proj,0.00071945,0.01000,2.259
|
61 |
+
8,self_attn.o_proj,0.00000078,0.01000,2.360
|
62 |
+
8,mlp.up_proj,0.00146766,0.01000,2.582
|
63 |
+
8,mlp.gate_proj,0.00156340,0.01000,2.582
|
64 |
+
8,mlp.down_proj,0.00000328,0.01000,14.577
|
65 |
+
9,self_attn.k_proj,0.00022181,0.01000,2.208
|
66 |
+
9,self_attn.v_proj,0.00004364,0.01000,2.155
|
67 |
+
9,self_attn.q_proj,0.00063667,0.01000,2.246
|
68 |
+
9,self_attn.o_proj,0.00000120,0.01000,2.336
|
69 |
+
9,mlp.up_proj,0.00960972,0.01000,2.528
|
70 |
+
9,mlp.gate_proj,0.01006194,0.01000,2.616
|
71 |
+
9,mlp.down_proj,0.00000441,0.01000,15.204
|
72 |
+
10,self_attn.k_proj,0.00062121,0.01000,2.203
|
73 |
+
10,self_attn.v_proj,0.00009192,0.01000,2.327
|
74 |
+
10,self_attn.q_proj,0.00175347,0.01000,2.345
|
75 |
+
10,self_attn.o_proj,0.00000146,0.01000,2.213
|
76 |
+
10,mlp.up_proj,0.00093580,0.01000,2.578
|
77 |
+
10,mlp.gate_proj,0.00100192,0.01000,2.644
|
78 |
+
10,mlp.down_proj,0.00000710,0.01000,14.963
|
79 |
+
11,self_attn.k_proj,0.00075064,0.01000,2.157
|
80 |
+
11,self_attn.v_proj,0.00009294,0.01000,2.248
|
81 |
+
11,self_attn.q_proj,0.00145335,0.01000,2.250
|
82 |
+
11,self_attn.o_proj,0.00000151,0.01000,2.369
|
83 |
+
11,mlp.up_proj,0.00169675,0.01000,2.564
|
84 |
+
11,mlp.gate_proj,0.00182295,0.01000,2.606
|
85 |
+
11,mlp.down_proj,0.00000883,0.01000,14.216
|
86 |
+
12,self_attn.k_proj,0.00040942,0.01000,2.222
|
87 |
+
12,self_attn.v_proj,0.00007576,0.01000,2.146
|
88 |
+
12,self_attn.q_proj,0.00115182,0.01000,2.234
|
89 |
+
12,self_attn.o_proj,0.00000166,0.01000,2.215
|
90 |
+
12,mlp.up_proj,0.00112496,0.01000,2.519
|
91 |
+
12,mlp.gate_proj,0.00120508,0.01000,2.550
|
92 |
+
12,mlp.down_proj,0.00000987,0.01000,14.331
|
93 |
+
13,self_attn.k_proj,0.00056519,0.01000,2.230
|
94 |
+
13,self_attn.v_proj,0.00007572,0.01000,2.550
|
95 |
+
13,self_attn.q_proj,0.00150745,0.01000,2.486
|
96 |
+
13,self_attn.o_proj,0.00000563,0.01000,2.251
|
97 |
+
13,mlp.up_proj,0.00101434,0.01000,2.600
|
98 |
+
13,mlp.gate_proj,0.00107714,0.01000,2.535
|
99 |
+
13,mlp.down_proj,0.00001250,0.01000,15.180
|
100 |
+
14,self_attn.k_proj,0.00070874,0.01000,2.258
|
101 |
+
14,self_attn.v_proj,0.00009899,0.01000,2.234
|
102 |
+
14,self_attn.q_proj,0.00209413,0.01000,2.255
|
103 |
+
14,self_attn.o_proj,0.00000751,0.01000,2.240
|
104 |
+
14,mlp.up_proj,0.00132133,0.01000,2.592
|
105 |
+
14,mlp.gate_proj,0.00141659,0.01000,2.535
|
106 |
+
14,mlp.down_proj,0.00001739,0.01000,14.213
|
107 |
+
15,self_attn.k_proj,0.00060860,0.01000,2.249
|
108 |
+
15,self_attn.v_proj,0.00009004,0.01000,2.128
|
109 |
+
15,self_attn.q_proj,0.00170438,0.01000,2.204
|
110 |
+
15,self_attn.o_proj,0.00000787,0.01000,2.243
|
111 |
+
15,mlp.up_proj,0.00153280,0.01000,2.614
|
112 |
+
15,mlp.gate_proj,0.00167764,0.01000,2.563
|
113 |
+
15,mlp.down_proj,0.00002246,0.01000,14.346
|
114 |
+
16,self_attn.k_proj,0.00062847,0.01000,2.165
|
115 |
+
16,self_attn.v_proj,0.00009563,0.01000,2.498
|
116 |
+
16,self_attn.q_proj,0.00193428,0.01000,2.473
|
117 |
+
16,self_attn.o_proj,0.00000889,0.01000,2.491
|
118 |
+
16,mlp.up_proj,0.00195664,0.01000,2.547
|
119 |
+
16,mlp.gate_proj,0.00216027,0.01000,2.547
|
120 |
+
16,mlp.down_proj,0.00002996,0.01000,14.186
|
121 |
+
17,self_attn.k_proj,0.00103962,0.01000,2.376
|
122 |
+
17,self_attn.v_proj,0.00010982,0.01000,2.401
|
123 |
+
17,self_attn.q_proj,0.00282694,0.01000,2.497
|
124 |
+
17,self_attn.o_proj,0.00002287,0.01000,2.468
|
125 |
+
17,mlp.up_proj,0.00235450,0.01000,2.538
|
126 |
+
17,mlp.gate_proj,0.00259520,0.01000,2.544
|
127 |
+
17,mlp.down_proj,0.00004308,0.01000,14.371
|
128 |
+
18,self_attn.k_proj,0.00132764,0.01000,2.131
|
129 |
+
18,self_attn.v_proj,0.00014969,0.01000,2.151
|
130 |
+
18,self_attn.q_proj,0.00385427,0.01000,2.253
|
131 |
+
18,self_attn.o_proj,0.00002702,0.01000,2.352
|
132 |
+
18,mlp.up_proj,0.00300897,0.01000,2.521
|
133 |
+
18,mlp.gate_proj,0.00340771,0.01000,2.521
|
134 |
+
18,mlp.down_proj,0.00008709,0.01000,14.469
|
135 |
+
19,self_attn.k_proj,0.00119542,0.01000,2.260
|
136 |
+
19,self_attn.v_proj,0.00017003,0.01000,2.264
|
137 |
+
19,self_attn.q_proj,0.00331342,0.01000,2.557
|
138 |
+
19,self_attn.o_proj,0.00002724,0.01000,2.350
|
139 |
+
19,mlp.up_proj,0.00319600,0.01000,2.532
|
140 |
+
19,mlp.gate_proj,0.00373633,0.01000,2.536
|
141 |
+
19,mlp.down_proj,0.00007323,0.01000,14.570
|
142 |
+
20,self_attn.k_proj,0.00152185,0.01000,2.140
|
143 |
+
20,self_attn.v_proj,0.00017362,0.01000,2.241
|
144 |
+
20,self_attn.q_proj,0.00332100,0.01000,2.249
|
145 |
+
20,self_attn.o_proj,0.00001985,0.01000,2.212
|
146 |
+
20,mlp.up_proj,0.00369579,0.01000,2.885
|
147 |
+
20,mlp.gate_proj,0.00422741,0.01000,2.800
|
148 |
+
20,mlp.down_proj,0.00007474,0.01000,14.372
|
149 |
+
21,self_attn.k_proj,0.00230464,0.01000,2.405
|
150 |
+
21,self_attn.v_proj,0.00020898,0.01000,2.419
|
151 |
+
21,self_attn.q_proj,0.00533701,0.01000,2.465
|
152 |
+
21,self_attn.o_proj,0.00003033,0.01000,2.220
|
153 |
+
21,mlp.up_proj,0.00422490,0.01000,2.531
|
154 |
+
21,mlp.gate_proj,0.00486612,0.01000,2.550
|
155 |
+
21,mlp.down_proj,0.00008368,0.01000,14.292
|
156 |
+
22,self_attn.k_proj,0.00243283,0.01000,2.362
|
157 |
+
22,self_attn.v_proj,0.00031447,0.01000,2.417
|
158 |
+
22,self_attn.q_proj,0.00541335,0.01000,2.358
|
159 |
+
22,self_attn.o_proj,0.00001672,0.01000,2.349
|
160 |
+
22,mlp.up_proj,0.00465806,0.01000,2.864
|
161 |
+
22,mlp.gate_proj,0.00548650,0.01000,2.827
|
162 |
+
22,mlp.down_proj,0.00008461,0.01000,14.221
|
163 |
+
23,self_attn.k_proj,0.00275724,0.01000,2.235
|
164 |
+
23,self_attn.v_proj,0.00029652,0.01000,2.163
|
165 |
+
23,self_attn.q_proj,0.00582164,0.01000,2.318
|
166 |
+
23,self_attn.o_proj,0.00002369,0.01000,2.289
|
167 |
+
23,mlp.up_proj,0.00490270,0.01000,2.595
|
168 |
+
23,mlp.gate_proj,0.00581410,0.01000,2.898
|
169 |
+
23,mlp.down_proj,0.00009408,0.01000,14.451
|
170 |
+
24,self_attn.k_proj,0.00156649,0.01000,2.263
|
171 |
+
24,self_attn.v_proj,0.00017187,0.01000,2.217
|
172 |
+
24,self_attn.q_proj,0.00275502,0.01000,2.264
|
173 |
+
24,self_attn.o_proj,0.00001927,0.01000,2.321
|
174 |
+
24,mlp.up_proj,0.00517267,0.01000,2.543
|
175 |
+
24,mlp.gate_proj,0.00599205,0.01000,2.536
|
176 |
+
24,mlp.down_proj,0.00009361,0.01000,14.575
|
177 |
+
25,self_attn.k_proj,0.00277608,0.01000,2.226
|
178 |
+
25,self_attn.v_proj,0.00030894,0.01000,2.128
|
179 |
+
25,self_attn.q_proj,0.00531310,0.01000,2.229
|
180 |
+
25,self_attn.o_proj,0.00001787,0.01000,2.348
|
181 |
+
25,mlp.up_proj,0.00557528,0.01000,2.782
|
182 |
+
25,mlp.gate_proj,0.00646406,0.01000,2.593
|
183 |
+
25,mlp.down_proj,0.00009615,0.01000,14.278
|
184 |
+
26,self_attn.k_proj,0.00267533,0.01000,2.529
|
185 |
+
26,self_attn.v_proj,0.00030349,0.01000,2.416
|
186 |
+
26,self_attn.q_proj,0.00525413,0.01000,2.470
|
187 |
+
26,self_attn.o_proj,0.00002120,0.01000,2.468
|
188 |
+
26,mlp.up_proj,0.00566407,0.01000,2.543
|
189 |
+
26,mlp.gate_proj,0.00651728,0.01000,2.648
|
190 |
+
26,mlp.down_proj,0.00010098,0.01000,14.278
|
191 |
+
27,self_attn.k_proj,0.00276544,0.01000,2.249
|
192 |
+
27,self_attn.v_proj,0.00019987,0.01000,2.485
|
193 |
+
27,self_attn.q_proj,0.00541387,0.01000,2.236
|
194 |
+
27,self_attn.o_proj,0.00004007,0.01000,2.257
|
195 |
+
27,mlp.up_proj,0.00537560,0.01000,2.555
|
196 |
+
27,mlp.gate_proj,0.00606567,0.01000,2.554
|
197 |
+
27,mlp.down_proj,0.00011214,0.01000,14.367
|
198 |
+
28,self_attn.k_proj,0.00310899,0.01000,2.225
|
199 |
+
28,self_attn.v_proj,0.00026346,0.01000,2.219
|
200 |
+
28,self_attn.q_proj,0.00628278,0.01000,2.335
|
201 |
+
28,self_attn.o_proj,0.00003322,0.01000,2.319
|
202 |
+
28,mlp.up_proj,0.00547245,0.01000,2.536
|
203 |
+
28,mlp.gate_proj,0.00618226,0.01000,2.555
|
204 |
+
28,mlp.down_proj,0.00012137,0.01000,14.354
|
205 |
+
29,self_attn.k_proj,0.00326921,0.01000,2.150
|
206 |
+
29,self_attn.v_proj,0.00024837,0.01000,2.251
|
207 |
+
29,self_attn.q_proj,0.00631405,0.01000,2.300
|
208 |
+
29,self_attn.o_proj,0.00004234,0.01000,2.334
|
209 |
+
29,mlp.up_proj,0.00565995,0.01000,2.625
|
210 |
+
29,mlp.gate_proj,0.00638525,0.01000,2.603
|
211 |
+
29,mlp.down_proj,0.00012668,0.01000,14.598
|
212 |
+
30,self_attn.k_proj,0.00272176,0.01000,2.208
|
213 |
+
30,self_attn.v_proj,0.00022260,0.01000,2.235
|
214 |
+
30,self_attn.q_proj,0.00522464,0.01000,2.224
|
215 |
+
30,self_attn.o_proj,0.00004569,0.01000,2.340
|
216 |
+
30,mlp.up_proj,0.00552805,0.01000,2.601
|
217 |
+
30,mlp.gate_proj,0.00616731,0.01000,2.531
|
218 |
+
30,mlp.down_proj,0.00012557,0.01000,14.435
|
219 |
+
31,self_attn.k_proj,0.00349681,0.01000,2.143
|
220 |
+
31,self_attn.v_proj,0.00022708,0.01000,2.206
|
221 |
+
31,self_attn.q_proj,0.00763659,0.01000,2.244
|
222 |
+
31,self_attn.o_proj,0.00006857,0.01000,2.242
|
223 |
+
31,mlp.up_proj,0.00557746,0.01000,2.615
|
224 |
+
31,mlp.gate_proj,0.00609849,0.01000,2.588
|
225 |
+
31,mlp.down_proj,0.00013022,0.01000,14.201
|
226 |
+
32,self_attn.k_proj,0.00226732,0.01000,2.128
|
227 |
+
32,self_attn.v_proj,0.00024760,0.01000,2.523
|
228 |
+
32,self_attn.q_proj,0.00452715,0.01000,2.312
|
229 |
+
32,self_attn.o_proj,0.00003520,0.01000,2.320
|
230 |
+
32,mlp.up_proj,0.00558923,0.01000,2.597
|
231 |
+
32,mlp.gate_proj,0.00618265,0.01000,2.775
|
232 |
+
32,mlp.down_proj,0.00013658,0.01000,14.435
|
233 |
+
33,self_attn.k_proj,0.00350281,0.01000,2.535
|
234 |
+
33,self_attn.v_proj,0.00024202,0.01000,2.260
|
235 |
+
33,self_attn.q_proj,0.00755977,0.01000,2.521
|
236 |
+
33,self_attn.o_proj,0.00006247,0.01000,2.344
|
237 |
+
33,mlp.up_proj,0.16260509,0.01000,2.538
|
238 |
+
33,mlp.gate_proj,0.15796259,0.01000,2.873
|
239 |
+
33,mlp.down_proj,0.00014013,0.01000,14.322
|
240 |
+
34,self_attn.k_proj,0.00311725,0.01000,2.239
|
241 |
+
34,self_attn.v_proj,0.00024898,0.01000,2.210
|
242 |
+
34,self_attn.q_proj,0.00668666,0.01000,2.322
|
243 |
+
34,self_attn.o_proj,0.00006654,0.01000,2.246
|
244 |
+
34,mlp.up_proj,0.00556542,0.01000,2.745
|
245 |
+
34,mlp.gate_proj,0.00609831,0.01000,2.531
|
246 |
+
34,mlp.down_proj,0.00014447,0.01000,14.462
|
247 |
+
35,self_attn.k_proj,0.00359427,0.01000,2.152
|
248 |
+
35,self_attn.v_proj,0.00025139,0.01000,2.232
|
249 |
+
35,self_attn.q_proj,0.00730760,0.01000,2.254
|
250 |
+
35,self_attn.o_proj,0.00006642,0.01000,2.323
|
251 |
+
35,mlp.up_proj,0.00560310,0.01000,2.591
|
252 |
+
35,mlp.gate_proj,0.00618040,0.01000,2.593
|
253 |
+
35,mlp.down_proj,0.00014230,0.01000,14.348
|
254 |
+
36,self_attn.k_proj,0.00272656,0.01000,2.142
|
255 |
+
36,self_attn.v_proj,0.00022664,0.01000,2.140
|
256 |
+
36,self_attn.q_proj,0.00506092,0.01000,2.235
|
257 |
+
36,self_attn.o_proj,0.00003944,0.01000,2.267
|
258 |
+
36,mlp.up_proj,0.00556493,0.01000,2.512
|
259 |
+
36,mlp.gate_proj,0.00627050,0.01000,2.605
|
260 |
+
36,mlp.down_proj,0.00014777,0.01000,14.739
|
261 |
+
37,self_attn.k_proj,0.00350139,0.01000,2.218
|
262 |
+
37,self_attn.v_proj,0.00024101,0.01000,2.496
|
263 |
+
37,self_attn.q_proj,0.00652192,0.01000,2.461
|
264 |
+
37,self_attn.o_proj,0.00004713,0.01000,2.221
|
265 |
+
37,mlp.up_proj,0.00575240,0.01000,2.520
|
266 |
+
37,mlp.gate_proj,0.00647600,0.01000,2.536
|
267 |
+
37,mlp.down_proj,0.00014741,0.01000,14.349
|
268 |
+
38,self_attn.k_proj,0.00320943,0.01000,2.298
|
269 |
+
38,self_attn.v_proj,0.00026979,0.01000,2.567
|
270 |
+
38,self_attn.q_proj,0.00599274,0.01000,2.534
|
271 |
+
38,self_attn.o_proj,0.00004404,0.01000,2.277
|
272 |
+
38,mlp.up_proj,0.00590949,0.01000,2.542
|
273 |
+
38,mlp.gate_proj,0.00676073,0.01000,2.543
|
274 |
+
38,mlp.down_proj,0.00014726,0.01000,14.332
|
275 |
+
39,self_attn.k_proj,0.00337179,0.01000,2.200
|
276 |
+
39,self_attn.v_proj,0.00028973,0.01000,2.162
|
277 |
+
39,self_attn.q_proj,0.00590662,0.01000,2.337
|
278 |
+
39,self_attn.o_proj,0.00004030,0.01000,2.246
|
279 |
+
39,mlp.up_proj,0.00601658,0.01000,2.540
|
280 |
+
39,mlp.gate_proj,0.00695606,0.01000,2.540
|
281 |
+
39,mlp.down_proj,0.00015062,0.01000,14.299
|
282 |
+
40,self_attn.k_proj,0.00264664,0.01000,2.516
|
283 |
+
40,self_attn.v_proj,0.00033356,0.01000,2.257
|
284 |
+
40,self_attn.q_proj,0.00530649,0.01000,2.253
|
285 |
+
40,self_attn.o_proj,0.00002599,0.01000,2.350
|
286 |
+
40,mlp.up_proj,0.00619913,0.01000,2.520
|
287 |
+
40,mlp.gate_proj,0.00722285,0.01000,2.616
|
288 |
+
40,mlp.down_proj,0.00016387,0.01000,14.463
|
289 |
+
41,self_attn.k_proj,0.00339176,0.01000,2.228
|
290 |
+
41,self_attn.v_proj,0.00028905,0.01000,2.165
|
291 |
+
41,self_attn.q_proj,0.00583924,0.01000,2.278
|
292 |
+
41,self_attn.o_proj,0.00002880,0.01000,2.242
|
293 |
+
41,mlp.up_proj,0.00644423,0.01000,2.524
|
294 |
+
41,mlp.gate_proj,0.00750206,0.01000,2.546
|
295 |
+
41,mlp.down_proj,0.00015499,0.01000,14.304
|
296 |
+
42,self_attn.k_proj,0.00249285,0.01000,2.245
|
297 |
+
42,self_attn.v_proj,0.00024378,0.01000,2.224
|
298 |
+
42,self_attn.q_proj,0.00374034,0.01000,2.659
|
299 |
+
42,self_attn.o_proj,0.00002117,0.01000,2.240
|
300 |
+
42,mlp.up_proj,0.00664672,0.01000,2.537
|
301 |
+
42,mlp.gate_proj,0.00780422,0.01000,2.525
|
302 |
+
42,mlp.down_proj,0.00014769,0.01000,14.382
|
303 |
+
43,self_attn.k_proj,0.00260633,0.01000,2.141
|
304 |
+
43,self_attn.v_proj,0.00026766,0.01000,2.130
|
305 |
+
43,self_attn.q_proj,0.00408305,0.01000,2.572
|
306 |
+
43,self_attn.o_proj,0.00002182,0.01000,2.329
|
307 |
+
43,mlp.up_proj,0.00684404,0.01000,2.822
|
308 |
+
43,mlp.gate_proj,0.00809492,0.01000,2.784
|
309 |
+
43,mlp.down_proj,0.00016001,0.01000,14.271
|
310 |
+
44,self_attn.k_proj,0.00374258,0.01000,2.253
|
311 |
+
44,self_attn.v_proj,0.00040415,0.01000,2.178
|
312 |
+
44,self_attn.q_proj,0.00746921,0.01000,2.543
|
313 |
+
44,self_attn.o_proj,0.00002436,0.01000,2.347
|
314 |
+
44,mlp.up_proj,0.00705280,0.01000,2.603
|
315 |
+
44,mlp.gate_proj,0.00838935,0.01000,2.552
|
316 |
+
44,mlp.down_proj,0.00017917,0.01000,14.330
|
317 |
+
45,self_attn.k_proj,0.00259186,0.01000,2.132
|
318 |
+
45,self_attn.v_proj,0.00026303,0.01000,2.210
|
319 |
+
45,self_attn.q_proj,0.00413763,0.01000,2.554
|
320 |
+
45,self_attn.o_proj,0.00002852,0.01000,2.469
|
321 |
+
45,mlp.up_proj,0.00725334,0.01000,2.525
|
322 |
+
45,mlp.gate_proj,0.00863705,0.01000,2.544
|
323 |
+
45,mlp.down_proj,0.00017817,0.01000,14.453
|
324 |
+
46,self_attn.k_proj,0.00254326,0.01000,2.256
|
325 |
+
46,self_attn.v_proj,0.00014475,0.01000,2.151
|
326 |
+
46,self_attn.q_proj,0.00157745,0.01000,2.235
|
327 |
+
46,self_attn.o_proj,0.00001012,0.01000,2.353
|
328 |
+
46,mlp.up_proj,0.00743493,0.01000,2.587
|
329 |
+
46,mlp.gate_proj,0.00885557,0.01000,2.553
|
330 |
+
46,mlp.down_proj,0.00016830,0.01000,14.273
|
331 |
+
47,self_attn.k_proj,0.00299457,0.01000,2.139
|
332 |
+
47,self_attn.v_proj,0.00031027,0.01000,2.155
|
333 |
+
47,self_attn.q_proj,0.00418583,0.01000,2.257
|
334 |
+
47,self_attn.o_proj,0.00001946,0.01000,2.366
|
335 |
+
47,mlp.up_proj,0.00762452,0.01000,2.574
|
336 |
+
47,mlp.gate_proj,0.00908347,0.01000,2.530
|
337 |
+
47,mlp.down_proj,0.00018185,0.01000,14.303
|
338 |
+
48,self_attn.k_proj,0.00305288,0.01000,2.145
|
339 |
+
48,self_attn.v_proj,0.00032935,0.01000,2.153
|
340 |
+
48,self_attn.q_proj,0.00433153,0.01000,2.293
|
341 |
+
48,self_attn.o_proj,0.00001627,0.01000,2.237
|
342 |
+
48,mlp.up_proj,0.00781525,0.01000,2.608
|
343 |
+
48,mlp.gate_proj,0.00933722,0.01000,2.549
|
344 |
+
48,mlp.down_proj,0.00020314,0.01000,14.338
|
345 |
+
49,self_attn.k_proj,0.00313424,0.01000,2.134
|
346 |
+
49,self_attn.v_proj,0.00031533,0.01000,2.138
|
347 |
+
49,self_attn.q_proj,0.00436140,0.01000,2.357
|
348 |
+
49,self_attn.o_proj,0.00002037,0.01000,2.338
|
349 |
+
49,mlp.up_proj,0.00805284,0.01000,2.539
|
350 |
+
49,mlp.gate_proj,0.00963054,0.01000,2.830
|
351 |
+
49,mlp.down_proj,0.00020094,0.01000,14.320
|
352 |
+
50,self_attn.k_proj,0.00278837,0.01000,2.258
|
353 |
+
50,self_attn.v_proj,0.00027250,0.01000,2.165
|
354 |
+
50,self_attn.q_proj,0.00297215,0.01000,2.232
|
355 |
+
50,self_attn.o_proj,0.00001660,0.01000,2.330
|
356 |
+
50,mlp.up_proj,0.00824856,0.01000,2.536
|
357 |
+
50,mlp.gate_proj,0.00986769,0.01000,2.602
|
358 |
+
50,mlp.down_proj,0.00018972,0.01000,14.471
|
359 |
+
51,self_attn.k_proj,0.00294202,0.01000,2.117
|
360 |
+
51,self_attn.v_proj,0.00031639,0.01000,2.148
|
361 |
+
51,self_attn.q_proj,0.00395915,0.01000,2.310
|
362 |
+
51,self_attn.o_proj,0.00001946,0.01000,2.332
|
363 |
+
51,mlp.up_proj,0.00842321,0.01000,2.861
|
364 |
+
51,mlp.gate_proj,0.01007194,0.01000,2.701
|
365 |
+
51,mlp.down_proj,0.00020502,0.01000,14.477
|
366 |
+
52,self_attn.k_proj,0.00422941,0.01000,2.116
|
367 |
+
52,self_attn.v_proj,0.00042246,0.01000,2.141
|
368 |
+
52,self_attn.q_proj,0.00779809,0.01000,2.357
|
369 |
+
52,self_attn.o_proj,0.00004034,0.01000,2.314
|
370 |
+
52,mlp.up_proj,0.00860144,0.01000,2.514
|
371 |
+
52,mlp.gate_proj,0.01032276,0.01000,2.533
|
372 |
+
52,mlp.down_proj,0.00023085,0.01000,14.520
|
373 |
+
53,self_attn.k_proj,0.00370212,0.01000,2.126
|
374 |
+
53,self_attn.v_proj,0.00036148,0.01000,2.267
|
375 |
+
53,self_attn.q_proj,0.00486950,0.01000,2.246
|
376 |
+
53,self_attn.o_proj,0.00001780,0.01000,2.244
|
377 |
+
53,mlp.up_proj,0.00881044,0.01000,2.522
|
378 |
+
53,mlp.gate_proj,0.01056989,0.01000,2.586
|
379 |
+
53,mlp.down_proj,0.00022099,0.01000,14.307
|
380 |
+
54,self_attn.k_proj,0.00276770,0.01000,2.127
|
381 |
+
54,self_attn.v_proj,0.00024462,0.01000,2.157
|
382 |
+
54,self_attn.q_proj,0.00298669,0.01000,2.265
|
383 |
+
54,self_attn.o_proj,0.00002612,0.01000,2.197
|
384 |
+
54,mlp.up_proj,0.00896720,0.01000,2.513
|
385 |
+
54,mlp.gate_proj,0.01063462,0.01000,2.524
|
386 |
+
54,mlp.down_proj,0.00020870,0.01000,14.338
|
387 |
+
55,self_attn.k_proj,0.00236778,0.01000,2.114
|
388 |
+
55,self_attn.v_proj,0.00023843,0.01000,2.239
|
389 |
+
55,self_attn.q_proj,0.00318089,0.01000,2.238
|
390 |
+
55,self_attn.o_proj,0.00002542,0.01000,2.207
|
391 |
+
55,mlp.up_proj,0.00909690,0.01000,2.514
|
392 |
+
55,mlp.gate_proj,0.01082866,0.01000,2.519
|
393 |
+
55,mlp.down_proj,0.00022635,0.01000,14.289
|
394 |
+
56,self_attn.k_proj,0.00427612,0.01000,2.112
|
395 |
+
56,self_attn.v_proj,0.00053001,0.01000,2.113
|
396 |
+
56,self_attn.q_proj,0.00812575,0.01000,2.218
|
397 |
+
56,self_attn.o_proj,0.00004122,0.01000,2.211
|
398 |
+
56,mlp.up_proj,0.00927737,0.01000,2.513
|
399 |
+
56,mlp.gate_proj,0.01116376,0.01000,2.523
|
400 |
+
56,mlp.down_proj,0.00025352,0.01000,14.796
|
401 |
+
57,self_attn.k_proj,0.00353201,0.01000,2.387
|
402 |
+
57,self_attn.v_proj,0.00041382,0.01000,2.234
|
403 |
+
57,self_attn.q_proj,0.00498484,0.01000,2.242
|
404 |
+
57,self_attn.o_proj,0.00003430,0.01000,2.254
|
405 |
+
57,mlp.up_proj,0.00953939,0.01000,2.517
|
406 |
+
57,mlp.gate_proj,0.01148811,0.01000,2.516
|
407 |
+
57,mlp.down_proj,0.00025164,0.01000,14.227
|
408 |
+
58,self_attn.k_proj,0.00291640,0.01000,2.118
|
409 |
+
58,self_attn.v_proj,0.00034334,0.01000,2.162
|
410 |
+
58,self_attn.q_proj,0.00339125,0.01000,2.535
|
411 |
+
58,self_attn.o_proj,0.00002322,0.01000,2.473
|
412 |
+
58,mlp.up_proj,0.00973206,0.01000,2.751
|
413 |
+
58,mlp.gate_proj,0.01154304,0.01000,2.781
|
414 |
+
58,mlp.down_proj,0.00023655,0.01000,14.518
|
415 |
+
59,self_attn.k_proj,0.00287889,0.01000,2.109
|
416 |
+
59,self_attn.v_proj,0.00027770,0.01000,2.479
|
417 |
+
59,self_attn.q_proj,0.00299363,0.01000,2.496
|
418 |
+
59,self_attn.o_proj,0.00001713,0.01000,2.489
|
419 |
+
59,mlp.up_proj,0.01001918,0.01000,2.517
|
420 |
+
59,mlp.gate_proj,0.01211420,0.01000,2.520
|
421 |
+
59,mlp.down_proj,0.00026114,0.01000,15.326
|
422 |
+
60,self_attn.k_proj,0.00409228,0.01000,2.227
|
423 |
+
60,self_attn.v_proj,0.00061484,0.01000,2.151
|
424 |
+
60,self_attn.q_proj,0.00751846,0.01000,2.247
|
425 |
+
60,self_attn.o_proj,0.00003243,0.01000,2.233
|
426 |
+
60,mlp.up_proj,0.01027059,0.01000,2.515
|
427 |
+
60,mlp.gate_proj,0.01257516,0.01000,2.585
|
428 |
+
60,mlp.down_proj,0.00030225,0.01000,14.431
|
429 |
+
61,self_attn.k_proj,0.00328570,0.01000,2.142
|
430 |
+
61,self_attn.v_proj,0.00039241,0.01000,2.136
|
431 |
+
61,self_attn.q_proj,0.00363173,0.01000,2.238
|
432 |
+
61,self_attn.o_proj,0.00002612,0.01000,2.240
|
433 |
+
61,mlp.up_proj,0.01041328,0.01000,2.534
|
434 |
+
61,mlp.gate_proj,0.01271570,0.01000,2.600
|
435 |
+
61,mlp.down_proj,0.00028982,0.01000,14.343
|
436 |
+
62,self_attn.k_proj,0.00352705,0.01000,2.136
|
437 |
+
62,self_attn.v_proj,0.00018642,0.01000,2.148
|
438 |
+
62,self_attn.q_proj,0.00156097,0.01000,2.241
|
439 |
+
62,self_attn.o_proj,0.00001258,0.01000,2.213
|
440 |
+
62,mlp.up_proj,0.01050729,0.01000,2.521
|
441 |
+
62,mlp.gate_proj,0.01262953,0.01000,2.609
|
442 |
+
62,mlp.down_proj,0.00026185,0.01000,15.286
|
443 |
+
63,self_attn.k_proj,0.00280159,0.01000,2.116
|
444 |
+
63,self_attn.v_proj,0.00026644,0.01000,2.119
|
445 |
+
63,self_attn.q_proj,0.00263812,0.01000,2.223
|
446 |
+
63,self_attn.o_proj,0.00002351,0.01000,2.580
|
447 |
+
63,mlp.up_proj,0.01086035,0.01000,2.784
|
448 |
+
63,mlp.gate_proj,0.01329846,0.01000,2.803
|
449 |
+
63,mlp.down_proj,0.00029048,0.01000,14.323
|
450 |
+
64,self_attn.k_proj,0.00394442,0.01000,2.110
|
451 |
+
64,self_attn.v_proj,0.00057195,0.01000,2.117
|
452 |
+
64,self_attn.q_proj,0.00611058,0.01000,2.223
|
453 |
+
64,self_attn.o_proj,0.00005128,0.01000,2.211
|
454 |
+
64,mlp.up_proj,0.01127844,0.01000,2.514
|
455 |
+
64,mlp.gate_proj,0.01417364,0.01000,2.546
|
456 |
+
64,mlp.down_proj,0.00034165,0.01000,14.405
|
457 |
+
65,self_attn.k_proj,0.00291121,0.01000,2.135
|
458 |
+
65,self_attn.v_proj,0.00024450,0.01000,2.155
|
459 |
+
65,self_attn.q_proj,0.00225852,0.01000,2.248
|
460 |
+
65,self_attn.o_proj,0.00001677,0.01000,2.241
|
461 |
+
65,mlp.up_proj,0.01142753,0.01000,2.542
|
462 |
+
65,mlp.gate_proj,0.01437326,0.01000,2.552
|
463 |
+
65,mlp.down_proj,0.00032856,0.01000,14.547
|
464 |
+
66,self_attn.k_proj,0.00270638,0.01000,2.248
|
465 |
+
66,self_attn.v_proj,0.00030892,0.01000,2.153
|
466 |
+
66,self_attn.q_proj,0.00290197,0.01000,2.222
|
467 |
+
66,self_attn.o_proj,0.00003615,0.01000,2.208
|
468 |
+
66,mlp.up_proj,0.01151047,0.01000,2.512
|
469 |
+
66,mlp.gate_proj,0.01390077,0.01000,2.522
|
470 |
+
66,mlp.down_proj,0.00030044,0.01000,14.219
|
471 |
+
67,self_attn.k_proj,0.00389660,0.01000,2.344
|
472 |
+
67,self_attn.v_proj,0.00061348,0.01000,2.446
|
473 |
+
67,self_attn.q_proj,0.00594299,0.01000,2.492
|
474 |
+
67,self_attn.o_proj,0.00005146,0.01000,2.398
|
475 |
+
67,mlp.up_proj,0.01190181,0.01000,2.511
|
476 |
+
67,mlp.gate_proj,0.01440422,0.01000,2.536
|
477 |
+
67,mlp.down_proj,0.00033289,0.01000,14.234
|
478 |
+
68,self_attn.k_proj,0.00401482,0.01000,2.120
|
479 |
+
68,self_attn.v_proj,0.00077012,0.01000,2.126
|
480 |
+
68,self_attn.q_proj,0.00704382,0.01000,2.224
|
481 |
+
68,self_attn.o_proj,0.00006389,0.01000,2.235
|
482 |
+
68,mlp.up_proj,0.01254046,0.01000,2.534
|
483 |
+
68,mlp.gate_proj,0.01567885,0.01000,2.534
|
484 |
+
68,mlp.down_proj,0.00040795,0.01000,15.106
|
485 |
+
69,self_attn.k_proj,0.00336821,0.01000,2.148
|
486 |
+
69,self_attn.v_proj,0.00059575,0.01000,2.521
|
487 |
+
69,self_attn.q_proj,0.00522180,0.01000,2.483
|
488 |
+
69,self_attn.o_proj,0.00005292,0.01000,2.431
|
489 |
+
69,mlp.up_proj,0.01281787,0.01000,2.540
|
490 |
+
69,mlp.gate_proj,0.01581918,0.01000,2.544
|
491 |
+
69,mlp.down_proj,0.00039612,0.01000,14.446
|
492 |
+
70,self_attn.k_proj,0.00308006,0.01000,2.111
|
493 |
+
70,self_attn.v_proj,0.00054821,0.01000,2.115
|
494 |
+
70,self_attn.q_proj,0.00568390,0.01000,2.214
|
495 |
+
70,self_attn.o_proj,0.00009120,0.01000,2.209
|
496 |
+
70,mlp.up_proj,0.01294702,0.01000,2.630
|
497 |
+
70,mlp.gate_proj,0.01542095,0.01000,2.595
|
498 |
+
70,mlp.down_proj,0.00038803,0.01000,14.363
|
499 |
+
71,self_attn.k_proj,0.00349605,0.01000,2.133
|
500 |
+
71,self_attn.v_proj,0.00073223,0.01000,2.136
|
501 |
+
71,self_attn.q_proj,0.00758155,0.01000,2.246
|
502 |
+
71,self_attn.o_proj,0.00015277,0.01000,2.233
|
503 |
+
71,mlp.up_proj,0.01378454,0.01000,2.534
|
504 |
+
71,mlp.gate_proj,0.01638385,0.01000,2.539
|
505 |
+
71,mlp.down_proj,0.00046208,0.01000,14.225
|
506 |
+
72,self_attn.k_proj,0.00350005,0.01000,2.124
|
507 |
+
72,self_attn.v_proj,0.00079058,0.01000,2.139
|
508 |
+
72,self_attn.q_proj,0.00803616,0.01000,2.247
|
509 |
+
72,self_attn.o_proj,0.00010237,0.01000,2.223
|
510 |
+
72,mlp.up_proj,0.01440539,0.01000,2.625
|
511 |
+
72,mlp.gate_proj,0.01776966,0.01000,2.594
|
512 |
+
72,mlp.down_proj,0.00056738,0.01000,14.587
|
513 |
+
73,self_attn.k_proj,0.00349766,0.01000,2.263
|
514 |
+
73,self_attn.v_proj,0.00085442,0.01000,2.150
|
515 |
+
73,self_attn.q_proj,0.00798300,0.01000,2.259
|
516 |
+
73,self_attn.o_proj,0.00019480,0.01000,2.229
|
517 |
+
73,mlp.up_proj,0.01458233,0.01000,2.529
|
518 |
+
73,mlp.gate_proj,0.01785847,0.01000,2.536
|
519 |
+
73,mlp.down_proj,0.00060137,0.01000,14.436
|
520 |
+
74,self_attn.k_proj,0.00332821,0.01000,2.285
|
521 |
+
74,self_attn.v_proj,0.00082522,0.01000,2.223
|
522 |
+
74,self_attn.q_proj,0.00769713,0.01000,2.322
|
523 |
+
74,self_attn.o_proj,0.00021573,0.01000,2.228
|
524 |
+
74,mlp.up_proj,0.01528064,0.01000,2.759
|
525 |
+
74,mlp.gate_proj,0.01855173,0.01000,2.754
|
526 |
+
74,mlp.down_proj,0.00069634,0.01000,14.272
|
527 |
+
75,self_attn.k_proj,0.00308453,0.01000,2.131
|
528 |
+
75,self_attn.v_proj,0.00091262,0.01000,2.140
|
529 |
+
75,self_attn.q_proj,0.00795672,0.01000,2.240
|
530 |
+
75,self_attn.o_proj,0.00041153,0.01000,2.548
|
531 |
+
75,mlp.up_proj,0.01628249,0.01000,2.512
|
532 |
+
75,mlp.gate_proj,0.02033485,0.01000,2.520
|
533 |
+
75,mlp.down_proj,0.00102099,0.01000,14.455
|
534 |
+
76,self_attn.k_proj,0.00264909,0.01000,2.111
|
535 |
+
76,self_attn.v_proj,0.00085828,0.01000,2.123
|
536 |
+
76,self_attn.q_proj,0.00706456,0.01000,2.229
|
537 |
+
76,self_attn.o_proj,0.00021942,0.01000,2.427
|
538 |
+
76,mlp.up_proj,0.01879866,0.01000,2.584
|
539 |
+
76,mlp.gate_proj,0.02869590,0.01000,2.881
|
540 |
+
76,mlp.down_proj,0.00148636,0.01000,14.349
|
541 |
+
77,self_attn.k_proj,0.00307182,0.01000,2.140
|
542 |
+
77,self_attn.v_proj,0.00101997,0.01000,2.204
|
543 |
+
77,self_attn.q_proj,0.00768160,0.01000,2.316
|
544 |
+
77,self_attn.o_proj,0.00033220,0.01000,2.237
|
545 |
+
77,mlp.up_proj,0.01798910,0.01000,2.609
|
546 |
+
77,mlp.gate_proj,0.02279103,0.01000,2.577
|
547 |
+
77,mlp.down_proj,0.00181538,0.01000,14.642
|
548 |
+
78,self_attn.k_proj,0.00263272,0.01000,2.245
|
549 |
+
78,self_attn.v_proj,0.00071832,0.01000,2.135
|
550 |
+
78,self_attn.q_proj,0.00615946,0.01000,2.261
|
551 |
+
78,self_attn.o_proj,0.00041727,0.01000,2.281
|
552 |
+
78,mlp.up_proj,0.01729591,0.01000,2.872
|
553 |
+
78,mlp.gate_proj,0.02142724,0.01000,2.626
|
554 |
+
78,mlp.down_proj,0.00454792,0.01000,14.483
|
555 |
+
79,self_attn.k_proj,0.00129535,0.01000,2.190
|
556 |
+
79,self_attn.v_proj,0.00037950,0.01000,2.137
|
557 |
+
79,self_attn.q_proj,0.00238648,0.01000,2.254
|
558 |
+
79,self_attn.o_proj,0.00022433,0.01000,2.510
|
559 |
+
79,mlp.up_proj,0.01110691,0.01000,2.690
|
560 |
+
79,mlp.gate_proj,0.01304671,0.01000,2.554
|
561 |
+
79,mlp.down_proj,0.00758310,0.01000,14.453
|
quantize_config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bits": 8,
|
3 |
+
"group_size": 64,
|
4 |
+
"desc_act": false,
|
5 |
+
"sym": true,
|
6 |
+
"lm_head": false,
|
7 |
+
"quant_method": "gptq",
|
8 |
+
"checkpoint_format": "gptq",
|
9 |
+
"pack_dtype": "int32",
|
10 |
+
"meta": {
|
11 |
+
"quantizer": [
|
12 |
+
"gptqmodel:2.3.0-dev"
|
13 |
+
],
|
14 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
15 |
+
"damp_percent": 0.01,
|
16 |
+
"damp_auto_increment": 0.0025,
|
17 |
+
"static_groups": false,
|
18 |
+
"true_sequential": true,
|
19 |
+
"mse": 0.0
|
20 |
+
}
|
21 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|begin_of_text|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|eot_id|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|finetune_right_pad_id|>"
|
17 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b
|
3 |
+
size 17209920
|