Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- added_tokens.json +24 -0
- chat_template.jinja +54 -0
- config.json +53 -0
- generation_config.json +6 -0
- merges.txt +0 -0
- model-00001-of-00042.safetensors +3 -0
- model-00002-of-00042.safetensors +3 -0
- model-00003-of-00042.safetensors +3 -0
- model-00004-of-00042.safetensors +3 -0
- model-00005-of-00042.safetensors +3 -0
- model-00006-of-00042.safetensors +3 -0
- model-00007-of-00042.safetensors +3 -0
- model-00008-of-00042.safetensors +3 -0
- model-00009-of-00042.safetensors +3 -0
- model-00010-of-00042.safetensors +3 -0
- model-00011-of-00042.safetensors +3 -0
- model-00012-of-00042.safetensors +3 -0
- model-00013-of-00042.safetensors +3 -0
- model-00014-of-00042.safetensors +3 -0
- model-00015-of-00042.safetensors +3 -0
- model-00016-of-00042.safetensors +3 -0
- model-00017-of-00042.safetensors +3 -0
- model-00018-of-00042.safetensors +3 -0
- model-00019-of-00042.safetensors +3 -0
- model-00020-of-00042.safetensors +3 -0
- model-00021-of-00042.safetensors +3 -0
- model-00022-of-00042.safetensors +3 -0
- model-00023-of-00042.safetensors +3 -0
- model-00024-of-00042.safetensors +3 -0
- model-00025-of-00042.safetensors +3 -0
- model-00026-of-00042.safetensors +3 -0
- model-00027-of-00042.safetensors +3 -0
- model-00028-of-00042.safetensors +3 -0
- model-00029-of-00042.safetensors +3 -0
- model-00030-of-00042.safetensors +3 -0
- model-00031-of-00042.safetensors +3 -0
- model-00032-of-00042.safetensors +3 -0
- model-00033-of-00042.safetensors +3 -0
- model-00034-of-00042.safetensors +3 -0
- model-00035-of-00042.safetensors +3 -0
- model-00036-of-00042.safetensors +3 -0
- model-00037-of-00042.safetensors +3 -0
- model-00038-of-00042.safetensors +3 -0
- model-00039-of-00042.safetensors +3 -0
- model-00040-of-00042.safetensors +3 -0
- model-00041-of-00042.safetensors +3 -0
- model-00042-of-00042.safetensors +3 -0
- model.safetensors.index.json +0 -0
- quant_log.csv +561 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"</tool_call>": 151658,
|
3 |
+
"<tool_call>": 151657,
|
4 |
+
"<|box_end|>": 151649,
|
5 |
+
"<|box_start|>": 151648,
|
6 |
+
"<|endoftext|>": 151643,
|
7 |
+
"<|file_sep|>": 151664,
|
8 |
+
"<|fim_middle|>": 151660,
|
9 |
+
"<|fim_pad|>": 151662,
|
10 |
+
"<|fim_prefix|>": 151659,
|
11 |
+
"<|fim_suffix|>": 151661,
|
12 |
+
"<|im_end|>": 151645,
|
13 |
+
"<|im_start|>": 151644,
|
14 |
+
"<|image_pad|>": 151655,
|
15 |
+
"<|object_ref_end|>": 151647,
|
16 |
+
"<|object_ref_start|>": 151646,
|
17 |
+
"<|quad_end|>": 151651,
|
18 |
+
"<|quad_start|>": 151650,
|
19 |
+
"<|repo_name|>": 151663,
|
20 |
+
"<|video_pad|>": 151656,
|
21 |
+
"<|vision_end|>": 151653,
|
22 |
+
"<|vision_pad|>": 151654,
|
23 |
+
"<|vision_start|>": 151652
|
24 |
+
}
|
chat_template.jinja
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{%- if tools %}
|
2 |
+
{{- '<|im_start|>system\n' }}
|
3 |
+
{%- if messages[0]['role'] == 'system' %}
|
4 |
+
{{- messages[0]['content'] }}
|
5 |
+
{%- else %}
|
6 |
+
{{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
|
7 |
+
{%- endif %}
|
8 |
+
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
9 |
+
{%- for tool in tools %}
|
10 |
+
{{- "\n" }}
|
11 |
+
{{- tool | tojson }}
|
12 |
+
{%- endfor %}
|
13 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
14 |
+
{%- else %}
|
15 |
+
{%- if messages[0]['role'] == 'system' %}
|
16 |
+
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
17 |
+
{%- else %}
|
18 |
+
{{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
|
19 |
+
{%- endif %}
|
20 |
+
{%- endif %}
|
21 |
+
{%- for message in messages %}
|
22 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
23 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
24 |
+
{%- elif message.role == "assistant" %}
|
25 |
+
{{- '<|im_start|>' + message.role }}
|
26 |
+
{%- if message.content %}
|
27 |
+
{{- '\n' + message.content }}
|
28 |
+
{%- endif %}
|
29 |
+
{%- for tool_call in message.tool_calls %}
|
30 |
+
{%- if tool_call.function is defined %}
|
31 |
+
{%- set tool_call = tool_call.function %}
|
32 |
+
{%- endif %}
|
33 |
+
{{- '\n<tool_call>\n{"name": "' }}
|
34 |
+
{{- tool_call.name }}
|
35 |
+
{{- '", "arguments": ' }}
|
36 |
+
{{- tool_call.arguments | tojson }}
|
37 |
+
{{- '}\n</tool_call>' }}
|
38 |
+
{%- endfor %}
|
39 |
+
{{- '<|im_end|>\n' }}
|
40 |
+
{%- elif message.role == "tool" %}
|
41 |
+
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
42 |
+
{{- '<|im_start|>user' }}
|
43 |
+
{%- endif %}
|
44 |
+
{{- '\n<tool_response>\n' }}
|
45 |
+
{{- message.content }}
|
46 |
+
{{- '\n</tool_response>' }}
|
47 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
48 |
+
{{- '<|im_end|>\n' }}
|
49 |
+
{%- endif %}
|
50 |
+
{%- endif %}
|
51 |
+
{%- endfor %}
|
52 |
+
{%- if add_generation_prompt %}
|
53 |
+
{{- '<|im_start|>assistant\n' }}
|
54 |
+
{%- endif %}
|
config.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Qwen2ForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"bos_token_id": 151643,
|
7 |
+
"eos_token_id": 151645,
|
8 |
+
"hidden_act": "silu",
|
9 |
+
"hidden_size": 8192,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 29568,
|
12 |
+
"max_position_embeddings": 131072,
|
13 |
+
"max_window_layers": 70,
|
14 |
+
"model_type": "qwen2",
|
15 |
+
"num_attention_heads": 64,
|
16 |
+
"num_hidden_layers": 80,
|
17 |
+
"num_key_value_heads": 8,
|
18 |
+
"quantization_config": {
|
19 |
+
"bits": 8,
|
20 |
+
"checkpoint_format": "gptq",
|
21 |
+
"desc_act": false,
|
22 |
+
"group_size": 32,
|
23 |
+
"lm_head": false,
|
24 |
+
"memory_aware_allocation": true,
|
25 |
+
"memory_threshold": 0.8,
|
26 |
+
"meta": {
|
27 |
+
"damp_auto_increment": 0.01,
|
28 |
+
"damp_percent": 0.1,
|
29 |
+
"mse": 0.0,
|
30 |
+
"quantizer": [
|
31 |
+
"gptqmodel:4.0.0-dev"
|
32 |
+
],
|
33 |
+
"static_groups": false,
|
34 |
+
"true_sequential": true,
|
35 |
+
"uri": "https://github.com/modelcloud/gptqmodel",
|
36 |
+
"v2": false,
|
37 |
+
"v2_alpha": 0.25
|
38 |
+
},
|
39 |
+
"pack_dtype": "int32",
|
40 |
+
"quant_method": "gptq",
|
41 |
+
"sym": true
|
42 |
+
},
|
43 |
+
"rms_norm_eps": 1e-06,
|
44 |
+
"rope_scaling": null,
|
45 |
+
"rope_theta": 1000000.0,
|
46 |
+
"sliding_window": 131072,
|
47 |
+
"tie_word_embeddings": false,
|
48 |
+
"torch_dtype": "float16",
|
49 |
+
"transformers_version": "4.52.4",
|
50 |
+
"use_cache": true,
|
51 |
+
"use_sliding_window": false,
|
52 |
+
"vocab_size": 152064
|
53 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 151643,
|
4 |
+
"eos_token_id": 151645,
|
5 |
+
"transformers_version": "4.52.4"
|
6 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model-00001-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86d66e00d30887a66e68f6cd5d72c12fabc2919f9b519b003b3c2ee863f0a0bd
|
3 |
+
size 2491416712
|
model-00002-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d41a9d98ba81271cbcbe3ba7148208c98b5173b7d2776f331466415ab68b363
|
3 |
+
size 1994070824
|
model-00003-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4646c651e6cb4c69acb221b10e998864e67cd6e2b044d192f1511b9ef4e91d1d
|
3 |
+
size 1939041640
|
model-00004-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0029e3728b2dcd3ab060a459ff7774005c7f80674cb245c96bb4ed4d0709c356
|
3 |
+
size 1994054352
|
model-00005-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ef12563193062ae643c486902b4e307d7df1774fab93f57583a51c7d014e212
|
3 |
+
size 1920620808
|
model-00006-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8af313e8070c1ac496e19f9323a0eeeaa6b158917372bd7578d6c6bb9b68aa45
|
3 |
+
size 1920620784
|
model-00007-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3166450ab7fb88c918416e7ead4a76cbc4ae9f5d25609b86846d4a6d83aa138
|
3 |
+
size 1920620872
|
model-00008-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09540dca82465dda17f4fe89ac6af0453f43adab5158286d147d5c79c5e6a7e3
|
3 |
+
size 1920620872
|
model-00009-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f74819f49c0d99da35302cba147c671ee257b26afdf4324122b3ec2618816517
|
3 |
+
size 1920620872
|
model-00010-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78bd5c98ac115fd02bc83953809f0c26ae57ec6587cfd69dcd62bd182dea961d
|
3 |
+
size 1920620872
|
model-00011-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74ae4ff0f19d535a4f621329033aae128704f508d62bddc55a2b36d74103a54a
|
3 |
+
size 1920620872
|
model-00012-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e93737c40202750274e525d5d27e539457b9ed32662ac406b1c43fceca5005f
|
3 |
+
size 1920620872
|
model-00013-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:953d9716da4c162ac3f3a5b7b6cbb329b643c72934000b1fc4f5c57f5d7051c0
|
3 |
+
size 1920620872
|
model-00014-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a6dacd117982a798dba93956e5f80831f8ead33e20fdf29f6dfcb1dfbe4a122
|
3 |
+
size 1920620872
|
model-00015-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:453a14b4beffdcde20e56d3e6b74606938cb978dba4bcfecbe4edc24b9d53b1c
|
3 |
+
size 1920620872
|
model-00016-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c980a966efcbe78404de2d499d13e5c22d1af3cba897ab02ef38c6d2c85fab8
|
3 |
+
size 1920620872
|
model-00017-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1ef89002d2bcdde6ed5293f640dbef29924260b9262a0fded474135c698c3de7
|
3 |
+
size 1920620872
|
model-00018-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:931c6409a8c4378ba5b9c17f26ec4b3d87a5bd38ec9a87ef3e73b580265eb23b
|
3 |
+
size 1920620872
|
model-00019-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f304e8326481d9c1d119ef89412a62bfb7747b40ffcd88429a079604db9997e9
|
3 |
+
size 1920620872
|
model-00020-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5e226cca001413b9ccb101a95e65047e54cfb91e76b0e6f38b1db534705575f
|
3 |
+
size 1920620872
|
model-00021-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1967df7eff2a72dc8f8412f458007528358253cf92c0351827e7209f77a6bf65
|
3 |
+
size 1920620872
|
model-00022-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b2c6cd98252ed51edf44918ad28f67b3189c7ad8569b7380ecd6d55475dd0b69
|
3 |
+
size 1920620872
|
model-00023-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:964259f0fd06a7ef01cd22de5b99b469629f36f96da2816e5b8bda440f9672ee
|
3 |
+
size 1920620872
|
model-00024-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:878ef405be0a1e0500218b5c64f16f8804dbe3975a1659302023d7cf978675d7
|
3 |
+
size 1920620872
|
model-00025-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6a6d5964fd7097d494a9921100945b3944cefece7a554aab646ab7347d9237a
|
3 |
+
size 1920620872
|
model-00026-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fcc46cb211c0f9b190d1698e17ed52c5016ee8c9793cdc67fd6c1cc1f2353e9
|
3 |
+
size 1920620872
|
model-00027-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:077f9bf54e8ebc1966a790287259acfadac64864d5e3e8d11a53026ca5fef8ae
|
3 |
+
size 1920620872
|
model-00028-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e0ee67ac83c0a0ede699c4ddcf96567ee028e34972cbc99eaa2a873ec24e937
|
3 |
+
size 1920620872
|
model-00029-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d735b3cae0c633dfcdabf4a9cfc6865648fcbf169161d9c1d473d7b5a72e3d9c
|
3 |
+
size 1920620872
|
model-00030-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ab65e1d4c1eada84bc89fb56ebeadefe85beffbaa089443578d2cd5f97e270e
|
3 |
+
size 1920620872
|
model-00031-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d11d168fd1ec349fac385ec65e6eb4f123aef3f275e1b0c748499ee691c6ee54
|
3 |
+
size 1920620872
|
model-00032-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c44860d8520ea5a9156c1fb841a66af4a745de031c6f3b30b37ac7311be342f7
|
3 |
+
size 1920620872
|
model-00033-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1e299cb40d4e4d17d08f1878081e4c72c6758d7ed2d087bd1fcf9ea3e248e393
|
3 |
+
size 1920620872
|
model-00034-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:83b217a8f2831b986912543de732f8b315c9a2af2bbcdc2a9c87b4651f99291f
|
3 |
+
size 1920620872
|
model-00035-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50dafcb6e6a5d0c048e54505fab6e339e90de469a1aa13ac5e470d0b2e54a90e
|
3 |
+
size 1920620872
|
model-00036-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1c99c1a2784b919c25cb1bc469a4c8207903c550f033c6691db365de663bfbbc
|
3 |
+
size 1920620872
|
model-00037-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b5e3b75330e943ea4398252945de1a4c583f3b5ab3363c22bb1241616e71840
|
3 |
+
size 1920620872
|
model-00038-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51bd0c8a7f1e2ed56b1cc0113a12b1f3525f3ce17d022be19e2d47c744e9de60
|
3 |
+
size 1920620872
|
model-00039-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f06679a981bee69785172eb95f1b8679e527322d6fd0bca02df44e160a65358
|
3 |
+
size 1920620872
|
model-00040-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c88e362e2f29d492ee82dfe5f54108b93968fcfbef002e58677fa819974935e
|
3 |
+
size 1920620872
|
model-00041-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3582e7b57110273dd4fd22a4c2b7009a1f98cbbd1770fb0638dcfd599782ad6f
|
3 |
+
size 2491416704
|
model-00042-of-00042.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd9e59cb735ce91dac931a750107d4e4253730544303e795369d91561e36f3f0
|
3 |
+
size 1755332904
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
quant_log.csv
ADDED
@@ -0,0 +1,561 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
layer,module,loss,samples,damp,time
|
2 |
+
0,self_attn.q_proj,0.0000000006,0.10000,8.433
|
3 |
+
0,self_attn.v_proj,0.0000000000,0.10000,8.523
|
4 |
+
0,self_attn.k_proj,0.0000000001,0.10000,8.525
|
5 |
+
0,self_attn.o_proj,0.0000000000,0.10000,3.546
|
6 |
+
0,mlp.gate_proj,0.0000000003,0.10000,6.809
|
7 |
+
0,mlp.up_proj,0.0000000003,0.10000,6.818
|
8 |
+
0,mlp.down_proj,0.0000000000,0.10000,20.401
|
9 |
+
1,self_attn.k_proj,0.0000000001,0.10000,7.392
|
10 |
+
1,self_attn.q_proj,0.0000000002,0.10000,7.457
|
11 |
+
1,self_attn.v_proj,0.0000000000,0.10000,7.479
|
12 |
+
1,self_attn.o_proj,0.0000000000,0.10000,2.588
|
13 |
+
1,mlp.gate_proj,0.0000000009,0.10000,5.444
|
14 |
+
1,mlp.up_proj,0.0000000009,0.10000,5.464
|
15 |
+
1,mlp.down_proj,0.0000012333,0.10000,19.740
|
16 |
+
2,self_attn.q_proj,0.0000000117,0.10000,7.445
|
17 |
+
2,self_attn.k_proj,0.0000000033,0.10000,7.475
|
18 |
+
2,self_attn.v_proj,0.0000000013,0.10000,7.496
|
19 |
+
2,self_attn.o_proj,0.0000000000,0.10000,2.569
|
20 |
+
2,mlp.up_proj,0.0000000016,0.10000,5.568
|
21 |
+
2,mlp.gate_proj,0.0000000016,0.10000,5.591
|
22 |
+
2,mlp.down_proj,0.0000000000,0.10000,19.740
|
23 |
+
3,self_attn.v_proj,0.0000000012,0.10000,7.504
|
24 |
+
3,self_attn.k_proj,0.0000000026,0.10000,7.513
|
25 |
+
3,self_attn.q_proj,0.0000000084,0.10000,7.554
|
26 |
+
3,self_attn.o_proj,0.0000000000,0.10000,2.598
|
27 |
+
3,mlp.gate_proj,0.0000000029,0.10000,5.530
|
28 |
+
3,mlp.up_proj,0.0000000028,0.10000,5.551
|
29 |
+
3,mlp.down_proj,0.0000000001,0.10000,20.208
|
30 |
+
4,self_attn.k_proj,0.0000000035,0.10000,8.418
|
31 |
+
4,self_attn.q_proj,0.0000000143,0.10000,8.438
|
32 |
+
4,self_attn.v_proj,0.0000000019,0.10000,8.457
|
33 |
+
4,self_attn.o_proj,0.0000000000,0.10000,2.637
|
34 |
+
4,mlp.gate_proj,0.0000000054,0.10000,5.674
|
35 |
+
4,mlp.up_proj,0.0000000053,0.10000,5.700
|
36 |
+
4,mlp.down_proj,0.0000000001,0.10000,21.098
|
37 |
+
5,self_attn.q_proj,0.0000000177,0.10000,7.396
|
38 |
+
5,self_attn.v_proj,0.0000000026,0.10000,7.403
|
39 |
+
5,self_attn.k_proj,0.0000000044,0.10000,7.450
|
40 |
+
5,self_attn.o_proj,0.0000000000,0.10000,2.574
|
41 |
+
5,mlp.gate_proj,0.0000000081,0.10000,5.457
|
42 |
+
5,mlp.up_proj,0.0000000078,0.10000,5.484
|
43 |
+
5,mlp.down_proj,0.0000000002,0.10000,21.327
|
44 |
+
6,self_attn.k_proj,0.0000000031,0.10000,7.467
|
45 |
+
6,self_attn.q_proj,0.0000000118,0.10000,7.508
|
46 |
+
6,self_attn.v_proj,0.0000000018,0.10000,7.533
|
47 |
+
6,self_attn.o_proj,0.0000000000,0.10000,2.456
|
48 |
+
6,mlp.up_proj,0.0000000095,0.10000,5.399
|
49 |
+
6,mlp.gate_proj,0.0000000099,0.10000,5.421
|
50 |
+
6,mlp.down_proj,0.0000000002,0.10000,21.591
|
51 |
+
7,self_attn.v_proj,0.0000000023,0.10000,7.400
|
52 |
+
7,self_attn.k_proj,0.0000000043,0.10000,7.482
|
53 |
+
7,self_attn.q_proj,0.0000000171,0.10000,7.488
|
54 |
+
7,self_attn.o_proj,0.0000000000,0.10000,2.625
|
55 |
+
7,mlp.up_proj,0.0000000120,0.10000,5.333
|
56 |
+
7,mlp.gate_proj,0.0000000123,0.10000,5.361
|
57 |
+
7,mlp.down_proj,0.0000000003,0.10000,22.012
|
58 |
+
8,self_attn.v_proj,0.0000000024,0.10000,7.140
|
59 |
+
8,self_attn.k_proj,0.0000000041,0.10000,7.208
|
60 |
+
8,self_attn.q_proj,0.0000000155,0.10000,7.214
|
61 |
+
8,self_attn.o_proj,0.0000000001,0.10000,2.608
|
62 |
+
8,mlp.gate_proj,0.0000000149,0.10000,5.446
|
63 |
+
8,mlp.up_proj,0.0000000142,0.10000,5.502
|
64 |
+
8,mlp.down_proj,0.0000000004,0.10000,22.092
|
65 |
+
9,self_attn.k_proj,0.0000000110,0.10000,7.281
|
66 |
+
9,self_attn.v_proj,0.0000000069,0.10000,7.285
|
67 |
+
9,self_attn.q_proj,0.0000000496,0.10000,7.300
|
68 |
+
9,self_attn.o_proj,0.0000000002,0.10000,2.397
|
69 |
+
9,mlp.up_proj,0.0000000160,0.10000,5.529
|
70 |
+
9,mlp.gate_proj,0.0000000166,0.10000,5.561
|
71 |
+
9,mlp.down_proj,0.0000000004,0.10000,22.118
|
72 |
+
10,self_attn.k_proj,0.0000000071,0.10000,7.222
|
73 |
+
10,self_attn.q_proj,0.0000000296,0.10000,7.225
|
74 |
+
10,self_attn.v_proj,0.0000000038,0.10000,7.233
|
75 |
+
10,self_attn.o_proj,0.0000000002,0.10000,2.642
|
76 |
+
10,mlp.gate_proj,0.0000000207,0.10000,5.568
|
77 |
+
10,mlp.up_proj,0.0000000198,0.10000,5.604
|
78 |
+
10,mlp.down_proj,0.0000000006,0.10000,22.287
|
79 |
+
11,self_attn.q_proj,0.0000000349,0.10000,7.111
|
80 |
+
11,self_attn.k_proj,0.0000000080,0.10000,7.175
|
81 |
+
11,self_attn.v_proj,0.0000000038,0.10000,7.201
|
82 |
+
11,self_attn.o_proj,0.0000000003,0.10000,2.594
|
83 |
+
11,mlp.gate_proj,0.0000000242,0.10000,5.426
|
84 |
+
11,mlp.up_proj,0.0000000231,0.10000,5.453
|
85 |
+
11,mlp.down_proj,0.0000000008,0.10000,21.926
|
86 |
+
12,self_attn.v_proj,0.0000000057,0.10000,7.172
|
87 |
+
12,self_attn.q_proj,0.0000000486,0.10000,7.217
|
88 |
+
12,self_attn.k_proj,0.0000000105,0.10000,7.263
|
89 |
+
12,self_attn.o_proj,0.0000000004,0.10000,2.451
|
90 |
+
12,mlp.up_proj,0.0000000264,0.10000,5.450
|
91 |
+
12,mlp.gate_proj,0.0000000275,0.10000,5.468
|
92 |
+
12,mlp.down_proj,0.0000000010,0.10000,21.748
|
93 |
+
13,self_attn.v_proj,0.0000000055,0.10000,7.622
|
94 |
+
13,self_attn.q_proj,0.0000000456,0.10000,7.642
|
95 |
+
13,self_attn.k_proj,0.0000000100,0.10000,7.669
|
96 |
+
13,self_attn.o_proj,0.0000000004,0.10000,2.690
|
97 |
+
13,mlp.gate_proj,0.0000000321,0.10000,5.361
|
98 |
+
13,mlp.up_proj,0.0000000307,0.10000,5.397
|
99 |
+
13,mlp.down_proj,0.0000000013,0.10000,21.820
|
100 |
+
14,self_attn.q_proj,0.0000000447,0.10000,7.525
|
101 |
+
14,self_attn.k_proj,0.0000000098,0.10000,7.554
|
102 |
+
14,self_attn.v_proj,0.0000000054,0.10000,7.583
|
103 |
+
14,self_attn.o_proj,0.0000000006,0.10000,2.511
|
104 |
+
14,mlp.up_proj,0.0000000323,0.10000,5.552
|
105 |
+
14,mlp.gate_proj,0.0000000337,0.10000,5.571
|
106 |
+
14,mlp.down_proj,0.0000000015,0.10000,22.382
|
107 |
+
15,self_attn.v_proj,0.0000000066,0.10000,7.419
|
108 |
+
15,self_attn.q_proj,0.0000000590,0.10000,7.450
|
109 |
+
15,self_attn.k_proj,0.0000000120,0.10000,7.475
|
110 |
+
15,self_attn.o_proj,0.0000000005,0.10000,2.598
|
111 |
+
15,mlp.gate_proj,0.0000000427,0.10000,5.365
|
112 |
+
15,mlp.up_proj,0.0000000408,0.10000,5.392
|
113 |
+
15,mlp.down_proj,0.0000000019,0.10000,22.294
|
114 |
+
16,self_attn.k_proj,0.0000000112,0.10000,7.328
|
115 |
+
16,self_attn.q_proj,0.0000000529,0.10000,7.336
|
116 |
+
16,self_attn.v_proj,0.0000000058,0.10000,7.372
|
117 |
+
16,self_attn.o_proj,0.0000000005,0.10000,2.501
|
118 |
+
16,mlp.gate_proj,0.0000000467,0.10000,5.539
|
119 |
+
16,mlp.up_proj,0.0000000447,0.10000,5.571
|
120 |
+
16,mlp.down_proj,0.0000000022,0.10000,22.187
|
121 |
+
17,self_attn.v_proj,0.0000000057,0.10000,7.075
|
122 |
+
17,self_attn.k_proj,0.0000000095,0.10000,7.155
|
123 |
+
17,self_attn.q_proj,0.0000000433,0.10000,7.172
|
124 |
+
17,self_attn.o_proj,0.0000000008,0.10000,2.519
|
125 |
+
17,mlp.up_proj,0.0000000434,0.10000,5.482
|
126 |
+
17,mlp.gate_proj,0.0000000451,0.10000,5.545
|
127 |
+
17,mlp.down_proj,0.0000000024,0.10000,22.221
|
128 |
+
18,self_attn.k_proj,0.0000000175,0.10000,7.383
|
129 |
+
18,self_attn.q_proj,0.0000000805,0.10000,7.388
|
130 |
+
18,self_attn.v_proj,0.0000000099,0.10000,7.409
|
131 |
+
18,self_attn.o_proj,0.0000000006,0.10000,2.573
|
132 |
+
18,mlp.up_proj,0.0000000523,0.10000,5.492
|
133 |
+
18,mlp.gate_proj,0.0000000546,0.10000,5.510
|
134 |
+
18,mlp.down_proj,0.0000000029,0.10000,22.175
|
135 |
+
19,self_attn.v_proj,0.0000000140,0.10000,7.286
|
136 |
+
19,self_attn.q_proj,0.0000001108,0.10000,7.319
|
137 |
+
19,self_attn.k_proj,0.0000000230,0.10000,7.330
|
138 |
+
19,self_attn.o_proj,0.0000000007,0.10000,2.389
|
139 |
+
19,mlp.gate_proj,0.0000000634,0.10000,5.577
|
140 |
+
19,mlp.up_proj,0.0000000607,0.10000,5.618
|
141 |
+
19,mlp.down_proj,0.0000000040,0.10000,22.311
|
142 |
+
20,self_attn.v_proj,0.0000000056,0.10000,7.417
|
143 |
+
20,self_attn.q_proj,0.0000000457,0.10000,7.440
|
144 |
+
20,self_attn.k_proj,0.0000000093,0.10000,7.454
|
145 |
+
20,self_attn.o_proj,0.0000000010,0.10000,2.418
|
146 |
+
20,mlp.up_proj,0.0000000728,0.10000,5.482
|
147 |
+
20,mlp.gate_proj,0.0000000765,0.10000,5.566
|
148 |
+
20,mlp.down_proj,0.0000000054,0.10000,21.961
|
149 |
+
21,self_attn.q_proj,0.0000000387,0.10000,7.487
|
150 |
+
21,self_attn.v_proj,0.0000000051,0.10000,7.562
|
151 |
+
21,self_attn.k_proj,0.0000000078,0.10000,7.574
|
152 |
+
21,self_attn.o_proj,0.0000000017,0.10000,2.428
|
153 |
+
21,mlp.gate_proj,0.0000000693,0.10000,5.477
|
154 |
+
21,mlp.up_proj,0.0000000665,0.10000,5.515
|
155 |
+
21,mlp.down_proj,0.0000000053,0.10000,22.161
|
156 |
+
22,self_attn.k_proj,0.0000000189,0.10000,7.254
|
157 |
+
22,self_attn.q_proj,0.0000001047,0.10000,7.257
|
158 |
+
22,self_attn.v_proj,0.0000000119,0.10000,7.286
|
159 |
+
22,self_attn.o_proj,0.0000000023,0.10000,2.451
|
160 |
+
22,mlp.gate_proj,0.0000000953,0.10000,5.428
|
161 |
+
22,mlp.up_proj,0.0000000915,0.10000,5.452
|
162 |
+
22,mlp.down_proj,0.0000000101,0.10000,22.137
|
163 |
+
23,self_attn.k_proj,0.0000000156,0.10000,7.354
|
164 |
+
23,self_attn.q_proj,0.0000000896,0.10000,7.405
|
165 |
+
23,self_attn.v_proj,0.0000000106,0.10000,7.414
|
166 |
+
23,self_attn.o_proj,0.0000000028,0.10000,2.547
|
167 |
+
23,mlp.gate_proj,0.0000001081,0.10000,5.480
|
168 |
+
23,mlp.up_proj,0.0000001026,0.10000,5.505
|
169 |
+
23,mlp.down_proj,0.0000000106,0.10000,22.164
|
170 |
+
24,self_attn.v_proj,0.0000000111,0.10000,7.370
|
171 |
+
24,self_attn.k_proj,0.0000000138,0.10000,7.429
|
172 |
+
24,self_attn.q_proj,0.0000000774,0.10000,7.461
|
173 |
+
24,self_attn.o_proj,0.0000000028,0.10000,2.425
|
174 |
+
24,mlp.gate_proj,0.0000001285,0.10000,5.526
|
175 |
+
24,mlp.up_proj,0.0000001210,0.10000,5.561
|
176 |
+
24,mlp.down_proj,0.0000000125,0.10000,22.022
|
177 |
+
25,self_attn.q_proj,0.0000001378,0.10000,7.288
|
178 |
+
25,self_attn.k_proj,0.0000000241,0.10000,7.362
|
179 |
+
25,self_attn.v_proj,0.0000000155,0.10000,7.407
|
180 |
+
25,self_attn.o_proj,0.0000000029,0.10000,2.509
|
181 |
+
25,mlp.gate_proj,0.0000001537,0.10000,5.490
|
182 |
+
25,mlp.up_proj,0.0000001457,0.10000,5.528
|
183 |
+
25,mlp.down_proj,0.0000000145,0.10000,22.637
|
184 |
+
26,self_attn.v_proj,0.0000000118,0.10000,7.436
|
185 |
+
26,self_attn.q_proj,0.0000000986,0.10000,7.472
|
186 |
+
26,self_attn.k_proj,0.0000000178,0.10000,7.533
|
187 |
+
26,self_attn.o_proj,0.0000000041,0.10000,2.595
|
188 |
+
26,mlp.up_proj,0.0000001662,0.10000,5.439
|
189 |
+
26,mlp.gate_proj,0.0000001746,0.10000,5.441
|
190 |
+
26,mlp.down_proj,0.0000000153,0.10000,22.215
|
191 |
+
27,self_attn.v_proj,0.0000000124,0.10000,7.330
|
192 |
+
27,self_attn.q_proj,0.0000000959,0.10000,7.344
|
193 |
+
27,self_attn.k_proj,0.0000000178,0.10000,7.351
|
194 |
+
27,self_attn.o_proj,0.0000000039,0.10000,2.502
|
195 |
+
27,mlp.gate_proj,0.0000001913,0.10000,5.305
|
196 |
+
27,mlp.up_proj,0.0000001828,0.10000,5.325
|
197 |
+
27,mlp.down_proj,0.0000000164,0.10000,22.087
|
198 |
+
28,self_attn.q_proj,0.0000000973,0.10000,7.503
|
199 |
+
28,self_attn.k_proj,0.0000000174,0.10000,7.519
|
200 |
+
28,self_attn.v_proj,0.0000000132,0.10000,7.531
|
201 |
+
28,self_attn.o_proj,0.0000000028,0.10000,2.589
|
202 |
+
28,mlp.gate_proj,0.0000002029,0.10000,5.543
|
203 |
+
28,mlp.up_proj,0.0000001947,0.10000,5.569
|
204 |
+
28,mlp.down_proj,0.0000000182,0.10000,22.324
|
205 |
+
29,self_attn.q_proj,0.0000001183,0.10000,7.513
|
206 |
+
29,self_attn.k_proj,0.0000000219,0.10000,7.569
|
207 |
+
29,self_attn.v_proj,0.0000000144,0.10000,7.600
|
208 |
+
29,self_attn.o_proj,0.0000000052,0.10000,2.698
|
209 |
+
29,mlp.gate_proj,0.0000002155,0.10000,5.447
|
210 |
+
29,mlp.up_proj,0.0000002066,0.10000,5.480
|
211 |
+
29,mlp.down_proj,0.0000000202,0.10000,21.811
|
212 |
+
30,self_attn.q_proj,0.0000001278,0.10000,7.383
|
213 |
+
30,self_attn.v_proj,0.0000000172,0.10000,7.454
|
214 |
+
30,self_attn.k_proj,0.0000000221,0.10000,7.514
|
215 |
+
30,self_attn.o_proj,0.0000000052,0.10000,2.480
|
216 |
+
30,mlp.gate_proj,0.0000002370,0.10000,5.544
|
217 |
+
30,mlp.up_proj,0.0000002246,0.10000,5.586
|
218 |
+
30,mlp.down_proj,0.0000000225,0.10000,21.930
|
219 |
+
31,self_attn.k_proj,0.0000000243,0.10000,7.175
|
220 |
+
31,self_attn.v_proj,0.0000000185,0.10000,7.233
|
221 |
+
31,self_attn.q_proj,0.0000001357,0.10000,7.270
|
222 |
+
31,self_attn.o_proj,0.0000000032,0.10000,2.675
|
223 |
+
31,mlp.gate_proj,0.0000002590,0.10000,5.433
|
224 |
+
31,mlp.up_proj,0.0000002450,0.10000,5.452
|
225 |
+
31,mlp.down_proj,0.0000000241,0.10000,21.739
|
226 |
+
32,self_attn.q_proj,0.0000001497,0.10000,7.253
|
227 |
+
32,self_attn.v_proj,0.0000000206,0.10000,7.273
|
228 |
+
32,self_attn.k_proj,0.0000000269,0.10000,7.295
|
229 |
+
32,self_attn.o_proj,0.0000000036,0.10000,2.636
|
230 |
+
32,mlp.up_proj,0.0000002581,0.10000,5.534
|
231 |
+
32,mlp.gate_proj,0.0000002723,0.10000,5.551
|
232 |
+
32,mlp.down_proj,0.0000000252,0.10000,21.909
|
233 |
+
33,self_attn.k_proj,0.0000000253,0.10000,7.358
|
234 |
+
33,self_attn.q_proj,0.0000001362,0.10000,7.369
|
235 |
+
33,self_attn.v_proj,0.0000000229,0.10000,7.379
|
236 |
+
33,self_attn.o_proj,0.0000000038,0.10000,2.527
|
237 |
+
33,mlp.gate_proj,0.0000002941,0.10000,5.440
|
238 |
+
33,mlp.up_proj,0.0000002748,0.10000,5.469
|
239 |
+
33,mlp.down_proj,0.0000000269,0.10000,22.038
|
240 |
+
34,self_attn.k_proj,0.0000000286,0.10000,7.464
|
241 |
+
34,self_attn.v_proj,0.0000000262,0.10000,7.484
|
242 |
+
34,self_attn.q_proj,0.0000001592,0.10000,7.494
|
243 |
+
34,self_attn.o_proj,0.0000000032,0.10000,2.582
|
244 |
+
34,mlp.up_proj,0.0000002879,0.10000,5.538
|
245 |
+
34,mlp.gate_proj,0.0000003089,0.10000,5.562
|
246 |
+
34,mlp.down_proj,0.0000000290,0.10000,21.733
|
247 |
+
35,self_attn.k_proj,0.0000000311,0.10000,7.366
|
248 |
+
35,self_attn.v_proj,0.0000000291,0.10000,7.377
|
249 |
+
35,self_attn.q_proj,0.0000001728,0.10000,7.379
|
250 |
+
35,self_attn.o_proj,0.0000000032,0.10000,2.550
|
251 |
+
35,mlp.gate_proj,0.0000003185,0.10000,5.484
|
252 |
+
35,mlp.up_proj,0.0000002989,0.10000,5.508
|
253 |
+
35,mlp.down_proj,0.0000000307,0.10000,21.711
|
254 |
+
36,self_attn.v_proj,0.0000000310,0.10000,7.721
|
255 |
+
36,self_attn.q_proj,0.0000001821,0.10000,7.742
|
256 |
+
36,self_attn.k_proj,0.0000000325,0.10000,7.751
|
257 |
+
36,self_attn.o_proj,0.0000000034,0.10000,2.624
|
258 |
+
36,mlp.gate_proj,0.0000003245,0.10000,5.345
|
259 |
+
36,mlp.up_proj,0.0000003066,0.10000,5.368
|
260 |
+
36,mlp.down_proj,0.0000000304,0.10000,21.899
|
261 |
+
37,self_attn.k_proj,0.0000000316,0.10000,6.939
|
262 |
+
37,self_attn.v_proj,0.0000000282,0.10000,6.953
|
263 |
+
37,self_attn.q_proj,0.0000001720,0.10000,6.976
|
264 |
+
37,self_attn.o_proj,0.0000000037,0.10000,2.519
|
265 |
+
37,mlp.up_proj,0.0000003163,0.10000,5.371
|
266 |
+
37,mlp.gate_proj,0.0000003322,0.10000,5.397
|
267 |
+
37,mlp.down_proj,0.0000000318,0.10000,19.887
|
268 |
+
38,self_attn.k_proj,0.0000000312,0.10000,7.383
|
269 |
+
38,self_attn.q_proj,0.0000001654,0.10000,7.412
|
270 |
+
38,self_attn.v_proj,0.0000000256,0.10000,7.441
|
271 |
+
38,self_attn.o_proj,0.0000000050,0.10000,2.496
|
272 |
+
38,mlp.up_proj,0.0000003290,0.10000,5.493
|
273 |
+
38,mlp.gate_proj,0.0000003453,0.10000,5.544
|
274 |
+
38,mlp.down_proj,0.0000000338,0.10000,20.116
|
275 |
+
39,self_attn.k_proj,0.0000000301,0.10000,7.216
|
276 |
+
39,self_attn.q_proj,0.0000001588,0.10000,7.269
|
277 |
+
39,self_attn.v_proj,0.0000000260,0.10000,7.290
|
278 |
+
39,self_attn.o_proj,0.0000000050,0.10000,2.631
|
279 |
+
39,mlp.gate_proj,0.0000003537,0.10000,5.568
|
280 |
+
39,mlp.up_proj,0.0000003390,0.10000,5.618
|
281 |
+
39,mlp.down_proj,0.0000000358,0.10000,19.983
|
282 |
+
40,self_attn.k_proj,0.0000000301,0.10000,7.297
|
283 |
+
40,self_attn.q_proj,0.0000001559,0.10000,7.342
|
284 |
+
40,self_attn.v_proj,0.0000000208,0.10000,7.355
|
285 |
+
40,self_attn.o_proj,0.0000000073,0.10000,2.527
|
286 |
+
40,mlp.up_proj,0.0000003505,0.10000,5.421
|
287 |
+
40,mlp.gate_proj,0.0000003630,0.10000,5.451
|
288 |
+
40,mlp.down_proj,0.0000000389,0.10000,19.609
|
289 |
+
41,self_attn.q_proj,0.0000001367,0.10000,7.205
|
290 |
+
41,self_attn.k_proj,0.0000000267,0.10000,7.287
|
291 |
+
41,self_attn.v_proj,0.0000000197,0.10000,7.313
|
292 |
+
41,self_attn.o_proj,0.0000000084,0.10000,2.542
|
293 |
+
41,mlp.gate_proj,0.0000003830,0.10000,5.387
|
294 |
+
41,mlp.up_proj,0.0000003693,0.10000,5.388
|
295 |
+
41,mlp.down_proj,0.0000000416,0.10000,20.146
|
296 |
+
42,self_attn.q_proj,0.0000001816,0.10000,7.226
|
297 |
+
42,self_attn.v_proj,0.0000000258,0.10000,7.241
|
298 |
+
42,self_attn.k_proj,0.0000000331,0.10000,7.257
|
299 |
+
42,self_attn.o_proj,0.0000000066,0.10000,2.523
|
300 |
+
42,mlp.gate_proj,0.0000003972,0.10000,5.597
|
301 |
+
42,mlp.up_proj,0.0000003861,0.10000,5.616
|
302 |
+
42,mlp.down_proj,0.0000000464,0.10000,19.683
|
303 |
+
43,self_attn.q_proj,0.0000001734,0.10000,7.062
|
304 |
+
43,self_attn.k_proj,0.0000000328,0.10000,7.078
|
305 |
+
43,self_attn.v_proj,0.0000000228,0.10000,7.086
|
306 |
+
43,self_attn.o_proj,0.0000000094,0.10000,2.467
|
307 |
+
43,mlp.gate_proj,0.0000004086,0.10000,5.548
|
308 |
+
43,mlp.up_proj,0.0000004008,0.10000,5.576
|
309 |
+
43,mlp.down_proj,0.0000000508,0.10000,19.758
|
310 |
+
44,self_attn.k_proj,0.0000000311,0.10000,7.145
|
311 |
+
44,self_attn.q_proj,0.0000001729,0.10000,7.148
|
312 |
+
44,self_attn.v_proj,0.0000000210,0.10000,7.261
|
313 |
+
44,self_attn.o_proj,0.0000000130,0.10000,2.521
|
314 |
+
44,mlp.up_proj,0.0000004158,0.10000,5.542
|
315 |
+
44,mlp.gate_proj,0.0000004165,0.10000,5.566
|
316 |
+
44,mlp.down_proj,0.0000000610,0.10000,19.854
|
317 |
+
45,self_attn.v_proj,0.0000000191,0.10000,7.172
|
318 |
+
45,self_attn.q_proj,0.0000001847,0.10000,7.179
|
319 |
+
45,self_attn.k_proj,0.0000000328,0.10000,7.192
|
320 |
+
45,self_attn.o_proj,0.0000000170,0.10000,2.558
|
321 |
+
45,mlp.up_proj,0.0000004313,0.10000,5.618
|
322 |
+
45,mlp.gate_proj,0.0000004280,0.10000,5.659
|
323 |
+
45,mlp.down_proj,0.0000000999,0.10000,19.934
|
324 |
+
46,self_attn.k_proj,0.0000000364,0.10000,7.268
|
325 |
+
46,self_attn.v_proj,0.0000000217,0.10000,7.296
|
326 |
+
46,self_attn.q_proj,0.0000001988,0.10000,7.316
|
327 |
+
46,self_attn.o_proj,0.0000000236,0.10000,2.494
|
328 |
+
46,mlp.gate_proj,0.0000004367,0.10000,5.585
|
329 |
+
46,mlp.up_proj,0.0000004444,0.10000,5.597
|
330 |
+
46,mlp.down_proj,0.0000000659,0.10000,20.187
|
331 |
+
47,self_attn.q_proj,0.0000002005,0.10000,7.424
|
332 |
+
47,self_attn.k_proj,0.0000000355,0.10000,7.435
|
333 |
+
47,self_attn.v_proj,0.0000000224,0.10000,7.448
|
334 |
+
47,self_attn.o_proj,0.0000000246,0.10000,2.648
|
335 |
+
47,mlp.gate_proj,0.0000004369,0.10000,5.538
|
336 |
+
47,mlp.up_proj,0.0000004500,0.10000,5.584
|
337 |
+
47,mlp.down_proj,0.0000000736,0.10000,20.966
|
338 |
+
48,self_attn.v_proj,0.0000000253,0.10000,7.299
|
339 |
+
48,self_attn.k_proj,0.0000000347,0.10000,7.327
|
340 |
+
48,self_attn.q_proj,0.0000002075,0.10000,7.333
|
341 |
+
48,self_attn.o_proj,0.0000000238,0.10000,2.441
|
342 |
+
48,mlp.gate_proj,0.0000004571,0.10000,5.604
|
343 |
+
48,mlp.up_proj,0.0000004734,0.10000,5.628
|
344 |
+
48,mlp.down_proj,0.0000000806,0.10000,20.734
|
345 |
+
49,self_attn.q_proj,0.0000001961,0.10000,9.009
|
346 |
+
49,self_attn.v_proj,0.0000000241,0.10000,9.024
|
347 |
+
49,self_attn.k_proj,0.0000000345,0.10000,9.037
|
348 |
+
49,self_attn.o_proj,0.0000000250,0.10000,2.535
|
349 |
+
49,mlp.gate_proj,0.0000004585,0.10000,5.607
|
350 |
+
49,mlp.up_proj,0.0000004795,0.10000,5.643
|
351 |
+
49,mlp.down_proj,0.0000000855,0.10000,19.500
|
352 |
+
50,self_attn.q_proj,0.0000002443,0.10000,7.122
|
353 |
+
50,self_attn.k_proj,0.0000000414,0.10000,7.148
|
354 |
+
50,self_attn.v_proj,0.0000000279,0.10000,7.153
|
355 |
+
50,self_attn.o_proj,0.0000000327,0.10000,2.518
|
356 |
+
50,mlp.gate_proj,0.0000004506,0.10000,5.431
|
357 |
+
50,mlp.up_proj,0.0000004762,0.10000,5.454
|
358 |
+
50,mlp.down_proj,0.0000000934,0.10000,20.451
|
359 |
+
51,self_attn.q_proj,0.0000002188,0.10000,7.389
|
360 |
+
51,self_attn.v_proj,0.0000000248,0.10000,7.425
|
361 |
+
51,self_attn.k_proj,0.0000000378,0.10000,7.429
|
362 |
+
51,self_attn.o_proj,0.0000000364,0.10000,2.570
|
363 |
+
51,mlp.gate_proj,0.0000004722,0.10000,5.442
|
364 |
+
51,mlp.up_proj,0.0000004974,0.10000,5.478
|
365 |
+
51,mlp.down_proj,0.0000001018,0.10000,19.328
|
366 |
+
52,self_attn.q_proj,0.0000002062,0.10000,7.248
|
367 |
+
52,self_attn.v_proj,0.0000000299,0.10000,7.353
|
368 |
+
52,self_attn.k_proj,0.0000000333,0.10000,7.364
|
369 |
+
52,self_attn.o_proj,0.0000000394,0.10000,2.514
|
370 |
+
52,mlp.gate_proj,0.0000005355,0.10000,5.560
|
371 |
+
52,mlp.up_proj,0.0000005580,0.10000,5.596
|
372 |
+
52,mlp.down_proj,0.0000001059,0.10000,20.357
|
373 |
+
53,self_attn.k_proj,0.0000000378,0.10000,7.768
|
374 |
+
53,self_attn.v_proj,0.0000000327,0.10000,7.781
|
375 |
+
53,self_attn.q_proj,0.0000002302,0.10000,7.784
|
376 |
+
53,self_attn.o_proj,0.0000000428,0.10000,2.584
|
377 |
+
53,mlp.gate_proj,0.0000005775,0.10000,5.513
|
378 |
+
53,mlp.up_proj,0.0000005966,0.10000,5.547
|
379 |
+
53,mlp.down_proj,0.0000001133,0.10000,19.860
|
380 |
+
54,self_attn.q_proj,0.0000002454,0.10000,7.557
|
381 |
+
54,self_attn.v_proj,0.0000000422,0.10000,7.561
|
382 |
+
54,self_attn.k_proj,0.0000000365,0.10000,7.582
|
383 |
+
54,self_attn.o_proj,0.0000000386,0.10000,2.506
|
384 |
+
54,mlp.gate_proj,0.0000005656,0.10000,5.468
|
385 |
+
54,mlp.up_proj,0.0000005878,0.10000,5.499
|
386 |
+
54,mlp.down_proj,0.0000001216,0.10000,20.033
|
387 |
+
55,self_attn.k_proj,0.0000000352,0.10000,7.106
|
388 |
+
55,self_attn.v_proj,0.0000000412,0.10000,7.137
|
389 |
+
55,self_attn.q_proj,0.0000002385,0.10000,7.201
|
390 |
+
55,self_attn.o_proj,0.0000000478,0.10000,2.551
|
391 |
+
55,mlp.gate_proj,0.0000005846,0.10000,5.452
|
392 |
+
55,mlp.up_proj,0.0000006090,0.10000,5.479
|
393 |
+
55,mlp.down_proj,0.0000001418,0.10000,19.931
|
394 |
+
56,self_attn.k_proj,0.0000000359,0.10000,7.452
|
395 |
+
56,self_attn.v_proj,0.0000000356,0.10000,7.456
|
396 |
+
56,self_attn.q_proj,0.0000002264,0.10000,7.468
|
397 |
+
56,self_attn.o_proj,0.0000000601,0.10000,2.559
|
398 |
+
56,mlp.gate_proj,0.0000006445,0.10000,5.604
|
399 |
+
56,mlp.up_proj,0.0000006655,0.10000,5.626
|
400 |
+
56,mlp.down_proj,0.0000001594,0.10000,20.208
|
401 |
+
57,self_attn.k_proj,0.0000000381,0.10000,7.531
|
402 |
+
57,self_attn.v_proj,0.0000000407,0.10000,7.533
|
403 |
+
57,self_attn.q_proj,0.0000002412,0.10000,7.552
|
404 |
+
57,self_attn.o_proj,0.0000000576,0.10000,2.509
|
405 |
+
57,mlp.up_proj,0.0000006661,0.10000,5.426
|
406 |
+
57,mlp.gate_proj,0.0000006429,0.10000,5.484
|
407 |
+
57,mlp.down_proj,0.0000001745,0.10000,19.825
|
408 |
+
58,self_attn.q_proj,0.0000002396,0.10000,7.228
|
409 |
+
58,self_attn.k_proj,0.0000000356,0.10000,7.243
|
410 |
+
58,self_attn.v_proj,0.0000000423,0.10000,7.269
|
411 |
+
58,self_attn.o_proj,0.0000000717,0.10000,2.504
|
412 |
+
58,mlp.up_proj,0.0000006885,0.10000,5.462
|
413 |
+
58,mlp.gate_proj,0.0000006628,0.10000,5.496
|
414 |
+
58,mlp.down_proj,0.0000002187,0.10000,20.706
|
415 |
+
59,self_attn.k_proj,0.0000000358,0.10000,7.316
|
416 |
+
59,self_attn.q_proj,0.0000002358,0.10000,7.319
|
417 |
+
59,self_attn.v_proj,0.0000000438,0.10000,7.367
|
418 |
+
59,self_attn.o_proj,0.0000000830,0.10000,2.489
|
419 |
+
59,mlp.gate_proj,0.0000006991,0.10000,5.441
|
420 |
+
59,mlp.up_proj,0.0000007101,0.10000,5.447
|
421 |
+
59,mlp.down_proj,0.0000002605,0.10000,20.113
|
422 |
+
60,self_attn.k_proj,0.0000000385,0.10000,7.595
|
423 |
+
60,self_attn.v_proj,0.0000000539,0.10000,7.601
|
424 |
+
60,self_attn.q_proj,0.0000002725,0.10000,7.617
|
425 |
+
60,self_attn.o_proj,0.0000000656,0.10000,2.459
|
426 |
+
60,mlp.gate_proj,0.0000008601,0.10000,5.598
|
427 |
+
60,mlp.up_proj,0.0000008547,0.10000,5.599
|
428 |
+
60,mlp.down_proj,0.0000002924,0.10000,19.667
|
429 |
+
61,self_attn.q_proj,0.0000002734,0.10000,7.421
|
430 |
+
61,self_attn.k_proj,0.0000000390,0.10000,7.479
|
431 |
+
61,self_attn.v_proj,0.0000000580,0.10000,7.497
|
432 |
+
61,self_attn.o_proj,0.0000000729,0.10000,2.582
|
433 |
+
61,mlp.up_proj,0.0000009288,0.10000,5.431
|
434 |
+
61,mlp.gate_proj,0.0000009403,0.10000,5.462
|
435 |
+
61,mlp.down_proj,0.0000003304,0.10000,20.248
|
436 |
+
62,self_attn.v_proj,0.0000000552,0.10000,7.353
|
437 |
+
62,self_attn.k_proj,0.0000000405,0.10000,7.411
|
438 |
+
62,self_attn.q_proj,0.0000002804,0.10000,7.417
|
439 |
+
62,self_attn.o_proj,0.0000000758,0.10000,2.597
|
440 |
+
62,mlp.up_proj,0.0000010517,0.10000,5.496
|
441 |
+
62,mlp.gate_proj,0.0000010895,0.10000,5.531
|
442 |
+
62,mlp.down_proj,0.0000003508,0.10000,20.600
|
443 |
+
63,self_attn.v_proj,0.0000000561,0.10000,7.348
|
444 |
+
63,self_attn.q_proj,0.0000002738,0.10000,7.362
|
445 |
+
63,self_attn.k_proj,0.0000000403,0.10000,7.369
|
446 |
+
63,self_attn.o_proj,0.0000000833,0.10000,2.474
|
447 |
+
63,mlp.gate_proj,0.0000011510,0.10000,5.576
|
448 |
+
63,mlp.up_proj,0.0000011206,0.10000,5.612
|
449 |
+
63,mlp.down_proj,0.0000004220,0.10000,19.897
|
450 |
+
64,self_attn.v_proj,0.0000000565,0.10000,7.117
|
451 |
+
64,self_attn.q_proj,0.0000002876,0.10000,7.118
|
452 |
+
64,self_attn.k_proj,0.0000000428,0.10000,7.213
|
453 |
+
64,self_attn.o_proj,0.0000000875,0.10000,2.483
|
454 |
+
64,mlp.up_proj,0.0000012125,0.10000,5.393
|
455 |
+
64,mlp.gate_proj,0.0000012552,0.10000,5.437
|
456 |
+
64,mlp.down_proj,0.0000005112,0.10000,20.388
|
457 |
+
65,self_attn.q_proj,0.0000002937,0.10000,7.266
|
458 |
+
65,self_attn.k_proj,0.0000000437,0.10000,7.331
|
459 |
+
65,self_attn.v_proj,0.0000000609,0.10000,7.387
|
460 |
+
65,self_attn.o_proj,0.0000000966,0.10000,2.618
|
461 |
+
65,mlp.gate_proj,0.0000014031,0.10000,5.631
|
462 |
+
65,mlp.up_proj,0.0000013591,0.10000,5.652
|
463 |
+
65,mlp.down_proj,0.0000005885,0.10000,19.595
|
464 |
+
66,self_attn.q_proj,0.0000002761,0.10000,7.568
|
465 |
+
66,self_attn.k_proj,0.0000000388,0.10000,7.618
|
466 |
+
66,self_attn.v_proj,0.0000000634,0.10000,7.632
|
467 |
+
66,self_attn.o_proj,0.0000000883,0.10000,2.544
|
468 |
+
66,mlp.up_proj,0.0000015823,0.10000,5.292
|
469 |
+
66,mlp.gate_proj,0.0000016563,0.10000,5.335
|
470 |
+
66,mlp.down_proj,0.0000006308,0.10000,19.994
|
471 |
+
67,self_attn.v_proj,0.0000000724,0.10000,7.381
|
472 |
+
67,self_attn.q_proj,0.0000002954,0.10000,7.383
|
473 |
+
67,self_attn.k_proj,0.0000000415,0.10000,7.407
|
474 |
+
67,self_attn.o_proj,0.0000000722,0.10000,2.606
|
475 |
+
67,mlp.up_proj,0.0000016935,0.10000,5.417
|
476 |
+
67,mlp.gate_proj,0.0000017528,0.10000,5.445
|
477 |
+
67,mlp.down_proj,0.0000006867,0.10000,19.713
|
478 |
+
68,self_attn.k_proj,0.0000000429,0.10000,7.401
|
479 |
+
68,self_attn.v_proj,0.0000000829,0.10000,7.416
|
480 |
+
68,self_attn.q_proj,0.0000003136,0.10000,7.420
|
481 |
+
68,self_attn.o_proj,0.0000000874,0.10000,2.638
|
482 |
+
68,mlp.up_proj,0.0000018424,0.10000,5.612
|
483 |
+
68,mlp.gate_proj,0.0000018866,0.10000,5.636
|
484 |
+
68,mlp.down_proj,0.0000007497,0.10000,19.962
|
485 |
+
69,self_attn.v_proj,0.0000000706,0.10000,7.326
|
486 |
+
69,self_attn.k_proj,0.0000000437,0.10000,7.367
|
487 |
+
69,self_attn.q_proj,0.0000003053,0.10000,7.398
|
488 |
+
69,self_attn.o_proj,0.0000001096,0.10000,2.542
|
489 |
+
69,mlp.gate_proj,0.0000020043,0.10000,5.590
|
490 |
+
69,mlp.up_proj,0.0000019949,0.10000,5.654
|
491 |
+
69,mlp.down_proj,0.0000008874,0.10000,19.492
|
492 |
+
70,self_attn.q_proj,0.0000003152,0.10000,7.292
|
493 |
+
70,self_attn.v_proj,0.0000000946,0.10000,7.365
|
494 |
+
70,self_attn.k_proj,0.0000000417,0.10000,7.398
|
495 |
+
70,self_attn.o_proj,0.0000001396,0.10000,2.571
|
496 |
+
70,mlp.gate_proj,0.0000022492,0.10000,5.472
|
497 |
+
70,mlp.up_proj,0.0000022710,0.10000,5.531
|
498 |
+
70,mlp.down_proj,0.0000010752,0.10000,20.280
|
499 |
+
71,self_attn.v_proj,0.0000000978,0.10000,7.116
|
500 |
+
71,self_attn.k_proj,0.0000000436,0.10000,7.148
|
501 |
+
71,self_attn.q_proj,0.0000003238,0.10000,7.152
|
502 |
+
71,self_attn.o_proj,0.0000001525,0.10000,2.581
|
503 |
+
71,mlp.up_proj,0.0000024830,0.10000,5.534
|
504 |
+
71,mlp.gate_proj,0.0000024225,0.10000,5.559
|
505 |
+
71,mlp.down_proj,0.0000012695,0.10000,20.608
|
506 |
+
72,self_attn.q_proj,0.0000003222,0.10000,7.509
|
507 |
+
72,self_attn.v_proj,0.0000001117,0.10000,7.511
|
508 |
+
72,self_attn.k_proj,0.0000000405,0.10000,7.527
|
509 |
+
72,self_attn.o_proj,0.0000001605,0.10000,2.526
|
510 |
+
72,mlp.gate_proj,0.0000026203,0.10000,5.694
|
511 |
+
72,mlp.up_proj,0.0000027220,0.10000,5.720
|
512 |
+
72,mlp.down_proj,0.0000014853,0.10000,19.933
|
513 |
+
73,self_attn.q_proj,0.0000003313,0.10000,7.183
|
514 |
+
73,self_attn.k_proj,0.0000000419,0.10000,7.194
|
515 |
+
73,self_attn.v_proj,0.0000001306,0.10000,7.225
|
516 |
+
73,self_attn.o_proj,0.0000001893,0.10000,2.388
|
517 |
+
73,mlp.gate_proj,0.0000027619,0.10000,5.585
|
518 |
+
73,mlp.up_proj,0.0000029067,0.10000,5.659
|
519 |
+
73,mlp.down_proj,0.0000017333,0.10000,19.561
|
520 |
+
74,self_attn.k_proj,0.0000000418,0.10000,7.129
|
521 |
+
74,self_attn.v_proj,0.0000001241,0.10000,7.183
|
522 |
+
74,self_attn.q_proj,0.0000003531,0.10000,7.196
|
523 |
+
74,self_attn.o_proj,0.0000001913,0.10000,2.548
|
524 |
+
74,mlp.up_proj,0.0000030490,0.10000,5.648
|
525 |
+
74,mlp.gate_proj,0.0000028471,0.10000,5.666
|
526 |
+
74,mlp.down_proj,0.0000020671,0.10000,19.825
|
527 |
+
75,self_attn.v_proj,0.0000001411,0.10000,7.414
|
528 |
+
75,self_attn.k_proj,0.0000000404,0.10000,7.435
|
529 |
+
75,self_attn.q_proj,0.0000003362,0.10000,7.443
|
530 |
+
75,self_attn.o_proj,0.0000002744,0.10000,2.555
|
531 |
+
75,mlp.gate_proj,0.0000030461,0.10000,5.553
|
532 |
+
75,mlp.up_proj,0.0000033062,0.10000,5.553
|
533 |
+
75,mlp.down_proj,0.0000024828,0.10000,19.960
|
534 |
+
76,self_attn.v_proj,0.0000002220,0.10000,6.971
|
535 |
+
76,self_attn.k_proj,0.0000000397,0.10000,7.015
|
536 |
+
76,self_attn.q_proj,0.0000003707,0.10000,7.018
|
537 |
+
76,self_attn.o_proj,0.0000003982,0.10000,2.552
|
538 |
+
76,mlp.gate_proj,0.0000032092,0.10000,5.624
|
539 |
+
76,mlp.up_proj,0.0000035158,0.10000,5.624
|
540 |
+
76,mlp.down_proj,0.0000034027,0.10000,20.258
|
541 |
+
77,self_attn.k_proj,0.0000000374,0.10000,7.514
|
542 |
+
77,self_attn.q_proj,0.0000003344,0.10000,7.524
|
543 |
+
77,self_attn.v_proj,0.0000001427,0.10000,7.594
|
544 |
+
77,self_attn.o_proj,0.0000003980,0.10000,2.495
|
545 |
+
77,mlp.gate_proj,0.0000033776,0.10000,5.525
|
546 |
+
77,mlp.up_proj,0.0000037065,0.10000,5.542
|
547 |
+
77,mlp.down_proj,0.0000043159,0.10000,20.320
|
548 |
+
78,self_attn.v_proj,0.0000001428,0.10000,7.170
|
549 |
+
78,self_attn.k_proj,0.0000000335,0.10000,7.181
|
550 |
+
78,self_attn.q_proj,0.0000003073,0.10000,7.183
|
551 |
+
78,self_attn.o_proj,0.0000005447,0.10000,2.457
|
552 |
+
78,mlp.gate_proj,0.0000030503,0.10000,5.725
|
553 |
+
78,mlp.up_proj,0.0000033010,0.10000,5.750
|
554 |
+
78,mlp.down_proj,0.0000107036,0.10000,19.873
|
555 |
+
79,self_attn.q_proj,0.0000001919,0.10000,7.286
|
556 |
+
79,self_attn.v_proj,0.0000000454,0.10000,7.302
|
557 |
+
79,self_attn.k_proj,0.0000000254,0.10000,7.316
|
558 |
+
79,self_attn.o_proj,0.0000001316,0.10000,2.527
|
559 |
+
79,mlp.gate_proj,0.0000027360,0.10000,5.491
|
560 |
+
79,mlp.up_proj,0.0000028670,0.10000,5.523
|
561 |
+
79,mlp.down_proj,0.0000358965,0.10000,19.992
|