btbtyler09 committed on
Commit
ed6c522
·
verified ·
1 Parent(s): a1673c7

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. added_tokens.json +24 -0
  3. chat_template.jinja +54 -0
  4. config.json +53 -0
  5. generation_config.json +6 -0
  6. merges.txt +0 -0
  7. model-00001-of-00042.safetensors +3 -0
  8. model-00002-of-00042.safetensors +3 -0
  9. model-00003-of-00042.safetensors +3 -0
  10. model-00004-of-00042.safetensors +3 -0
  11. model-00005-of-00042.safetensors +3 -0
  12. model-00006-of-00042.safetensors +3 -0
  13. model-00007-of-00042.safetensors +3 -0
  14. model-00008-of-00042.safetensors +3 -0
  15. model-00009-of-00042.safetensors +3 -0
  16. model-00010-of-00042.safetensors +3 -0
  17. model-00011-of-00042.safetensors +3 -0
  18. model-00012-of-00042.safetensors +3 -0
  19. model-00013-of-00042.safetensors +3 -0
  20. model-00014-of-00042.safetensors +3 -0
  21. model-00015-of-00042.safetensors +3 -0
  22. model-00016-of-00042.safetensors +3 -0
  23. model-00017-of-00042.safetensors +3 -0
  24. model-00018-of-00042.safetensors +3 -0
  25. model-00019-of-00042.safetensors +3 -0
  26. model-00020-of-00042.safetensors +3 -0
  27. model-00021-of-00042.safetensors +3 -0
  28. model-00022-of-00042.safetensors +3 -0
  29. model-00023-of-00042.safetensors +3 -0
  30. model-00024-of-00042.safetensors +3 -0
  31. model-00025-of-00042.safetensors +3 -0
  32. model-00026-of-00042.safetensors +3 -0
  33. model-00027-of-00042.safetensors +3 -0
  34. model-00028-of-00042.safetensors +3 -0
  35. model-00029-of-00042.safetensors +3 -0
  36. model-00030-of-00042.safetensors +3 -0
  37. model-00031-of-00042.safetensors +3 -0
  38. model-00032-of-00042.safetensors +3 -0
  39. model-00033-of-00042.safetensors +3 -0
  40. model-00034-of-00042.safetensors +3 -0
  41. model-00035-of-00042.safetensors +3 -0
  42. model-00036-of-00042.safetensors +3 -0
  43. model-00037-of-00042.safetensors +3 -0
  44. model-00038-of-00042.safetensors +3 -0
  45. model-00039-of-00042.safetensors +3 -0
  46. model-00040-of-00042.safetensors +3 -0
  47. model-00041-of-00042.safetensors +3 -0
  48. model-00042-of-00042.safetensors +3 -0
  49. model.safetensors.index.json +0 -0
  50. quant_log.csv +561 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 8192,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 29568,
12
+ "max_position_embeddings": 131072,
13
+ "max_window_layers": 70,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 64,
16
+ "num_hidden_layers": 80,
17
+ "num_key_value_heads": 8,
18
+ "quantization_config": {
19
+ "bits": 8,
20
+ "checkpoint_format": "gptq",
21
+ "desc_act": false,
22
+ "group_size": 32,
23
+ "lm_head": false,
24
+ "memory_aware_allocation": true,
25
+ "memory_threshold": 0.8,
26
+ "meta": {
27
+ "damp_auto_increment": 0.01,
28
+ "damp_percent": 0.1,
29
+ "mse": 0.0,
30
+ "quantizer": [
31
+ "gptqmodel:4.0.0-dev"
32
+ ],
33
+ "static_groups": false,
34
+ "true_sequential": true,
35
+ "uri": "https://github.com/modelcloud/gptqmodel",
36
+ "v2": false,
37
+ "v2_alpha": 0.25
38
+ },
39
+ "pack_dtype": "int32",
40
+ "quant_method": "gptq",
41
+ "sym": true
42
+ },
43
+ "rms_norm_eps": 1e-06,
44
+ "rope_scaling": null,
45
+ "rope_theta": 1000000.0,
46
+ "sliding_window": 131072,
47
+ "tie_word_embeddings": false,
48
+ "torch_dtype": "float16",
49
+ "transformers_version": "4.52.4",
50
+ "use_cache": true,
51
+ "use_sliding_window": false,
52
+ "vocab_size": 152064
53
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "eos_token_id": 151645,
5
+ "transformers_version": "4.52.4"
6
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d66e00d30887a66e68f6cd5d72c12fabc2919f9b519b003b3c2ee863f0a0bd
3
+ size 2491416712
model-00002-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d41a9d98ba81271cbcbe3ba7148208c98b5173b7d2776f331466415ab68b363
3
+ size 1994070824
model-00003-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4646c651e6cb4c69acb221b10e998864e67cd6e2b044d192f1511b9ef4e91d1d
3
+ size 1939041640
model-00004-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0029e3728b2dcd3ab060a459ff7774005c7f80674cb245c96bb4ed4d0709c356
3
+ size 1994054352
model-00005-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ef12563193062ae643c486902b4e307d7df1774fab93f57583a51c7d014e212
3
+ size 1920620808
model-00006-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8af313e8070c1ac496e19f9323a0eeeaa6b158917372bd7578d6c6bb9b68aa45
3
+ size 1920620784
model-00007-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3166450ab7fb88c918416e7ead4a76cbc4ae9f5d25609b86846d4a6d83aa138
3
+ size 1920620872
model-00008-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09540dca82465dda17f4fe89ac6af0453f43adab5158286d147d5c79c5e6a7e3
3
+ size 1920620872
model-00009-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f74819f49c0d99da35302cba147c671ee257b26afdf4324122b3ec2618816517
3
+ size 1920620872
model-00010-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78bd5c98ac115fd02bc83953809f0c26ae57ec6587cfd69dcd62bd182dea961d
3
+ size 1920620872
model-00011-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74ae4ff0f19d535a4f621329033aae128704f508d62bddc55a2b36d74103a54a
3
+ size 1920620872
model-00012-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e93737c40202750274e525d5d27e539457b9ed32662ac406b1c43fceca5005f
3
+ size 1920620872
model-00013-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:953d9716da4c162ac3f3a5b7b6cbb329b643c72934000b1fc4f5c57f5d7051c0
3
+ size 1920620872
model-00014-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6dacd117982a798dba93956e5f80831f8ead33e20fdf29f6dfcb1dfbe4a122
3
+ size 1920620872
model-00015-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:453a14b4beffdcde20e56d3e6b74606938cb978dba4bcfecbe4edc24b9d53b1c
3
+ size 1920620872
model-00016-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c980a966efcbe78404de2d499d13e5c22d1af3cba897ab02ef38c6d2c85fab8
3
+ size 1920620872
model-00017-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ef89002d2bcdde6ed5293f640dbef29924260b9262a0fded474135c698c3de7
3
+ size 1920620872
model-00018-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:931c6409a8c4378ba5b9c17f26ec4b3d87a5bd38ec9a87ef3e73b580265eb23b
3
+ size 1920620872
model-00019-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f304e8326481d9c1d119ef89412a62bfb7747b40ffcd88429a079604db9997e9
3
+ size 1920620872
model-00020-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5e226cca001413b9ccb101a95e65047e54cfb91e76b0e6f38b1db534705575f
3
+ size 1920620872
model-00021-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1967df7eff2a72dc8f8412f458007528358253cf92c0351827e7209f77a6bf65
3
+ size 1920620872
model-00022-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2c6cd98252ed51edf44918ad28f67b3189c7ad8569b7380ecd6d55475dd0b69
3
+ size 1920620872
model-00023-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964259f0fd06a7ef01cd22de5b99b469629f36f96da2816e5b8bda440f9672ee
3
+ size 1920620872
model-00024-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:878ef405be0a1e0500218b5c64f16f8804dbe3975a1659302023d7cf978675d7
3
+ size 1920620872
model-00025-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6a6d5964fd7097d494a9921100945b3944cefece7a554aab646ab7347d9237a
3
+ size 1920620872
model-00026-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fcc46cb211c0f9b190d1698e17ed52c5016ee8c9793cdc67fd6c1cc1f2353e9
3
+ size 1920620872
model-00027-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077f9bf54e8ebc1966a790287259acfadac64864d5e3e8d11a53026ca5fef8ae
3
+ size 1920620872
model-00028-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e0ee67ac83c0a0ede699c4ddcf96567ee028e34972cbc99eaa2a873ec24e937
3
+ size 1920620872
model-00029-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d735b3cae0c633dfcdabf4a9cfc6865648fcbf169161d9c1d473d7b5a72e3d9c
3
+ size 1920620872
model-00030-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ab65e1d4c1eada84bc89fb56ebeadefe85beffbaa089443578d2cd5f97e270e
3
+ size 1920620872
model-00031-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d11d168fd1ec349fac385ec65e6eb4f123aef3f275e1b0c748499ee691c6ee54
3
+ size 1920620872
model-00032-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c44860d8520ea5a9156c1fb841a66af4a745de031c6f3b30b37ac7311be342f7
3
+ size 1920620872
model-00033-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e299cb40d4e4d17d08f1878081e4c72c6758d7ed2d087bd1fcf9ea3e248e393
3
+ size 1920620872
model-00034-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83b217a8f2831b986912543de732f8b315c9a2af2bbcdc2a9c87b4651f99291f
3
+ size 1920620872
model-00035-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50dafcb6e6a5d0c048e54505fab6e339e90de469a1aa13ac5e470d0b2e54a90e
3
+ size 1920620872
model-00036-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c99c1a2784b919c25cb1bc469a4c8207903c550f033c6691db365de663bfbbc
3
+ size 1920620872
model-00037-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b5e3b75330e943ea4398252945de1a4c583f3b5ab3363c22bb1241616e71840
3
+ size 1920620872
model-00038-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51bd0c8a7f1e2ed56b1cc0113a12b1f3525f3ce17d022be19e2d47c744e9de60
3
+ size 1920620872
model-00039-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f06679a981bee69785172eb95f1b8679e527322d6fd0bca02df44e160a65358
3
+ size 1920620872
model-00040-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c88e362e2f29d492ee82dfe5f54108b93968fcfbef002e58677fa819974935e
3
+ size 1920620872
model-00041-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3582e7b57110273dd4fd22a4c2b7009a1f98cbbd1770fb0638dcfd599782ad6f
3
+ size 2491416704
model-00042-of-00042.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd9e59cb735ce91dac931a750107d4e4253730544303e795369d91561e36f3f0
3
+ size 1755332904
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quant_log.csv ADDED
@@ -0,0 +1,561 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ layer,module,loss,damp,time
2
+ 0,self_attn.q_proj,0.0000000006,0.10000,8.433
3
+ 0,self_attn.v_proj,0.0000000000,0.10000,8.523
4
+ 0,self_attn.k_proj,0.0000000001,0.10000,8.525
5
+ 0,self_attn.o_proj,0.0000000000,0.10000,3.546
6
+ 0,mlp.gate_proj,0.0000000003,0.10000,6.809
7
+ 0,mlp.up_proj,0.0000000003,0.10000,6.818
8
+ 0,mlp.down_proj,0.0000000000,0.10000,20.401
9
+ 1,self_attn.k_proj,0.0000000001,0.10000,7.392
10
+ 1,self_attn.q_proj,0.0000000002,0.10000,7.457
11
+ 1,self_attn.v_proj,0.0000000000,0.10000,7.479
12
+ 1,self_attn.o_proj,0.0000000000,0.10000,2.588
13
+ 1,mlp.gate_proj,0.0000000009,0.10000,5.444
14
+ 1,mlp.up_proj,0.0000000009,0.10000,5.464
15
+ 1,mlp.down_proj,0.0000012333,0.10000,19.740
16
+ 2,self_attn.q_proj,0.0000000117,0.10000,7.445
17
+ 2,self_attn.k_proj,0.0000000033,0.10000,7.475
18
+ 2,self_attn.v_proj,0.0000000013,0.10000,7.496
19
+ 2,self_attn.o_proj,0.0000000000,0.10000,2.569
20
+ 2,mlp.up_proj,0.0000000016,0.10000,5.568
21
+ 2,mlp.gate_proj,0.0000000016,0.10000,5.591
22
+ 2,mlp.down_proj,0.0000000000,0.10000,19.740
23
+ 3,self_attn.v_proj,0.0000000012,0.10000,7.504
24
+ 3,self_attn.k_proj,0.0000000026,0.10000,7.513
25
+ 3,self_attn.q_proj,0.0000000084,0.10000,7.554
26
+ 3,self_attn.o_proj,0.0000000000,0.10000,2.598
27
+ 3,mlp.gate_proj,0.0000000029,0.10000,5.530
28
+ 3,mlp.up_proj,0.0000000028,0.10000,5.551
29
+ 3,mlp.down_proj,0.0000000001,0.10000,20.208
30
+ 4,self_attn.k_proj,0.0000000035,0.10000,8.418
31
+ 4,self_attn.q_proj,0.0000000143,0.10000,8.438
32
+ 4,self_attn.v_proj,0.0000000019,0.10000,8.457
33
+ 4,self_attn.o_proj,0.0000000000,0.10000,2.637
34
+ 4,mlp.gate_proj,0.0000000054,0.10000,5.674
35
+ 4,mlp.up_proj,0.0000000053,0.10000,5.700
36
+ 4,mlp.down_proj,0.0000000001,0.10000,21.098
37
+ 5,self_attn.q_proj,0.0000000177,0.10000,7.396
38
+ 5,self_attn.v_proj,0.0000000026,0.10000,7.403
39
+ 5,self_attn.k_proj,0.0000000044,0.10000,7.450
40
+ 5,self_attn.o_proj,0.0000000000,0.10000,2.574
41
+ 5,mlp.gate_proj,0.0000000081,0.10000,5.457
42
+ 5,mlp.up_proj,0.0000000078,0.10000,5.484
43
+ 5,mlp.down_proj,0.0000000002,0.10000,21.327
44
+ 6,self_attn.k_proj,0.0000000031,0.10000,7.467
45
+ 6,self_attn.q_proj,0.0000000118,0.10000,7.508
46
+ 6,self_attn.v_proj,0.0000000018,0.10000,7.533
47
+ 6,self_attn.o_proj,0.0000000000,0.10000,2.456
48
+ 6,mlp.up_proj,0.0000000095,0.10000,5.399
49
+ 6,mlp.gate_proj,0.0000000099,0.10000,5.421
50
+ 6,mlp.down_proj,0.0000000002,0.10000,21.591
51
+ 7,self_attn.v_proj,0.0000000023,0.10000,7.400
52
+ 7,self_attn.k_proj,0.0000000043,0.10000,7.482
53
+ 7,self_attn.q_proj,0.0000000171,0.10000,7.488
54
+ 7,self_attn.o_proj,0.0000000000,0.10000,2.625
55
+ 7,mlp.up_proj,0.0000000120,0.10000,5.333
56
+ 7,mlp.gate_proj,0.0000000123,0.10000,5.361
57
+ 7,mlp.down_proj,0.0000000003,0.10000,22.012
58
+ 8,self_attn.v_proj,0.0000000024,0.10000,7.140
59
+ 8,self_attn.k_proj,0.0000000041,0.10000,7.208
60
+ 8,self_attn.q_proj,0.0000000155,0.10000,7.214
61
+ 8,self_attn.o_proj,0.0000000001,0.10000,2.608
62
+ 8,mlp.gate_proj,0.0000000149,0.10000,5.446
63
+ 8,mlp.up_proj,0.0000000142,0.10000,5.502
64
+ 8,mlp.down_proj,0.0000000004,0.10000,22.092
65
+ 9,self_attn.k_proj,0.0000000110,0.10000,7.281
66
+ 9,self_attn.v_proj,0.0000000069,0.10000,7.285
67
+ 9,self_attn.q_proj,0.0000000496,0.10000,7.300
68
+ 9,self_attn.o_proj,0.0000000002,0.10000,2.397
69
+ 9,mlp.up_proj,0.0000000160,0.10000,5.529
70
+ 9,mlp.gate_proj,0.0000000166,0.10000,5.561
71
+ 9,mlp.down_proj,0.0000000004,0.10000,22.118
72
+ 10,self_attn.k_proj,0.0000000071,0.10000,7.222
73
+ 10,self_attn.q_proj,0.0000000296,0.10000,7.225
74
+ 10,self_attn.v_proj,0.0000000038,0.10000,7.233
75
+ 10,self_attn.o_proj,0.0000000002,0.10000,2.642
76
+ 10,mlp.gate_proj,0.0000000207,0.10000,5.568
77
+ 10,mlp.up_proj,0.0000000198,0.10000,5.604
78
+ 10,mlp.down_proj,0.0000000006,0.10000,22.287
79
+ 11,self_attn.q_proj,0.0000000349,0.10000,7.111
80
+ 11,self_attn.k_proj,0.0000000080,0.10000,7.175
81
+ 11,self_attn.v_proj,0.0000000038,0.10000,7.201
82
+ 11,self_attn.o_proj,0.0000000003,0.10000,2.594
83
+ 11,mlp.gate_proj,0.0000000242,0.10000,5.426
84
+ 11,mlp.up_proj,0.0000000231,0.10000,5.453
85
+ 11,mlp.down_proj,0.0000000008,0.10000,21.926
86
+ 12,self_attn.v_proj,0.0000000057,0.10000,7.172
87
+ 12,self_attn.q_proj,0.0000000486,0.10000,7.217
88
+ 12,self_attn.k_proj,0.0000000105,0.10000,7.263
89
+ 12,self_attn.o_proj,0.0000000004,0.10000,2.451
90
+ 12,mlp.up_proj,0.0000000264,0.10000,5.450
91
+ 12,mlp.gate_proj,0.0000000275,0.10000,5.468
92
+ 12,mlp.down_proj,0.0000000010,0.10000,21.748
93
+ 13,self_attn.v_proj,0.0000000055,0.10000,7.622
94
+ 13,self_attn.q_proj,0.0000000456,0.10000,7.642
95
+ 13,self_attn.k_proj,0.0000000100,0.10000,7.669
96
+ 13,self_attn.o_proj,0.0000000004,0.10000,2.690
97
+ 13,mlp.gate_proj,0.0000000321,0.10000,5.361
98
+ 13,mlp.up_proj,0.0000000307,0.10000,5.397
99
+ 13,mlp.down_proj,0.0000000013,0.10000,21.820
100
+ 14,self_attn.q_proj,0.0000000447,0.10000,7.525
101
+ 14,self_attn.k_proj,0.0000000098,0.10000,7.554
102
+ 14,self_attn.v_proj,0.0000000054,0.10000,7.583
103
+ 14,self_attn.o_proj,0.0000000006,0.10000,2.511
104
+ 14,mlp.up_proj,0.0000000323,0.10000,5.552
105
+ 14,mlp.gate_proj,0.0000000337,0.10000,5.571
106
+ 14,mlp.down_proj,0.0000000015,0.10000,22.382
107
+ 15,self_attn.v_proj,0.0000000066,0.10000,7.419
108
+ 15,self_attn.q_proj,0.0000000590,0.10000,7.450
109
+ 15,self_attn.k_proj,0.0000000120,0.10000,7.475
110
+ 15,self_attn.o_proj,0.0000000005,0.10000,2.598
111
+ 15,mlp.gate_proj,0.0000000427,0.10000,5.365
112
+ 15,mlp.up_proj,0.0000000408,0.10000,5.392
113
+ 15,mlp.down_proj,0.0000000019,0.10000,22.294
114
+ 16,self_attn.k_proj,0.0000000112,0.10000,7.328
115
+ 16,self_attn.q_proj,0.0000000529,0.10000,7.336
116
+ 16,self_attn.v_proj,0.0000000058,0.10000,7.372
117
+ 16,self_attn.o_proj,0.0000000005,0.10000,2.501
118
+ 16,mlp.gate_proj,0.0000000467,0.10000,5.539
119
+ 16,mlp.up_proj,0.0000000447,0.10000,5.571
120
+ 16,mlp.down_proj,0.0000000022,0.10000,22.187
121
+ 17,self_attn.v_proj,0.0000000057,0.10000,7.075
122
+ 17,self_attn.k_proj,0.0000000095,0.10000,7.155
123
+ 17,self_attn.q_proj,0.0000000433,0.10000,7.172
124
+ 17,self_attn.o_proj,0.0000000008,0.10000,2.519
125
+ 17,mlp.up_proj,0.0000000434,0.10000,5.482
126
+ 17,mlp.gate_proj,0.0000000451,0.10000,5.545
127
+ 17,mlp.down_proj,0.0000000024,0.10000,22.221
128
+ 18,self_attn.k_proj,0.0000000175,0.10000,7.383
129
+ 18,self_attn.q_proj,0.0000000805,0.10000,7.388
130
+ 18,self_attn.v_proj,0.0000000099,0.10000,7.409
131
+ 18,self_attn.o_proj,0.0000000006,0.10000,2.573
132
+ 18,mlp.up_proj,0.0000000523,0.10000,5.492
133
+ 18,mlp.gate_proj,0.0000000546,0.10000,5.510
134
+ 18,mlp.down_proj,0.0000000029,0.10000,22.175
135
+ 19,self_attn.v_proj,0.0000000140,0.10000,7.286
136
+ 19,self_attn.q_proj,0.0000001108,0.10000,7.319
137
+ 19,self_attn.k_proj,0.0000000230,0.10000,7.330
138
+ 19,self_attn.o_proj,0.0000000007,0.10000,2.389
139
+ 19,mlp.gate_proj,0.0000000634,0.10000,5.577
140
+ 19,mlp.up_proj,0.0000000607,0.10000,5.618
141
+ 19,mlp.down_proj,0.0000000040,0.10000,22.311
142
+ 20,self_attn.v_proj,0.0000000056,0.10000,7.417
143
+ 20,self_attn.q_proj,0.0000000457,0.10000,7.440
144
+ 20,self_attn.k_proj,0.0000000093,0.10000,7.454
145
+ 20,self_attn.o_proj,0.0000000010,0.10000,2.418
146
+ 20,mlp.up_proj,0.0000000728,0.10000,5.482
147
+ 20,mlp.gate_proj,0.0000000765,0.10000,5.566
148
+ 20,mlp.down_proj,0.0000000054,0.10000,21.961
149
+ 21,self_attn.q_proj,0.0000000387,0.10000,7.487
150
+ 21,self_attn.v_proj,0.0000000051,0.10000,7.562
151
+ 21,self_attn.k_proj,0.0000000078,0.10000,7.574
152
+ 21,self_attn.o_proj,0.0000000017,0.10000,2.428
153
+ 21,mlp.gate_proj,0.0000000693,0.10000,5.477
154
+ 21,mlp.up_proj,0.0000000665,0.10000,5.515
155
+ 21,mlp.down_proj,0.0000000053,0.10000,22.161
156
+ 22,self_attn.k_proj,0.0000000189,0.10000,7.254
157
+ 22,self_attn.q_proj,0.0000001047,0.10000,7.257
158
+ 22,self_attn.v_proj,0.0000000119,0.10000,7.286
159
+ 22,self_attn.o_proj,0.0000000023,0.10000,2.451
160
+ 22,mlp.gate_proj,0.0000000953,0.10000,5.428
161
+ 22,mlp.up_proj,0.0000000915,0.10000,5.452
162
+ 22,mlp.down_proj,0.0000000101,0.10000,22.137
163
+ 23,self_attn.k_proj,0.0000000156,0.10000,7.354
164
+ 23,self_attn.q_proj,0.0000000896,0.10000,7.405
165
+ 23,self_attn.v_proj,0.0000000106,0.10000,7.414
166
+ 23,self_attn.o_proj,0.0000000028,0.10000,2.547
167
+ 23,mlp.gate_proj,0.0000001081,0.10000,5.480
168
+ 23,mlp.up_proj,0.0000001026,0.10000,5.505
169
+ 23,mlp.down_proj,0.0000000106,0.10000,22.164
170
+ 24,self_attn.v_proj,0.0000000111,0.10000,7.370
171
+ 24,self_attn.k_proj,0.0000000138,0.10000,7.429
172
+ 24,self_attn.q_proj,0.0000000774,0.10000,7.461
173
+ 24,self_attn.o_proj,0.0000000028,0.10000,2.425
174
+ 24,mlp.gate_proj,0.0000001285,0.10000,5.526
175
+ 24,mlp.up_proj,0.0000001210,0.10000,5.561
176
+ 24,mlp.down_proj,0.0000000125,0.10000,22.022
177
+ 25,self_attn.q_proj,0.0000001378,0.10000,7.288
178
+ 25,self_attn.k_proj,0.0000000241,0.10000,7.362
179
+ 25,self_attn.v_proj,0.0000000155,0.10000,7.407
180
+ 25,self_attn.o_proj,0.0000000029,0.10000,2.509
181
+ 25,mlp.gate_proj,0.0000001537,0.10000,5.490
182
+ 25,mlp.up_proj,0.0000001457,0.10000,5.528
183
+ 25,mlp.down_proj,0.0000000145,0.10000,22.637
184
+ 26,self_attn.v_proj,0.0000000118,0.10000,7.436
185
+ 26,self_attn.q_proj,0.0000000986,0.10000,7.472
186
+ 26,self_attn.k_proj,0.0000000178,0.10000,7.533
187
+ 26,self_attn.o_proj,0.0000000041,0.10000,2.595
188
+ 26,mlp.up_proj,0.0000001662,0.10000,5.439
189
+ 26,mlp.gate_proj,0.0000001746,0.10000,5.441
190
+ 26,mlp.down_proj,0.0000000153,0.10000,22.215
191
+ 27,self_attn.v_proj,0.0000000124,0.10000,7.330
192
+ 27,self_attn.q_proj,0.0000000959,0.10000,7.344
193
+ 27,self_attn.k_proj,0.0000000178,0.10000,7.351
194
+ 27,self_attn.o_proj,0.0000000039,0.10000,2.502
195
+ 27,mlp.gate_proj,0.0000001913,0.10000,5.305
196
+ 27,mlp.up_proj,0.0000001828,0.10000,5.325
197
+ 27,mlp.down_proj,0.0000000164,0.10000,22.087
198
+ 28,self_attn.q_proj,0.0000000973,0.10000,7.503
199
+ 28,self_attn.k_proj,0.0000000174,0.10000,7.519
200
+ 28,self_attn.v_proj,0.0000000132,0.10000,7.531
201
+ 28,self_attn.o_proj,0.0000000028,0.10000,2.589
202
+ 28,mlp.gate_proj,0.0000002029,0.10000,5.543
203
+ 28,mlp.up_proj,0.0000001947,0.10000,5.569
204
+ 28,mlp.down_proj,0.0000000182,0.10000,22.324
205
+ 29,self_attn.q_proj,0.0000001183,0.10000,7.513
206
+ 29,self_attn.k_proj,0.0000000219,0.10000,7.569
207
+ 29,self_attn.v_proj,0.0000000144,0.10000,7.600
208
+ 29,self_attn.o_proj,0.0000000052,0.10000,2.698
209
+ 29,mlp.gate_proj,0.0000002155,0.10000,5.447
210
+ 29,mlp.up_proj,0.0000002066,0.10000,5.480
211
+ 29,mlp.down_proj,0.0000000202,0.10000,21.811
212
+ 30,self_attn.q_proj,0.0000001278,0.10000,7.383
213
+ 30,self_attn.v_proj,0.0000000172,0.10000,7.454
214
+ 30,self_attn.k_proj,0.0000000221,0.10000,7.514
215
+ 30,self_attn.o_proj,0.0000000052,0.10000,2.480
216
+ 30,mlp.gate_proj,0.0000002370,0.10000,5.544
217
+ 30,mlp.up_proj,0.0000002246,0.10000,5.586
218
+ 30,mlp.down_proj,0.0000000225,0.10000,21.930
219
+ 31,self_attn.k_proj,0.0000000243,0.10000,7.175
220
+ 31,self_attn.v_proj,0.0000000185,0.10000,7.233
221
+ 31,self_attn.q_proj,0.0000001357,0.10000,7.270
222
+ 31,self_attn.o_proj,0.0000000032,0.10000,2.675
223
+ 31,mlp.gate_proj,0.0000002590,0.10000,5.433
224
+ 31,mlp.up_proj,0.0000002450,0.10000,5.452
225
+ 31,mlp.down_proj,0.0000000241,0.10000,21.739
226
+ 32,self_attn.q_proj,0.0000001497,0.10000,7.253
227
+ 32,self_attn.v_proj,0.0000000206,0.10000,7.273
228
+ 32,self_attn.k_proj,0.0000000269,0.10000,7.295
229
+ 32,self_attn.o_proj,0.0000000036,0.10000,2.636
230
+ 32,mlp.up_proj,0.0000002581,0.10000,5.534
231
+ 32,mlp.gate_proj,0.0000002723,0.10000,5.551
232
+ 32,mlp.down_proj,0.0000000252,0.10000,21.909
233
+ 33,self_attn.k_proj,0.0000000253,0.10000,7.358
234
+ 33,self_attn.q_proj,0.0000001362,0.10000,7.369
235
+ 33,self_attn.v_proj,0.0000000229,0.10000,7.379
236
+ 33,self_attn.o_proj,0.0000000038,0.10000,2.527
237
+ 33,mlp.gate_proj,0.0000002941,0.10000,5.440
238
+ 33,mlp.up_proj,0.0000002748,0.10000,5.469
239
+ 33,mlp.down_proj,0.0000000269,0.10000,22.038
240
+ 34,self_attn.k_proj,0.0000000286,0.10000,7.464
241
+ 34,self_attn.v_proj,0.0000000262,0.10000,7.484
242
+ 34,self_attn.q_proj,0.0000001592,0.10000,7.494
243
+ 34,self_attn.o_proj,0.0000000032,0.10000,2.582
244
+ 34,mlp.up_proj,0.0000002879,0.10000,5.538
245
+ 34,mlp.gate_proj,0.0000003089,0.10000,5.562
246
+ 34,mlp.down_proj,0.0000000290,0.10000,21.733
247
+ 35,self_attn.k_proj,0.0000000311,0.10000,7.366
248
+ 35,self_attn.v_proj,0.0000000291,0.10000,7.377
249
+ 35,self_attn.q_proj,0.0000001728,0.10000,7.379
250
+ 35,self_attn.o_proj,0.0000000032,0.10000,2.550
251
+ 35,mlp.gate_proj,0.0000003185,0.10000,5.484
252
+ 35,mlp.up_proj,0.0000002989,0.10000,5.508
253
+ 35,mlp.down_proj,0.0000000307,0.10000,21.711
254
+ 36,self_attn.v_proj,0.0000000310,0.10000,7.721
255
+ 36,self_attn.q_proj,0.0000001821,0.10000,7.742
256
+ 36,self_attn.k_proj,0.0000000325,0.10000,7.751
257
+ 36,self_attn.o_proj,0.0000000034,0.10000,2.624
258
+ 36,mlp.gate_proj,0.0000003245,0.10000,5.345
259
+ 36,mlp.up_proj,0.0000003066,0.10000,5.368
260
+ 36,mlp.down_proj,0.0000000304,0.10000,21.899
261
+ 37,self_attn.k_proj,0.0000000316,0.10000,6.939
262
+ 37,self_attn.v_proj,0.0000000282,0.10000,6.953
263
+ 37,self_attn.q_proj,0.0000001720,0.10000,6.976
264
+ 37,self_attn.o_proj,0.0000000037,0.10000,2.519
265
+ 37,mlp.up_proj,0.0000003163,0.10000,5.371
266
+ 37,mlp.gate_proj,0.0000003322,0.10000,5.397
267
+ 37,mlp.down_proj,0.0000000318,0.10000,19.887
268
+ 38,self_attn.k_proj,0.0000000312,0.10000,7.383
269
+ 38,self_attn.q_proj,0.0000001654,0.10000,7.412
270
+ 38,self_attn.v_proj,0.0000000256,0.10000,7.441
271
+ 38,self_attn.o_proj,0.0000000050,0.10000,2.496
272
+ 38,mlp.up_proj,0.0000003290,0.10000,5.493
273
+ 38,mlp.gate_proj,0.0000003453,0.10000,5.544
274
+ 38,mlp.down_proj,0.0000000338,0.10000,20.116
275
+ 39,self_attn.k_proj,0.0000000301,0.10000,7.216
276
+ 39,self_attn.q_proj,0.0000001588,0.10000,7.269
277
+ 39,self_attn.v_proj,0.0000000260,0.10000,7.290
278
+ 39,self_attn.o_proj,0.0000000050,0.10000,2.631
279
+ 39,mlp.gate_proj,0.0000003537,0.10000,5.568
280
+ 39,mlp.up_proj,0.0000003390,0.10000,5.618
281
+ 39,mlp.down_proj,0.0000000358,0.10000,19.983
282
+ 40,self_attn.k_proj,0.0000000301,0.10000,7.297
283
+ 40,self_attn.q_proj,0.0000001559,0.10000,7.342
284
+ 40,self_attn.v_proj,0.0000000208,0.10000,7.355
285
+ 40,self_attn.o_proj,0.0000000073,0.10000,2.527
286
+ 40,mlp.up_proj,0.0000003505,0.10000,5.421
287
+ 40,mlp.gate_proj,0.0000003630,0.10000,5.451
288
+ 40,mlp.down_proj,0.0000000389,0.10000,19.609
289
+ 41,self_attn.q_proj,0.0000001367,0.10000,7.205
290
+ 41,self_attn.k_proj,0.0000000267,0.10000,7.287
291
+ 41,self_attn.v_proj,0.0000000197,0.10000,7.313
292
+ 41,self_attn.o_proj,0.0000000084,0.10000,2.542
293
+ 41,mlp.gate_proj,0.0000003830,0.10000,5.387
294
+ 41,mlp.up_proj,0.0000003693,0.10000,5.388
295
+ 41,mlp.down_proj,0.0000000416,0.10000,20.146
296
+ 42,self_attn.q_proj,0.0000001816,0.10000,7.226
297
+ 42,self_attn.v_proj,0.0000000258,0.10000,7.241
298
+ 42,self_attn.k_proj,0.0000000331,0.10000,7.257
299
+ 42,self_attn.o_proj,0.0000000066,0.10000,2.523
300
+ 42,mlp.gate_proj,0.0000003972,0.10000,5.597
301
+ 42,mlp.up_proj,0.0000003861,0.10000,5.616
302
+ 42,mlp.down_proj,0.0000000464,0.10000,19.683
303
+ 43,self_attn.q_proj,0.0000001734,0.10000,7.062
304
+ 43,self_attn.k_proj,0.0000000328,0.10000,7.078
305
+ 43,self_attn.v_proj,0.0000000228,0.10000,7.086
306
+ 43,self_attn.o_proj,0.0000000094,0.10000,2.467
307
+ 43,mlp.gate_proj,0.0000004086,0.10000,5.548
308
+ 43,mlp.up_proj,0.0000004008,0.10000,5.576
309
+ 43,mlp.down_proj,0.0000000508,0.10000,19.758
310
+ 44,self_attn.k_proj,0.0000000311,0.10000,7.145
311
+ 44,self_attn.q_proj,0.0000001729,0.10000,7.148
312
+ 44,self_attn.v_proj,0.0000000210,0.10000,7.261
313
+ 44,self_attn.o_proj,0.0000000130,0.10000,2.521
314
+ 44,mlp.up_proj,0.0000004158,0.10000,5.542
315
+ 44,mlp.gate_proj,0.0000004165,0.10000,5.566
316
+ 44,mlp.down_proj,0.0000000610,0.10000,19.854
317
+ 45,self_attn.v_proj,0.0000000191,0.10000,7.172
318
+ 45,self_attn.q_proj,0.0000001847,0.10000,7.179
319
+ 45,self_attn.k_proj,0.0000000328,0.10000,7.192
320
+ 45,self_attn.o_proj,0.0000000170,0.10000,2.558
321
+ 45,mlp.up_proj,0.0000004313,0.10000,5.618
322
+ 45,mlp.gate_proj,0.0000004280,0.10000,5.659
323
+ 45,mlp.down_proj,0.0000000999,0.10000,19.934
324
+ 46,self_attn.k_proj,0.0000000364,0.10000,7.268
325
+ 46,self_attn.v_proj,0.0000000217,0.10000,7.296
326
+ 46,self_attn.q_proj,0.0000001988,0.10000,7.316
327
+ 46,self_attn.o_proj,0.0000000236,0.10000,2.494
328
+ 46,mlp.gate_proj,0.0000004367,0.10000,5.585
329
+ 46,mlp.up_proj,0.0000004444,0.10000,5.597
330
+ 46,mlp.down_proj,0.0000000659,0.10000,20.187
331
+ 47,self_attn.q_proj,0.0000002005,0.10000,7.424
332
+ 47,self_attn.k_proj,0.0000000355,0.10000,7.435
333
+ 47,self_attn.v_proj,0.0000000224,0.10000,7.448
334
+ 47,self_attn.o_proj,0.0000000246,0.10000,2.648
335
+ 47,mlp.gate_proj,0.0000004369,0.10000,5.538
336
+ 47,mlp.up_proj,0.0000004500,0.10000,5.584
337
+ 47,mlp.down_proj,0.0000000736,0.10000,20.966
338
+ 48,self_attn.v_proj,0.0000000253,0.10000,7.299
339
+ 48,self_attn.k_proj,0.0000000347,0.10000,7.327
340
+ 48,self_attn.q_proj,0.0000002075,0.10000,7.333
341
+ 48,self_attn.o_proj,0.0000000238,0.10000,2.441
342
+ 48,mlp.gate_proj,0.0000004571,0.10000,5.604
343
+ 48,mlp.up_proj,0.0000004734,0.10000,5.628
344
+ 48,mlp.down_proj,0.0000000806,0.10000,20.734
345
+ 49,self_attn.q_proj,0.0000001961,0.10000,9.009
346
+ 49,self_attn.v_proj,0.0000000241,0.10000,9.024
347
+ 49,self_attn.k_proj,0.0000000345,0.10000,9.037
348
+ 49,self_attn.o_proj,0.0000000250,0.10000,2.535
349
+ 49,mlp.gate_proj,0.0000004585,0.10000,5.607
350
+ 49,mlp.up_proj,0.0000004795,0.10000,5.643
351
+ 49,mlp.down_proj,0.0000000855,0.10000,19.500
352
+ 50,self_attn.q_proj,0.0000002443,0.10000,7.122
353
+ 50,self_attn.k_proj,0.0000000414,0.10000,7.148
354
+ 50,self_attn.v_proj,0.0000000279,0.10000,7.153
355
+ 50,self_attn.o_proj,0.0000000327,0.10000,2.518
356
+ 50,mlp.gate_proj,0.0000004506,0.10000,5.431
357
+ 50,mlp.up_proj,0.0000004762,0.10000,5.454
358
+ 50,mlp.down_proj,0.0000000934,0.10000,20.451
359
+ 51,self_attn.q_proj,0.0000002188,0.10000,7.389
360
+ 51,self_attn.v_proj,0.0000000248,0.10000,7.425
361
+ 51,self_attn.k_proj,0.0000000378,0.10000,7.429
362
+ 51,self_attn.o_proj,0.0000000364,0.10000,2.570
363
+ 51,mlp.gate_proj,0.0000004722,0.10000,5.442
364
+ 51,mlp.up_proj,0.0000004974,0.10000,5.478
365
+ 51,mlp.down_proj,0.0000001018,0.10000,19.328
366
+ 52,self_attn.q_proj,0.0000002062,0.10000,7.248
367
+ 52,self_attn.v_proj,0.0000000299,0.10000,7.353
368
+ 52,self_attn.k_proj,0.0000000333,0.10000,7.364
369
+ 52,self_attn.o_proj,0.0000000394,0.10000,2.514
370
+ 52,mlp.gate_proj,0.0000005355,0.10000,5.560
371
+ 52,mlp.up_proj,0.0000005580,0.10000,5.596
372
+ 52,mlp.down_proj,0.0000001059,0.10000,20.357
373
+ 53,self_attn.k_proj,0.0000000378,0.10000,7.768
374
+ 53,self_attn.v_proj,0.0000000327,0.10000,7.781
375
+ 53,self_attn.q_proj,0.0000002302,0.10000,7.784
376
+ 53,self_attn.o_proj,0.0000000428,0.10000,2.584
377
+ 53,mlp.gate_proj,0.0000005775,0.10000,5.513
378
+ 53,mlp.up_proj,0.0000005966,0.10000,5.547
379
+ 53,mlp.down_proj,0.0000001133,0.10000,19.860
380
+ 54,self_attn.q_proj,0.0000002454,0.10000,7.557
381
+ 54,self_attn.v_proj,0.0000000422,0.10000,7.561
382
+ 54,self_attn.k_proj,0.0000000365,0.10000,7.582
383
+ 54,self_attn.o_proj,0.0000000386,0.10000,2.506
384
+ 54,mlp.gate_proj,0.0000005656,0.10000,5.468
385
+ 54,mlp.up_proj,0.0000005878,0.10000,5.499
386
+ 54,mlp.down_proj,0.0000001216,0.10000,20.033
387
+ 55,self_attn.k_proj,0.0000000352,0.10000,7.106
388
+ 55,self_attn.v_proj,0.0000000412,0.10000,7.137
389
+ 55,self_attn.q_proj,0.0000002385,0.10000,7.201
390
+ 55,self_attn.o_proj,0.0000000478,0.10000,2.551
391
+ 55,mlp.gate_proj,0.0000005846,0.10000,5.452
392
+ 55,mlp.up_proj,0.0000006090,0.10000,5.479
393
+ 55,mlp.down_proj,0.0000001418,0.10000,19.931
394
+ 56,self_attn.k_proj,0.0000000359,0.10000,7.452
395
+ 56,self_attn.v_proj,0.0000000356,0.10000,7.456
396
+ 56,self_attn.q_proj,0.0000002264,0.10000,7.468
397
+ 56,self_attn.o_proj,0.0000000601,0.10000,2.559
398
+ 56,mlp.gate_proj,0.0000006445,0.10000,5.604
399
+ 56,mlp.up_proj,0.0000006655,0.10000,5.626
400
+ 56,mlp.down_proj,0.0000001594,0.10000,20.208
401
+ 57,self_attn.k_proj,0.0000000381,0.10000,7.531
402
+ 57,self_attn.v_proj,0.0000000407,0.10000,7.533
403
+ 57,self_attn.q_proj,0.0000002412,0.10000,7.552
404
+ 57,self_attn.o_proj,0.0000000576,0.10000,2.509
405
+ 57,mlp.up_proj,0.0000006661,0.10000,5.426
406
+ 57,mlp.gate_proj,0.0000006429,0.10000,5.484
407
+ 57,mlp.down_proj,0.0000001745,0.10000,19.825
408
+ 58,self_attn.q_proj,0.0000002396,0.10000,7.228
409
+ 58,self_attn.k_proj,0.0000000356,0.10000,7.243
410
+ 58,self_attn.v_proj,0.0000000423,0.10000,7.269
411
+ 58,self_attn.o_proj,0.0000000717,0.10000,2.504
412
+ 58,mlp.up_proj,0.0000006885,0.10000,5.462
413
+ 58,mlp.gate_proj,0.0000006628,0.10000,5.496
414
+ 58,mlp.down_proj,0.0000002187,0.10000,20.706
415
+ 59,self_attn.k_proj,0.0000000358,0.10000,7.316
416
+ 59,self_attn.q_proj,0.0000002358,0.10000,7.319
417
+ 59,self_attn.v_proj,0.0000000438,0.10000,7.367
418
+ 59,self_attn.o_proj,0.0000000830,0.10000,2.489
419
+ 59,mlp.gate_proj,0.0000006991,0.10000,5.441
420
+ 59,mlp.up_proj,0.0000007101,0.10000,5.447
421
+ 59,mlp.down_proj,0.0000002605,0.10000,20.113
422
+ 60,self_attn.k_proj,0.0000000385,0.10000,7.595
423
+ 60,self_attn.v_proj,0.0000000539,0.10000,7.601
424
+ 60,self_attn.q_proj,0.0000002725,0.10000,7.617
425
+ 60,self_attn.o_proj,0.0000000656,0.10000,2.459
426
+ 60,mlp.gate_proj,0.0000008601,0.10000,5.598
427
+ 60,mlp.up_proj,0.0000008547,0.10000,5.599
428
+ 60,mlp.down_proj,0.0000002924,0.10000,19.667
429
+ 61,self_attn.q_proj,0.0000002734,0.10000,7.421
430
+ 61,self_attn.k_proj,0.0000000390,0.10000,7.479
431
+ 61,self_attn.v_proj,0.0000000580,0.10000,7.497
432
+ 61,self_attn.o_proj,0.0000000729,0.10000,2.582
433
+ 61,mlp.up_proj,0.0000009288,0.10000,5.431
434
+ 61,mlp.gate_proj,0.0000009403,0.10000,5.462
435
+ 61,mlp.down_proj,0.0000003304,0.10000,20.248
436
+ 62,self_attn.v_proj,0.0000000552,0.10000,7.353
437
+ 62,self_attn.k_proj,0.0000000405,0.10000,7.411
438
+ 62,self_attn.q_proj,0.0000002804,0.10000,7.417
439
+ 62,self_attn.o_proj,0.0000000758,0.10000,2.597
440
+ 62,mlp.up_proj,0.0000010517,0.10000,5.496
441
+ 62,mlp.gate_proj,0.0000010895,0.10000,5.531
442
+ 62,mlp.down_proj,0.0000003508,0.10000,20.600
443
+ 63,self_attn.v_proj,0.0000000561,0.10000,7.348
444
+ 63,self_attn.q_proj,0.0000002738,0.10000,7.362
445
+ 63,self_attn.k_proj,0.0000000403,0.10000,7.369
446
+ 63,self_attn.o_proj,0.0000000833,0.10000,2.474
447
+ 63,mlp.gate_proj,0.0000011510,0.10000,5.576
448
+ 63,mlp.up_proj,0.0000011206,0.10000,5.612
449
+ 63,mlp.down_proj,0.0000004220,0.10000,19.897
450
+ 64,self_attn.v_proj,0.0000000565,0.10000,7.117
451
+ 64,self_attn.q_proj,0.0000002876,0.10000,7.118
452
+ 64,self_attn.k_proj,0.0000000428,0.10000,7.213
453
+ 64,self_attn.o_proj,0.0000000875,0.10000,2.483
454
+ 64,mlp.up_proj,0.0000012125,0.10000,5.393
455
+ 64,mlp.gate_proj,0.0000012552,0.10000,5.437
456
+ 64,mlp.down_proj,0.0000005112,0.10000,20.388
457
+ 65,self_attn.q_proj,0.0000002937,0.10000,7.266
458
+ 65,self_attn.k_proj,0.0000000437,0.10000,7.331
459
+ 65,self_attn.v_proj,0.0000000609,0.10000,7.387
460
+ 65,self_attn.o_proj,0.0000000966,0.10000,2.618
461
+ 65,mlp.gate_proj,0.0000014031,0.10000,5.631
462
+ 65,mlp.up_proj,0.0000013591,0.10000,5.652
463
+ 65,mlp.down_proj,0.0000005885,0.10000,19.595
464
+ 66,self_attn.q_proj,0.0000002761,0.10000,7.568
465
+ 66,self_attn.k_proj,0.0000000388,0.10000,7.618
466
+ 66,self_attn.v_proj,0.0000000634,0.10000,7.632
467
+ 66,self_attn.o_proj,0.0000000883,0.10000,2.544
468
+ 66,mlp.up_proj,0.0000015823,0.10000,5.292
469
+ 66,mlp.gate_proj,0.0000016563,0.10000,5.335
470
+ 66,mlp.down_proj,0.0000006308,0.10000,19.994
471
+ 67,self_attn.v_proj,0.0000000724,0.10000,7.381
472
+ 67,self_attn.q_proj,0.0000002954,0.10000,7.383
473
+ 67,self_attn.k_proj,0.0000000415,0.10000,7.407
474
+ 67,self_attn.o_proj,0.0000000722,0.10000,2.606
475
+ 67,mlp.up_proj,0.0000016935,0.10000,5.417
476
+ 67,mlp.gate_proj,0.0000017528,0.10000,5.445
477
+ 67,mlp.down_proj,0.0000006867,0.10000,19.713
478
+ 68,self_attn.k_proj,0.0000000429,0.10000,7.401
479
+ 68,self_attn.v_proj,0.0000000829,0.10000,7.416
480
+ 68,self_attn.q_proj,0.0000003136,0.10000,7.420
481
+ 68,self_attn.o_proj,0.0000000874,0.10000,2.638
482
+ 68,mlp.up_proj,0.0000018424,0.10000,5.612
483
+ 68,mlp.gate_proj,0.0000018866,0.10000,5.636
484
+ 68,mlp.down_proj,0.0000007497,0.10000,19.962
485
+ 69,self_attn.v_proj,0.0000000706,0.10000,7.326
486
+ 69,self_attn.k_proj,0.0000000437,0.10000,7.367
487
+ 69,self_attn.q_proj,0.0000003053,0.10000,7.398
488
+ 69,self_attn.o_proj,0.0000001096,0.10000,2.542
489
+ 69,mlp.gate_proj,0.0000020043,0.10000,5.590
490
+ 69,mlp.up_proj,0.0000019949,0.10000,5.654
491
+ 69,mlp.down_proj,0.0000008874,0.10000,19.492
492
+ 70,self_attn.q_proj,0.0000003152,0.10000,7.292
493
+ 70,self_attn.v_proj,0.0000000946,0.10000,7.365
494
+ 70,self_attn.k_proj,0.0000000417,0.10000,7.398
495
+ 70,self_attn.o_proj,0.0000001396,0.10000,2.571
496
+ 70,mlp.gate_proj,0.0000022492,0.10000,5.472
497
+ 70,mlp.up_proj,0.0000022710,0.10000,5.531
498
+ 70,mlp.down_proj,0.0000010752,0.10000,20.280
499
+ 71,self_attn.v_proj,0.0000000978,0.10000,7.116
500
+ 71,self_attn.k_proj,0.0000000436,0.10000,7.148
501
+ 71,self_attn.q_proj,0.0000003238,0.10000,7.152
502
+ 71,self_attn.o_proj,0.0000001525,0.10000,2.581
503
+ 71,mlp.up_proj,0.0000024830,0.10000,5.534
504
+ 71,mlp.gate_proj,0.0000024225,0.10000,5.559
505
+ 71,mlp.down_proj,0.0000012695,0.10000,20.608
506
+ 72,self_attn.q_proj,0.0000003222,0.10000,7.509
507
+ 72,self_attn.v_proj,0.0000001117,0.10000,7.511
508
+ 72,self_attn.k_proj,0.0000000405,0.10000,7.527
509
+ 72,self_attn.o_proj,0.0000001605,0.10000,2.526
510
+ 72,mlp.gate_proj,0.0000026203,0.10000,5.694
511
+ 72,mlp.up_proj,0.0000027220,0.10000,5.720
512
+ 72,mlp.down_proj,0.0000014853,0.10000,19.933
513
+ 73,self_attn.q_proj,0.0000003313,0.10000,7.183
514
+ 73,self_attn.k_proj,0.0000000419,0.10000,7.194
515
+ 73,self_attn.v_proj,0.0000001306,0.10000,7.225
516
+ 73,self_attn.o_proj,0.0000001893,0.10000,2.388
517
+ 73,mlp.gate_proj,0.0000027619,0.10000,5.585
518
+ 73,mlp.up_proj,0.0000029067,0.10000,5.659
519
+ 73,mlp.down_proj,0.0000017333,0.10000,19.561
520
+ 74,self_attn.k_proj,0.0000000418,0.10000,7.129
521
+ 74,self_attn.v_proj,0.0000001241,0.10000,7.183
522
+ 74,self_attn.q_proj,0.0000003531,0.10000,7.196
523
+ 74,self_attn.o_proj,0.0000001913,0.10000,2.548
524
+ 74,mlp.up_proj,0.0000030490,0.10000,5.648
525
+ 74,mlp.gate_proj,0.0000028471,0.10000,5.666
526
+ 74,mlp.down_proj,0.0000020671,0.10000,19.825
527
+ 75,self_attn.v_proj,0.0000001411,0.10000,7.414
528
+ 75,self_attn.k_proj,0.0000000404,0.10000,7.435
529
+ 75,self_attn.q_proj,0.0000003362,0.10000,7.443
530
+ 75,self_attn.o_proj,0.0000002744,0.10000,2.555
531
+ 75,mlp.gate_proj,0.0000030461,0.10000,5.553
532
+ 75,mlp.up_proj,0.0000033062,0.10000,5.553
533
+ 75,mlp.down_proj,0.0000024828,0.10000,19.960
534
+ 76,self_attn.v_proj,0.0000002220,0.10000,6.971
535
+ 76,self_attn.k_proj,0.0000000397,0.10000,7.015
536
+ 76,self_attn.q_proj,0.0000003707,0.10000,7.018
537
+ 76,self_attn.o_proj,0.0000003982,0.10000,2.552
538
+ 76,mlp.gate_proj,0.0000032092,0.10000,5.624
539
+ 76,mlp.up_proj,0.0000035158,0.10000,5.624
540
+ 76,mlp.down_proj,0.0000034027,0.10000,20.258
541
+ 77,self_attn.k_proj,0.0000000374,0.10000,7.514
542
+ 77,self_attn.q_proj,0.0000003344,0.10000,7.524
543
+ 77,self_attn.v_proj,0.0000001427,0.10000,7.594
544
+ 77,self_attn.o_proj,0.0000003980,0.10000,2.495
545
+ 77,mlp.gate_proj,0.0000033776,0.10000,5.525
546
+ 77,mlp.up_proj,0.0000037065,0.10000,5.542
547
+ 77,mlp.down_proj,0.0000043159,0.10000,20.320
548
+ 78,self_attn.v_proj,0.0000001428,0.10000,7.170
549
+ 78,self_attn.k_proj,0.0000000335,0.10000,7.181
550
+ 78,self_attn.q_proj,0.0000003073,0.10000,7.183
551
+ 78,self_attn.o_proj,0.0000005447,0.10000,2.457
552
+ 78,mlp.gate_proj,0.0000030503,0.10000,5.725
553
+ 78,mlp.up_proj,0.0000033010,0.10000,5.750
554
+ 78,mlp.down_proj,0.0000107036,0.10000,19.873
555
+ 79,self_attn.q_proj,0.0000001919,0.10000,7.286
556
+ 79,self_attn.v_proj,0.0000000454,0.10000,7.302
557
+ 79,self_attn.k_proj,0.0000000254,0.10000,7.316
558
+ 79,self_attn.o_proj,0.0000001316,0.10000,2.527
559
+ 79,mlp.gate_proj,0.0000027360,0.10000,5.491
560
+ 79,mlp.up_proj,0.0000028670,0.10000,5.523
561
+ 79,mlp.down_proj,0.0000358965,0.10000,19.992