diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,18049 @@ +{ + "metadata": { + "ParamSize": 1227, + "ParamBytes": 132312161280.0, + "BitsPerParam": 3.6623472527700525 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 311164928, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 151936, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 311164928, + "byteOffset": 0 + } + ], + "md5sum": "0ea644e6a2d6ffe999e3436b3a99537f" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 38895616, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 151936, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 38895616, + "byteOffset": 0 + } + ], + "md5sum": "cef00afef23c0509d7021c40f1aa2337" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.93.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "cadf7e68b234ad8d2a84f49a7ca473c7" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.93.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b31ffd2e2f0e0df5313b8ec101cbf3dc" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.93.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "55d443396bda589d72aff5db3389cf82" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.93.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7f2fadf07bd80667bbccf88cd947c420" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 311164928, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 151936, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 311164928, + "byteOffset": 0 + } + ], + "md5sum": "aa2172ae902369ac56cbd18f319c716a" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 38895616, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 151936, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 38895616, + "byteOffset": 0 + } + ], + "md5sum": "5e642dcf66c7a6875c9bb6682181e215" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.0.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0bd32a1efc905750bd7b817279c0903d" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.0.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ee92ffae888798470e9c9dd3759acbfc" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.0.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0b8c4edbfe636125d12a614a08c0cd06" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "577902e0bf8335531575fe1c5ddae005" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "aeb91bea3998d9d60cc3963e83b8338e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.1.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "545e2f6d347f8c01745bc81ed9c484dd" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.1.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5df62be4666329b30fe613672dd105ec" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "32bc6d871b9c8a82c3ddf91365208843" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9dcd7831db3409d4997f853bef2ede74" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "235788bb051dbdc56670f03bd0a6a843" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "63f17d6d1029d0a1fc72cd71a8c3b1b6" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.2.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b69b862ab5718b164848767a7856a389" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.2.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b1044bf5044af86bf46b14e24de0cbf5" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "dace624c53c26d72b88761586490da5a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "01f7b7b1ca4b6890d52daa07971b5d22" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4cbec3326c230930a579deb730e32b7e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "0d7cfd8c43e85a5e4ac388eb8444b83a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33350912, + "records": [ + { + "name": "model.layers.93.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 0 + }, + { + "name": "model.layers.93.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16384 + }, + { + "name": "model.layers.0.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24576 + }, + { + "name": "model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1073152 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 1073408 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 19947776 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 22307072 + }, + { + "name": "model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24404224 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24404480 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24412672 + }, + { + "name": "model.layers.1.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24420864 + }, + { + "name": "model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25469440 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 25469696 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27828992 + }, + { + "name": "model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29926144 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29926400 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29934592 + }, + { + "name": "model.layers.2.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29942784 + }, + { + "name": "model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30991360 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30991616 + } + ], + "md5sum": "60c289fa1e9897e871b162eb46f098ca" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.10.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "31133479b128bc41943524cf4636fe34" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.10.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7bfa32bedde2e8e3642f100ffc4b75e5" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "baad333304432fe98ae42e936899877d" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8839618b700689a2f161a9006d8934a5" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.11.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d16bd0251cff97b4ad71df7f46680c35" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.11.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "449b8410fb6cf6ead4699e3846560911" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "46390189b44b46a1fed5c15b41ac8d34" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e8c6514f92e3ac09c7ce122e2928400a" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "32d00a2817f566b7b3b634f3d354300e" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5dd1b6ce007e46701f90cc54f964d688" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5751f382c2803e449eebce0cef124953" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.9.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "cc1df5c124ab24354b6859252e40e8b4" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.9.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "dbb2abf6c17c9071b2985067b530a189" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "eded8ab1d0390d9b2d1dcbb07f294edb" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "83e2b67f783be9310a682fbf92b68096" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.12.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f85e08cb710842a01d82a006ce2ed718" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.12.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5447bc2228c247188e44be998be53d63" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "140da2e7af47799d10d793c09468e016" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d66ceea871b532dbe75814bffe0f6bc9" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bd51748506c93133689780f73b4c2620" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33080832, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2097152 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2097408 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2105600 + }, + { + "name": "model.layers.11.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2113792 + }, + { + "name": "model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3162368 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3162624 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22036992 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24396288 + }, + { + "name": "model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26493440 + }, + { + "name": "model.layers.10.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26493696 + }, + { + "name": "model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27542272 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27542528 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 29901824 + }, + { + "name": "model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31998976 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31999232 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32007424 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32015616 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32023808 + }, + { + "name": "model.layers.12.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32032000 + }, + { + "name": "model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33080576 + } + ], + "md5sum": "93541bd537065b75e060b55f86177292" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.13.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "cd80f0a9a931f588beefe075dc78d37c" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.13.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "503a0ed2b7fe0cd42217d1f94c5d27f6" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "12df992e36211dfe2513efdde08d8335" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c5297760321df3cad165ed26b93dfa92" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d05077a672659cfdb1dc3df86d11d7dc" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ab1614facf03f8fa9bdba96ff99fbb49" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.14.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1fc6bc644bc1969892b821f2d16bf241" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.14.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "32729fc9a029f0534bd94d29ca334c7c" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d204e7a0fe4ffe5462b02c81de85515f" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fbd12f1d0e973ab826c4b1f3c9957bc4" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "dc4b2501a03b8b8aa19d332cb35c6291" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1395f1f9a32cfcb66dbfac93b64d1c61" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.15.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "feb2e520ca5458596ff731a5bc084422" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.15.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6ec5193d25da9fdb77c488d32473ea9c" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "68084e802b88fcd23bdf75360acd5b0a" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f3e24caf5950759306d82ae0ad7752be" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b6474c1b5fca92ea6d445b7a539fafae" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.13.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.14.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.15.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "61ed234f15ff55882ddea52123f0b819" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.16.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "33ddd7ac5cc6512b4f693f3ab61d6b0e" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.16.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "66f9f127e7d99bb10b25996e4d689898" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "1f48cab117ee52b768d538e939a40525" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b3c30e47c3fa000067017375ce44e8a8" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "113a7311cb86e6b840f01211dfa01872" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "f8528264dc77713e902d3e11e8435c18" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.17.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b0ee5297d6d9917d03c3311218359589" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.17.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fe30c15508bc5a4a61a32a4d89ffebb7" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "9eb84fc5da4f58a58c2f2b208d46d060" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "61ddc5910f2be1647e23a2c9e2d34dff" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "49407d230865a2b26eef119aaa1e4172" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "fe3724b5c287450c30dee1c969762950" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.18.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "80dd45d7c182a943d5201464e440b5b0" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.18.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3666fa5ee7debe4748191189e495d21e" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f28b22ab2f75ac4345b2b9a01e9b759b" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c787d3c5cb4763bc705797df79682990" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0cfbc6ea239f614d090cacb0285911f0" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.16.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.17.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.18.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "ca5bf9b9762967155427110fc6fdae05" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.19.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0c7a8e6e04efdf6976cd0d8eb556a154" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.19.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ae6a491b3259b519ab222f3323ab7704" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "23c228b31063f106661fdf2391012e0f" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8aecaab74bd390ddfb93249bbb76270f" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.20.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6be193b90c48c2e06d9713db7d733cbe" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.20.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2f5b3d7f2b7c8c606df205b28e2a4748" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "aea898715c5203d76b6d171ff976d8be" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "162a757540792ce4580602e1692c2264" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5a158d7f40a771926e3683c17a2091da" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "f41997bc91e005573e7144d83ab3c885" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "173d0c61984f6406a27910435bebaaa2" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b2231ad17f8d58b3bffe9734f44a3977" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.3.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5d592c99cb566ea66405342eff57bbb3" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.3.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2be8149c114443ea6426f2717514dc93" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b1b4170532ff3b29b022872c587becaa" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "922fb4c09e4c889d9c80717ba1fcd15e" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f187e0b01fdefc525125a85502c53c68" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21250304 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21258496 + }, + { + "name": "model.layers.20.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21266688 + }, + { + "name": "model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22315264 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22315520 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24674816 + }, + { + "name": "model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26771968 + }, + { + "name": "model.layers.19.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.3.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "e5ea6b8d4ae00adaa79b4c04cc4b5fb6" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.21.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c3ef424b8a28f067cb6e524d2e300d8f" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.21.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d6f9131199ba91afe85e882eef41b870" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "ce87a7d3b8250c299f868fcd5ca988f9" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a65a998700945ee428e29748f13cc6c4" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "02c5a9f3315e875b58e0807ec833ff1f" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "bde0b1b2e58fde8233a969a7e6832ab6" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.22.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d466aa84f717900d1ba78e0ad964c62f" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.22.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1110a61e48383c846965a5f154dfd12c" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6e404a497568b2a2b9a91d4f77694f9c" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "add0b4eda720bee42c69947c5ada3687" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "94b90311495ce1bb408b98b297e031b6" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b34cd3d81378bbc56eb1e122a2800878" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0fb976b8154d2dd405d3e8467469fb4a" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32294144, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.21.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.22.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + } + ], + "md5sum": "362ebe7c1a3b8b553b1e528d81bac211" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.23.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e9075a4f6fa4f9cd3ef37535b15950ee" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.23.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "96772aecd7a25a1cc9612d72056ed335" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "a6b31af1334761f2c0a6379c07d42850" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "46a5f7c84402f47b1c7772e2b1b47eb5" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.24.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c3777473012aea81d70aab91bfd0c019" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.24.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a75fa45c2c90b9db3dffdff75cd7c93b" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "a694fb30e517b5a5f7ec0eceea4d2f79" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "74f0ce65d0ac1451a8ed9e8bb8635020" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "143ff36fd6b0b619c77014064b5d9bb9" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "143b63abeb294e1c578a656315620e1e" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.25.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d09132c3204efa3806bc0af255da76dc" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.25.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9e001888e76fb56038828d214606deed" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "00bce5f21260016ac58d0c69e6d3d7e9" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "14087ba004a1253875cd49e1b2a7a25a" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "00a01730aef8d2d46c290173f352d5a6" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d2c6c50c01e7dc4413df9f872f2f0b1c" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33326336, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2367488 + }, + { + "name": "model.layers.24.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2375680 + }, + { + "name": "model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3424256 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3424512 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22298880 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658176 + }, + { + "name": "model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755328 + }, + { + "name": "model.layers.23.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26755584 + }, + { + "name": "model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27804160 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27804416 + }, + { + "name": "model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29901568 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29901824 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29910016 + }, + { + "name": "model.layers.25.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29918208 + }, + { + "name": "model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30966784 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30967040 + } + ], + "md5sum": "0d9ac8f4f7f9cef7ab753ef85f93980b" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.26.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "4b9d9e26969cd2ce561671d88d098855" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.26.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ab6d6cdc0ea650a1d9b067044358e14a" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f264ba5268e8c73e1fc8bcd59ba45af3" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "56dda089e0b0e5432b175d153e650ced" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b4e5e66c646026b48ecfacb6bb60dda2" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f07067b0fbb7d96e52506a8dadac9661" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ec9d6cf5dc919d978013ffb98d9e4e12" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.27.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9052c4001a9a659c3284867a02659131" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.27.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "17e7e73e4b5d319466316328aee12901" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.27.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "ea554cbafa6a89846c4a3fbba337e97b" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0f029d9a5b0ead41c35b7ca6e2d786f4" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.28.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0eed9f85337b1ff2271d2aff1eaa98d3" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.28.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ffc00b484a74bc41720ff8e8885458c6" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.28.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d4fded3793f3ba6c78b5e333c05fe46e" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "15f6846daababbf2ea8c29b46328e75a" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8e54b4a920928d2f55545e6ec91ecb9a" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 33080832, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2097152 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2097408 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2105600 + }, + { + "name": "model.layers.26.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2113792 + }, + { + "name": "model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3162368 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3162624 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22036992 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24396288 + }, + { + "name": "model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26493440 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26493696 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26501888 + }, + { + "name": "model.layers.27.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26510080 + }, + { + "name": "model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27558656 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27558912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 29918208 + }, + { + "name": "model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32015360 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32015616 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32023808 + }, + { + "name": "model.layers.28.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32032000 + }, + { + "name": "model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33080576 + } + ], + "md5sum": "2573eec0d2f7562b29d7d12d782c3bc8" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.29.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6698351297c3c85e0c41d4f6bb48a4f7" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.29.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "37bba13115ab0db0a45e62922aae551e" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.29.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "dd49137e11f91c534f1e5f675c533364" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7dd6d96a55283c24ff126b0b0b3995ca" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ed16d7f660f58a2d7025f61dd93c9d41" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "4a10b5b41fc427b3bacbf8a60009fc9a" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.30.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "36164c6852f2511351a59c2b18632adc" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.30.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cb0318725c0abac491907cab775aff31" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.30.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d1b944c13ca3ac82ee2c394ca684dc4c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8cd71eb703dbf593a35f3b6de5b59202" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f459dd7903680b7381fd941c9369843f" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "10a558a74b494b5f37aa8c736bd0a5f1" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.4.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f7c4a82472fea77abf86b07d150eea2b" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.4.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "43bcbe42710b76960a1cce2d32836a6b" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "478ca60bf1422d0cb48b60cdf3c638e7" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9eade7c431a8c2a25abb204698825b7d" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a7caed0dd2bd5ddfe3e098339b0e2c6f" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.29.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.30.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.4.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "439a8f36d09dfb68e6f0c3465decc365" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.31.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2206df6a11151c154b56c6613102a9f8" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.31.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "233c473ab64c68ff5e83622f54ecc90a" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.31.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0fcd628ee9a879c90110fa295ee52f3b" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4118d84d01403b505903f28fce953aaf" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "107885e989a5d9060d4d9d74e11ca16f" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "c85956a49d47c69a17d9fe6b4159f119" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.32.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "54fa02f05828be0ee0220b32f51800e7" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.32.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3846cdff742fb3ceb0373ff4050391b4" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.32.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "4c5c1f5042d2d6d3b8c8db7847676fed" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.32.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "397a9f239579b84b72976be13d1e625c" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b8c95db484793dc4dd7d08026c52f1b5" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a5e3e62639cd08e130135f5ca3c36364" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.33.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5d7b26dc40a184f40a1182e551cabde2" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.33.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9c91a631f7838ad15d4f3cfdb5a76fc9" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.33.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "cc68787b252ea23c04911be484543b72" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.33.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6c16f0adb3fc296ee8c2f607da1d2b24" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "340117e751ce0da7ad330a166c2842d0" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.31.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.32.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.33.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "87da2548e16cbab97186313f1bad9004" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.34.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b8d70fdb7caa802fe9e6ad5d26d9219c" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.34.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c468203b28a39d1574a15f27647393b2" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.34.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "5ec57479236f7b2ae6336723cd9de9ba" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.34.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9909670be62f90afc8ec1116782ab8c9" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e7559f7b382f1b970179a92490bd4e39" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "99dbb71db1736b8540f3a89d84ca8ffe" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.35.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "fe140c9cda6d22322479de56e6577a5d" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.35.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7727bff413119e165d2715f9c2b53512" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.35.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "363272eea43d8010182c42471dac9a02" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.35.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "23a606888a5466f009c1f254394d565f" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c71518763c5568190e5f843aae366c75" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d3c9a705ca442be201c1fc17b7763edd" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.36.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0994d8f895ab5ff4f108bff1cc988d2a" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.36.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3be420678d80ed5b39c20d6d19737a74" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.36.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6a496b49b82a09f0b8134494278153fa" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.36.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d348951345c0501bd1e1b8823e8ca60f" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d908336c9e4dfd29f4a9d6d4baa36625" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.34.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.35.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.36.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "dfe855331b64e4185ecc390a64379078" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.37.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "3d78161b7d381dbfbeaa9682d8f6829d" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.37.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "45d1097a84d78e1b5112b61cb2ee3dcf" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.37.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "4a96b5c4895a67e57a1aa11c9259d69e" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.37.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9a5b8b32ac7d9e64217808bd291470f9" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c1a87640440ea556556917ebcb99e386" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "fe3d6c64cef233c76735f7d98dbc1573" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.38.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "930dd91e0cb661985bd7c36632bc546a" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.38.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "843b85be76e723ff965375f94605b0d9" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.38.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "79b4bdc19d7d1c0fcb3a360fad38da2a" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.38.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4a306614a68ee85560fe1bb23b64408e" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "97b761babeabd716d0429a51c4c15144" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "f9ed56c5dd48be976b0d9be3f80aecc4" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.39.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "baab414742869a0c6b8e05b09dbc5e4f" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.39.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3dc682615e93409abce328d8caa940cc" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.39.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "cf81d853859c08a14bebcf94037e7e59" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.39.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1fa7d228cf23cd8296fb80111d7230f9" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0f5bf5f0c73b82a4ad564695a0027cc3" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.37.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.38.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.39.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "b17643de9503ebcb7696be74aca06003" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.40.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2c28af83331be37176d9bc08b535ed04" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.40.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e89a1010dfd74b06f48ac9eba6fd613e" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.40.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "91a0a2b2f403791aec1976f226f36981" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.40.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7e59f44c307f6eebc8ab852a4fbd37b7" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cfd609472acc638238d306d14fb08cae" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "cd045a93c964c928e3804fa1f2acc76a" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.5.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "89f0978e635ed0d1ed3244f33e8c8de9" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.5.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "165ff84a5f915b55d9ee98720d74456f" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "980058caf9db7afc8ca37f1e062456b8" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6c61f9dbc8fe2127789862f04a36f268" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "00b37f88fddf0831f59289fb03797681" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "648beb102201693edbdaac1a3b043aa3" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.41.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0208838ddd5ebe5eef2f790939f3926a" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.41.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "eb50d9c3f0a8b9bbadba33c045a1487b" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.41.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c5feffce4e1ffd12213a939ea9c88098" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.41.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fc538ab01834059dc39ad562589cb44d" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d8c08bacf19201ab410ac4a8055b6b82" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.40.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.5.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.41.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "7ae5be13fe596c0fac50be95cb0536d7" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.42.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "efde3a433c8e183bea914bad7fdedb0f" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.42.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9df5bc0c1314824faca7701828a80704" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.42.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f48006b4569cbca761e7e28f19680717" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.42.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "600c0fccdd496cc60615cc4b730a8c21" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c96bb2be36303e8e0b807a8d3ee42fd2" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e71ef8c352973c66a5ca005831981e47" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.43.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7aedd85575efbc20ca6bf6df259f5263" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.43.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c016814b90a9ecb6683bf6f428a59468" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.43.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "df49e865df9c2318faed54f1d70d82a9" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.43.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "04b123d0459931618e423da84c38b807" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8176365fa2d35d80f9f40fba9ae9814f" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "48b74d1a9322a413fd4c598bf0ab2db5" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.44.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "54fcc0116be099473f6b4572843f223c" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.44.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b42c6fd65cb81485ea686dfda2b15954" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.44.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "e099f9cc4dbe388a6375cb74e87b322a" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.44.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5f0698322f505f97781d2a7bb83f4db5" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "828643f99cb993eddd4aff9759974d91" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.42.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.43.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.44.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "7060a9eee7326acf07df1972fd2cba7d" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.45.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b3d38df3f7ddd45be10840d3831f30da" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.45.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "63988a26de6d71dbbd3c2250ce7b9909" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.45.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "7e419c013ff1ee12bef10e615b8f6800" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.45.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "185ddeead759bb4e069c7128d9a45c64" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "13d8478dee3cc7e60f9fbf5aac60a963" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "0050c3a7ad594f8cd53df82fd8fcf044" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.46.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "90e15691168f7a62410a57331213185f" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.46.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "89558b5bf0c15a533f2b3c8a76f94768" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.46.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f4728509ab65a59f9ddebae429cee577" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.46.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c240d05f2377a87fe00536adb80d216b" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1945c7c93bf4a00618d0fcb06aca8d38" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a8f2e1a59c4545dc411f3d6709fb5a4a" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.47.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b889af37a5fb0105435ca153b2b74fa9" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.47.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6c3a81f74885f00c80004873e2c2b6df" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.47.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "1c31eda4ac3b3493a71bc86eea664dcc" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.47.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4c3588215d5ea68dc4f49bae7fabbad2" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9f38e8cb6030fcbceec7b8aa27f74c1d" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.45.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.46.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.47.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "702fc4d097d6808655c0f6b3bfd205a9" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.48.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "aa57f9f00740a1ba9e9aa660b95d167b" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.48.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "febaab83594b552040668d3c071974b1" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.48.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "841d509f8c715b3100121a74e76669d3" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.48.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bd3ac88ae51289d72bb4a7a269c20e60" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f0cb566c7a5d0cae3511dfee53bbc8c6" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "252ba19d15c17d8b72e2e5939d94f52c" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.49.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "dd58d256aa49ec4b0bc40e6a3a3466a4" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.49.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "499489aabf4fc68e1eb527d69d289c61" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.49.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "18af62608aa7a0e930dc8e0697063d26" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.49.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "959601adde6d3c2ba959267ccf86868a" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "faf128d1dea21e2726cc1939f8e918d8" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "89fe4b2e23495253aaa6667d62a6e8fd" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.50.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f0073a0e65b260dd156403fda8d9c566" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.50.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bf30c7f496141ba39463665b5dae2a88" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.50.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f45d751ea100e6e5c33f40edf42bb63d" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.50.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ccb54192f41c8b3df6b553ad6e7b924a" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "82c1d7719e9552a0579339ac88f0f0ca" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.48.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.49.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.50.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "deb94cb67ded22d933974ca05d62efda" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.6.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e588b4427e8871f9ccc46aed24360489" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.6.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "eccd3b279f6a68b979794e5ce174c42b" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b55017fbb7d82c5d2903d6c29295fa81" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a522def4607bcdbf7d38bd4bd000eb76" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "66800f3f87233edad930dfc995af7c7e" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "7e3b1769c1fc53035e23681cde8912f2" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.51.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "73c0118d167d4f31faa2d6e388e8e0ae" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.51.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "af19f7cd3b8a5077a997c65f4ba3d88d" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.51.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d24ea0a3e9a38a7f427f6215f63fcb59" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.51.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "89808d4e5daf1927d69064f2e27aaf77" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "27def8419a317deddcce89e731442cee" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "8721d79c1cc363cd85e1dffdc15a0273" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.52.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "766e227be50fa27502a2e8d87d38fb75" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.52.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d938818fa936d80635a7e3a0dfe9e05c" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.52.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "96fe0c958aad3aaaf2850cb822be5ddb" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.52.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "674f645dafbf24ff1fd665d5e8ffecd1" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "db5d967ba79e5ee19b59af805768239c" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.6.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.51.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.52.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "586babec6dd22899c3dddf1a142b00f3" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.53.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5e8219db441366da66d0e9ddc4dd59ab" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.53.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "dfadd16b84cecc779eb2c4d5b626bcde" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.53.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c13860965457ff8d363e6d69b17ca8d6" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.53.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6cdeda34a164110c951aec36abcc3995" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0c55a69082f8fb1688610e3b8d53d84c" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b5304345cb3b3023a3ac18934e002656" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.54.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "349227bbbef41ca9d9a71dc95a3b5b09" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.54.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e2c20a739165245e6f94f22f20e407e1" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.54.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8d75199146a4d18256dbefc26b212c29" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.54.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d499a9f27534d2599cb658652860a716" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4d7467e60cd5cd2084900865ae3d5d58" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "f8742935aa0c8dc280f6aa2defa581ef" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.55.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "a106b9bb89f384adaec172145dcf0219" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.55.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f44bcd5825fbc8111c08fc0dcb2bd756" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.55.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f763b9046fcc65bf730500a47ea146a5" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.55.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bd803a87f95ac8cfcda50ab02b00a7ce" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "92e6f3849375116068af0dddf812cc32" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.53.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.54.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.55.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "f3bc18c3e131d65b6f2380567d4f4619" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.56.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "dc173bdef57e57fe7367ad45820d7eda" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.56.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "196b6d5efdfb9b17d0ce2ac27967f75e" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.56.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "2fd70ad0fdd927699439e5026c8af900" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.56.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dc8100931a5cf66dfa51a75880968913" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "af52e3957a6bbf0ac329f570e4984d33" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "df709b5b9b5df2c042b863c4eba15191" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.57.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "8a05222436082e055c033e7b35a04503" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.57.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f9a8f2cb17e6fa6d2626fd0a15bb5736" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.57.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d18fcc4a823e38f23d21d0e44ec18167" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.57.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7698fcfb21edfbe28d2d87cf9fcb460a" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "6ce0eadcad1ac97a29725cc7a836b45d" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e67706f8d853aa23d0105af1179d944c" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.58.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b001f4db0cda0e7a947ffb081e25dca6" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.58.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "40eab6b7955e28d34257d0223028d215" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.58.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "729817000e3208bd7627e707c69013bf" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.58.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cec98c448eaaa894b4b648d259e07a05" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ab7b281d0230c0b77809675bc50fd404" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.56.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.57.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.58.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "f288cc50111b7585f1a73b21d4512da4" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.59.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2a4dc6f25772f2696cec807ca9c89b67" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.59.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f3c88404956baa97d4b23cd45aa796eb" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.59.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "3e7d276cdd2b2c4dc00f50027486f6f1" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.59.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "da6b890427ab1af5192ad6d1e540191d" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "676a79a45e070c77434118a8e76b7c18" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d7f665e7435729ea8f6b343da68b234b" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.60.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "a0d22cb41fb04b5c16509b12cad32c1d" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.60.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5df380f6897f7e41fce478b61804d611" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.60.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6c01577a0ff90b459bf6f03235e40d0e" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.60.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "edd934f89c15e26c95d8be351071302c" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8df40a2d728a89081ed73ba793af5c0c" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "93663283e898af9b8c2fdf766d798ac0" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.7.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "08268c0d6b509f5faa6e061d651526a4" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.7.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "78004031f19a6a8c4cee492939e98a6e" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "50da49e7d829e08c547137cc551a46c2" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bed8e524765d45ac285594ea74a68c62" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cfd9afcf87f06476f51b14a11df0a6fb" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.59.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.60.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.7.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "215c3c3ae31840679bd618db5659ae01" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.61.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7f80695b20e0a25f9416808fb2c89e4a" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.61.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4f5ad0cf7f61d2f01d58d03ef9a3eded" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.61.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "e79e94d3e9ec72b74b1bda5b263bd08a" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.61.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e9285f678a5128d7524765bfac8b9fec" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "68d6e6d755a4898360c0df369d83c29d" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1259ba4cd9398ae8e79b9ce80b85fbf0" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.62.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "35980331f8b9e6956f17f04d5996245b" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.62.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2b1867b00c2e2160ed5439ea072aeb89" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.62.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "a883b47f16c1c28384379e1411447e81" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.62.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e6fa90e53cf160924c165ea831adae9e" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "237a35bbb0227c59f888d39894474ba4" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "57b4d8ed15975a6b75babbfb67cd5021" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.63.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6ce9334286d457c26b1219b2f503915a" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.63.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1227dc2355aa2ccb839ac3d160f1f9f3" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.63.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "58f115b7d936962d7a9b4bc01aaff5fa" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.63.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0cd5f8585df367db0f8fcd8c52e2bf80" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9acc3e1ef207241b9ac6ea048325fc5a" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.61.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.62.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.62.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.62.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.63.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.63.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "b33ade8285b320097d4627edc30622f7" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.64.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "45a2cbb2425b9c73870657551ede48fd" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.64.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "addceb52c1b8ceaa5147b4c408f6456a" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.64.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "5fa62cd5261a830a1f113db92b8a78a9" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.64.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8149f2a5327d247a315a49160e0c8026" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "55a8d6fa8dccd8aae6e03cf8bc1a86be" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "cfd0334c13de89ffd4306dc1d466f1f1" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.65.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "80b32fbad9378428a0105bf61d479d5f" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.65.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "010bc540c411b25ac6b3cb399a57a24e" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.65.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f4b5705244d3fcfd17ecf606d693e8fd" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.65.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9d1ae776846d5b9c722920a1ae57c17a" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5868c12686aef0c7fd2ff8e3757d7958" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "6984289678c5c36a8e7e13af9f0205fa" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.66.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "446c747cc7f57b675105ca378a0af318" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.66.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "92f8d078059e05e3c3c445d171298ac0" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.66.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "9180aba3845a6a3b6e89757a67f9a328" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.66.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c9ee65f3d2cb6e5b15f77ba030d8d618" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "998369ba772a6e196efab1f12cf80079" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.63.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.64.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.64.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.64.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.64.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.65.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.65.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.65.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.65.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.66.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.66.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "b8b2054d3d98d1f13e410f899abe5f5b" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.67.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "96ab247267e2194db226df9e6b83e79a" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.67.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5536735dae23fb6d69738c04f7dcf1c2" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.67.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "07dbbd4941f8b0cfdf3fc4360b5cbf04" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.67.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "104c42acdb235cd538a5e145b0e7427c" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d2cc1c5604c0626b2250cd6447629c61" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "da9576da9b6205724cedafd392cb7737" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.68.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d764563dd3ec30ef9be7ba6e1c7defe0" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.68.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "45ad826e2c0d0c1dd234be258373c690" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.68.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "78ed2d73841c3d2e548a57e5ab9d1811" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.68.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e9bd0b555105c0a76583fa282ed3cae0" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "16dd398fcc920838d671b6c054846d2d" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "43576ab08ac07857cba12352582f1e8a" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.69.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "860e36eeacd705292f356815cb003ebc" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.69.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "44a8149aa493c3f74e1ad65bee2cfe01" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.69.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8c36f423a9c9d998b4c67e79c372229b" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.69.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1e96e466d583c7771a6fe025c6d493cd" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "96da3d37c294d35ffa8905f5d3b34b85" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.66.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.67.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.67.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.67.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.67.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.68.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.68.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.68.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.68.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.69.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.69.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "3ab8aa917f36c4741b022a73fc09ba68" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.70.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c188c60eb5d1d368dab4b59e675acabb" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.70.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "807445e1730784729d64f055899dfd31" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.70.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c98b486c3edf8f89b7f8461367057664" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.70.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3894c0d913c0fe604483f7ad5a444dbf" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3e7644b5776cc15b57303d71cd69c7b9" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5b3627bac61ee2206d17e363d09e0147" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.8.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ea150c9cced68769c89e054dc6030969" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.8.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "73ef3d3df93b2831b303d8140d2acc23" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "327c9f8ecfc6a3614aaa0d4bd6b86507" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "842b483385fdd85c61cd389b031f61f7" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "589bcd6e7c05d2d933d9f2af072260dc" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d0fb60acce356036cd522f64976597a4" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.71.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "92bdbae71f086e10d637d418ee64b4a9" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.71.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "96b33b7fb19439188f8ceb42bb99f046" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.71.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "02410f9e46bee2f5776890f55f932eae" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.71.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7d6c316f01e39118ade41c41620fa169" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a355d3a22e4c761b421f467159d84f84" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.69.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.70.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.70.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.70.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.70.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.8.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.71.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.71.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "5bb24d1b9e515bc6ccbfc0e81c48f9f8" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.72.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "811feba0c4bb4346ae5dda3351d31f8f" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.72.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4df85d52acbfedff1415b1cf0cebaa90" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.72.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "5a87ffd9a51ba86184f0cc290a7cd192" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.72.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "881f2b9fcbc875a2a03ab793fbcd6ae1" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b70c2858db3c54388d7cebf9ffdbc90c" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "18006aa9313023c532ab840e641aaff7" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.73.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0b47a3ee399c7063d012b269d06b003c" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.73.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "91834d1fe783fca1bacc03fae7e6e061" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.73.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "14ef62e53ed09afabe7c32c05812242a" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.73.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8241d56b0a76b363f67c6e02ee43ebff" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "529728641d400d91d8375366aa500508" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ef87e8876282c3937f9308f382d44da9" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.74.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b5bcce72834d3d225fc940a2f44dd32c" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.74.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cfc7477a0ab1d0b1da17af904555e7c9" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.74.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "4f953097fc598dc91ec775bbb92d73fe" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.74.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0ea20fbd5f9e6f2904f5b790f6306e62" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e94833d5605b81ccf593a56977db141e" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.71.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.72.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.72.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.72.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.72.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.73.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.73.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.73.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.73.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.74.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.74.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "a5bcb383f00b6d0e5f4615f8cd79a90b" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.75.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b4a7353362ef6b611a841237bfb9b1d1" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.75.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8362f6e99bb250902c336a250e013083" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.75.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6c350a7df540ab15aedf0615954e80bd" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.75.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d1252a0040614d6a82e80b6566dcf765" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "395feadbd5c6753a65e393d731a43a4d" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "89c113f9e1ae0b976420f07657dcb0c9" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.76.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ff7583787682692d0bd0037d8b3aec14" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.76.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d994d25f1f0abc8457707e7e20ceb3d5" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.76.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "673fdbd09d90370bb249dbe0a4fc6c7d" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.76.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b5cd03c143684775f0ff37e800427ebf" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "532babc6d43f8d454f13f5818355f7e1" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "002c7937257f755579e593f7a070a0c7" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.77.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "316227d30a8702e2df80ea41f8a4fd30" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.77.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b71343f188906aa95490c366f7448aff" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.77.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0752aa53e7f752ef2834c1255c6ac95f" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.77.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "80e1bdd4d41372c08ae1fda1c154ab3b" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1d906c087e97efffbd778969354f866c" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.74.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.75.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.75.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.75.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.75.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.76.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.76.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.76.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.76.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.77.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.77.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "1771d7a857451caa5102fdee34a199e1" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.78.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "18b5a5965e1d88ad9b482ef4f86b84d1" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.78.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8276908cafbdd3c6290d2911950aab2f" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.78.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "3169022dac93a8c2b81221f5842406a4" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.78.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2fefe279b372be7206a3d8592211b09b" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "be1e590b7228beaa090475c457769a6a" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "6641b1a00b3c1200d7087787af4c60f4" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.79.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0a367a4bd34c7b31ac467a30d87ca4af" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.79.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1811d56d75a5501e805f6b9d084c7c57" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.79.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6dabf02ed459d276d3d5ab844bb3b0eb" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.79.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6c14a1afddfef7a0419f0123acf9b6aa" + }, + { + "dataPath": "params_shard_483.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2150846afe6e2caf501bdaaed620ef7f" + }, + { + "dataPath": "params_shard_484.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "756ec023c0eebc57a130387e7c079e06" + }, + { + "dataPath": "params_shard_485.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.80.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9d666ca5833b4dda70ea5af5c36699ec" + }, + { + "dataPath": "params_shard_486.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.80.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f0b9db737baec399ba5844764f21475f" + }, + { + "dataPath": "params_shard_487.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.80.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "05830a64179ffe4e00edaa3fc4877f95" + }, + { + "dataPath": "params_shard_488.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.80.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f25c15b16970242bededd3e699caaa89" + }, + { + "dataPath": "params_shard_489.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.80.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "37a0f7bb0f46152c47507e1a3d14a938" + }, + { + "dataPath": "params_shard_490.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.77.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.78.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.78.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.78.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.78.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.79.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.79.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.79.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.79.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.80.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.80.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "fd8ec90cefe89f6a1d1d10466e21f4e2" + }, + { + "dataPath": "params_shard_491.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "73fc7086be658425e30b036d711f6ef4" + }, + { + "dataPath": "params_shard_492.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "456a1c41205895933abeebb1eda398a4" + }, + { + "dataPath": "params_shard_493.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.81.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "355d647c0bc6fd6b2875f6dd9ddcef1b" + }, + { + "dataPath": "params_shard_494.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.81.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "69d178b2d8a9cb3ea9861e7342735bc7" + }, + { + "dataPath": "params_shard_495.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.81.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "adcdb42018a5181d7cd46093ca0fd044" + }, + { + "dataPath": "params_shard_496.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.81.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "46485b13cc7fc22c10e677896d88e6c6" + }, + { + "dataPath": "params_shard_497.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.81.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f846339b02538a0cff51de71afcbf610" + }, + { + "dataPath": "params_shard_498.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.81.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "fb3d26a8277c88243f54bad3404e0d35" + }, + { + "dataPath": "params_shard_499.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.82.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "4cecfd49962fa4a5900b559aa5af2abd" + }, + { + "dataPath": "params_shard_500.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.82.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "060d0c097a77bdbd6cdc3fef23f278d0" + }, + { + "dataPath": "params_shard_501.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.82.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "defd719d86e87a80a9adcfe840a2d826" + }, + { + "dataPath": "params_shard_502.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.82.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1b6471d3d8e7bd070812f4c99e70be1a" + }, + { + "dataPath": "params_shard_503.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.82.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "34aaf8860e74e5bd91b228374a357c1e" + }, + { + "dataPath": "params_shard_504.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.80.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.80.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.80.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.80.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.9.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.80.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.80.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.81.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.81.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.81.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.81.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.81.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.81.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.81.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.82.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.82.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "591ef88665355a9ffdc9ecfcf5ea0ce3" + }, + { + "dataPath": "params_shard_505.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.83.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e2305f7af1f6ea33451c52607a70201f" + }, + { + "dataPath": "params_shard_506.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.83.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ad6177b24fee9cae678c81414c5342ed" + }, + { + "dataPath": "params_shard_507.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.83.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "08621a2c02db5798910f26be42b37f81" + }, + { + "dataPath": "params_shard_508.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.83.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2575201363d1beca17f0cb3e226ddcb5" + }, + { + "dataPath": "params_shard_509.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.83.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a18aa96516b3b8713b5e9f10b4e3f72f" + }, + { + "dataPath": "params_shard_510.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.83.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "7cc4830a4ded2ce118f5b86a4b2b4984" + }, + { + "dataPath": "params_shard_511.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.84.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f50f9b1b2358891d6029a6de972a1020" + }, + { + "dataPath": "params_shard_512.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.84.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9fa1aa70b0104ad89098503fe2b78dc3" + }, + { + "dataPath": "params_shard_513.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.84.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c6e55a7a41d4adec0ffb158a14c85002" + }, + { + "dataPath": "params_shard_514.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.84.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5cf9890f67194487bf4526d09e3dc179" + }, + { + "dataPath": "params_shard_515.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.85.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "527f1db05809b35b91fb41041dea3995" + }, + { + "dataPath": "params_shard_516.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.85.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "889c23772eee6dc8d6ea66278c6078b2" + }, + { + "dataPath": "params_shard_517.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.85.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "1b07decced9a97572da2bbded4f6139d" + }, + { + "dataPath": "params_shard_518.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.85.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c3dd0df8236f9761829143602c544496" + }, + { + "dataPath": "params_shard_519.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.85.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2692c0ed932d6bbae5f646855d6496db" + }, + { + "dataPath": "params_shard_520.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.85.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "7532b96a1af5d74057e146818e876abd" + }, + { + "dataPath": "params_shard_521.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.84.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fceb650b8f0f453c3fd7e46b93b8c88f" + }, + { + "dataPath": "params_shard_522.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.82.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.82.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.82.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.82.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.82.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.82.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.83.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.83.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.83.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.83.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.83.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.83.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.83.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.84.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26772224 + }, + { + "name": "model.layers.84.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26780416 + }, + { + "name": "model.layers.85.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26788608 + }, + { + "name": "model.layers.85.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27837184 + }, + { + "name": "model.layers.85.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27837440 + }, + { + "name": "model.layers.85.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30196736 + }, + { + "name": "model.layers.85.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32293888 + }, + { + "name": "model.layers.84.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.84.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "83797c11e4a42d55303d3b1421a51409" + }, + { + "dataPath": "params_shard_523.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.86.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7e69a4419a258c510e0f3b527048fb26" + }, + { + "dataPath": "params_shard_524.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.86.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "86f49b3138d00749d6d011b8e416d5e5" + }, + { + "dataPath": "params_shard_525.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.86.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d9aff5cbdc5959e3c1a95a1ca5537f70" + }, + { + "dataPath": "params_shard_526.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.86.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8717ec9f8916aa33c78d995195ce545c" + }, + { + "dataPath": "params_shard_527.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.86.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "dabbaca286bb2cc5a31e3eae1ba11a1e" + }, + { + "dataPath": "params_shard_528.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.86.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1dc7f57d09c5e505877ccc9d9b60a0e7" + }, + { + "dataPath": "params_shard_529.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.87.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "574282f97aaa9029d5b8e7f890a80456" + }, + { + "dataPath": "params_shard_530.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.87.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "62f1216382576f1eef0e4812b32deab3" + }, + { + "dataPath": "params_shard_531.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.87.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8d795cd54ed3c0b47660d9b7ba6e36b3" + }, + { + "dataPath": "params_shard_532.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.87.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fb3d65ece1e9eefd7250f5ecd93be8cb" + }, + { + "dataPath": "params_shard_533.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.87.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "111c591351c7021ee5b2ac8a71bf29ff" + }, + { + "dataPath": "params_shard_534.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.87.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ac153a107b0d361c47d8227649a9bbd7" + }, + { + "dataPath": "params_shard_535.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.88.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aef53a8787247586bb31bafee7b0b48b" + }, + { + "dataPath": "params_shard_536.bin", + "format": "raw-shard", + "nbytes": 32294144, + "records": [ + { + "name": "model.layers.84.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.84.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.84.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.84.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.85.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.85.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.86.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.86.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.86.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.86.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.86.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.86.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.86.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.87.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.87.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.87.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.87.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.87.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.87.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.87.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + } + ], + "md5sum": "350459cd425eeda6700c14980076af78" + }, + { + "dataPath": "params_shard_537.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.88.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e55d3f4465167b577f06b4a9940e3a95" + }, + { + "dataPath": "params_shard_538.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.88.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6da165d5ab02152d088a412bb11cab27" + }, + { + "dataPath": "params_shard_539.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.88.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "68af7eff4b93209b32d8873454f52928" + }, + { + "dataPath": "params_shard_540.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.88.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d216f3fcbb2b4bf1d0d479b3b9ea342b" + }, + { + "dataPath": "params_shard_541.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.89.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "05ed46a801c80866397864c969793948" + }, + { + "dataPath": "params_shard_542.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.89.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3fc5cea2ea44dc8f2e8893a78608f254" + }, + { + "dataPath": "params_shard_543.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.89.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "7c07f682d9e574a372e8e94a8fe9bf48" + }, + { + "dataPath": "params_shard_544.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.89.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bfe23babe45c8875a0ed306691c79188" + }, + { + "dataPath": "params_shard_545.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.89.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "f280a508fb7c7cbb7c4a52f94ee5d00b" + }, + { + "dataPath": "params_shard_546.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.88.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1299e32b34a3e80aaa76512dfffef273" + }, + { + "dataPath": "params_shard_547.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.90.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "81b9d6888588377dfa5e13eb77f179e0" + }, + { + "dataPath": "params_shard_548.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.90.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f6d1662190f1c0f6bb283a691d84bead" + }, + { + "dataPath": "params_shard_549.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.90.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "80251503662745003e4910a456ff7b51" + }, + { + "dataPath": "params_shard_550.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.90.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c463df0d45c452cf031e89880fec1eec" + }, + { + "dataPath": "params_shard_551.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.90.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "41f8a593031e30d568c805253c4584a2" + }, + { + "dataPath": "params_shard_552.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.90.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "daadb86da4e59e8472e80ed71a4d6935" + }, + { + "dataPath": "params_shard_553.bin", + "format": "raw-shard", + "nbytes": 33326336, + "records": [ + { + "name": "model.layers.88.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.88.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "model.layers.88.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2367488 + }, + { + "name": "model.layers.89.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2375680 + }, + { + "name": "model.layers.89.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3424256 + }, + { + "name": "model.layers.89.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3424512 + }, + { + "name": "model.layers.89.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22298880 + }, + { + "name": "model.layers.89.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658176 + }, + { + "name": "model.layers.89.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755328 + }, + { + "name": "model.layers.88.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26755584 + }, + { + "name": "model.layers.88.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27804160 + }, + { + "name": "model.layers.88.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27804416 + }, + { + "name": "model.layers.88.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29901568 + }, + { + "name": "model.layers.89.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29901824 + }, + { + "name": "model.layers.89.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29910016 + }, + { + "name": "model.layers.90.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29918208 + }, + { + "name": "model.layers.90.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30966784 + }, + { + "name": "model.layers.90.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30967040 + } + ], + "md5sum": "b2f819debd99fe42fa470bbfcb872a6d" + }, + { + "dataPath": "params_shard_554.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.91.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c45c3de2f356bc80640ba6a3326cd605" + }, + { + "dataPath": "params_shard_555.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.91.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "753a7bc9fb489fe771885d40e6ef5e2c" + }, + { + "dataPath": "params_shard_556.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.91.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "803484fc0443df1fc70fd669290bdf25" + }, + { + "dataPath": "params_shard_557.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.91.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1119621dc43c7f39e6e9f6aeb2221331" + }, + { + "dataPath": "params_shard_558.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.91.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "85f1fb09bd3c74456391b2ea41c56ecd" + }, + { + "dataPath": "params_shard_559.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.92.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "491651e433fb36906d50eef04d78bf4d" + }, + { + "dataPath": "params_shard_560.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.92.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "882fe900b62a682f15cb4bf5ca5331e6" + }, + { + "dataPath": "params_shard_561.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.92.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9263d0f119930ef96feac9f9862d83a1" + }, + { + "dataPath": "params_shard_562.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.92.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "88770d9696e2962bee5afb8de0c8b6dc" + }, + { + "dataPath": "params_shard_563.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.92.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "db9bafb9887fd619e67e4e1c707098a2" + }, + { + "dataPath": "params_shard_564.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.92.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1745c78e8a5ddbc0d851b9db2b5f580d" + }, + { + "dataPath": "params_shard_565.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.93.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4bb944adcaa58c8b2c753162f945d861" + }, + { + "dataPath": "params_shard_566.bin", + "format": "raw-shard", + "nbytes": 33080832, + "records": [ + { + "name": "model.layers.90.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.90.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2097152 + }, + { + "name": "model.layers.90.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2097408 + }, + { + "name": "model.layers.90.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2105600 + }, + { + "name": "model.layers.91.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2113792 + }, + { + "name": "model.layers.91.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3162368 + }, + { + "name": "model.layers.91.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3162624 + }, + { + "name": "model.layers.91.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22036992 + }, + { + "name": "model.layers.91.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24396288 + }, + { + "name": "model.layers.91.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26493440 + }, + { + "name": "model.layers.91.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26493696 + }, + { + "name": "model.layers.91.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26501888 + }, + { + "name": "model.layers.92.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26510080 + }, + { + "name": "model.layers.92.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27558656 + }, + { + "name": "model.layers.92.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27558912 + }, + { + "name": "model.layers.92.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 29918208 + }, + { + "name": "model.layers.92.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32015360 + }, + { + "name": "model.layers.92.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32015616 + }, + { + "name": "model.layers.92.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32023808 + }, + { + "name": "model.layers.93.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32032000 + }, + { + "name": "model.layers.93.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33080576 + } + ], + "md5sum": "874098213ef8f8041fea24116e1632ad" + }, + { + "dataPath": "params_shard_567.bin", + "format": "raw-shard", + "nbytes": 21233920, + "records": [ + { + "name": "model.layers.93.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.93.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.93.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.93.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + } + ], + "md5sum": "6dd44a3bc4f69b2c9b322e003ca5d6d6" + } + ] +} \ No newline at end of file