diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,18049 @@ +{ + "metadata": { + "ParamSize": 1227, + "ParamBytes": 147102545920.0, + "BitsPerParam": 4.071739133529121 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 311164928, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 151936, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 311164928, + "byteOffset": 0 + } + ], + "md5sum": "49c3b0954736eb6a33ff3dec7fcaca16" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 38895616, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 151936, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 38895616, + "byteOffset": 0 + } + ], + "md5sum": "345e9f36ec085efd96ac768f0beb1042" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.93.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "fd8225ed2a42829150e751f0002e1ecc" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.93.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5e8d593f2114523c5e1cd8fe505dc2f6" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.93.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "ebb49f6142625849d6b928fb985d62ed" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.93.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "219e8807d6787deceb3712f378354312" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 311164928, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 151936, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 311164928, + "byteOffset": 0 + } + ], + "md5sum": "8cad912eaedfefdf597a6f99cc382148" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 38895616, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 151936, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 38895616, + "byteOffset": 0 + } + ], + "md5sum": "adc47ce7e1a766a7f87164b59252d91a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.0.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d36693a6053dfb9f898e69d548fcffc6" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.0.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c8c36f853bf9509e033656929b553b79" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.0.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "30412dac1343f9d79676c6eecdcb1c8c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8bdca16a3181822bf5be6df86bef3ffb" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "caa017fe25a224ce5288a23358f6bcca" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.1.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "8a4936af6bfcbf4889b694578ba8258c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.1.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "89e8a1f2c575d32e636c1aa5b6443e02" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "7af20ffa343610cdf62738aad092aa9d" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8a56f3a0e359e1b2cbb4cf68ab45ba8f" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b9578ce9f00da1f66912fc9f73a35ebd" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "12d43cfa57cb8a4aeadcdd0769e05a98" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.2.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "838fb84680ee50526e3c02c8b7fd73b9" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.2.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bf1d8bd3767078344bd10694b4149799" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "3b43af65dd340031faa235cc1b3bc90b" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "62258fd76c508818cd0af4c5a914edcc" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4058c5a2eee516ad7054755556758f5d" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "fea14b862b67aae448d5295cc241d567" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33350912, + "records": [ + { + "name": "model.layers.93.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 0 + }, + { + "name": "model.layers.93.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16384 + }, + { + "name": "model.layers.0.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24576 + }, + { + "name": "model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1073152 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 1073408 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 19947776 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 22307072 + }, + { + "name": "model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24404224 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24404480 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24412672 + }, + { + "name": "model.layers.1.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24420864 + }, + { + "name": "model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25469440 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 25469696 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27828992 + }, + { + "name": "model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29926144 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29926400 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29934592 + }, + { + "name": "model.layers.2.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29942784 + }, + { + "name": "model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30991360 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30991616 + } + ], + "md5sum": "379cfe4eb8409de529d12841eb9fef32" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.10.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c576a3b51ba8717fc4780dfe8b71bb2e" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.10.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9d831b22f30ab42f1ded997a750efa6f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "cf78e958559217ad67bf73f8d7e804db" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "907159ed022363ea79728ef3840355b0" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.11.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2b5e7376a5a8c10dd76f3b1ef801246f" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.11.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ae52f5d95f711aec9cd661650f96e20d" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "2237fac4ce3b5c0187ee9453517343cb" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "25842ab122f315362a74960fc5d1247f" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "0ff5347f8096af9427a3d853fa3ab14f" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1e765749a3af3f8b85f69a112fb16daa" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "9e1e36b8a9c0a5657ff13b8b0b9bafe0" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.9.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "10c459b7519fd0e6868ec63a94383026" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.9.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "781f050e77b5359dd94e6e1581200803" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b4ddd104ff51a6a5aefb643caea93854" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2f1906f2647a5eb78e9ab8d97ad9f73c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.12.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "223b09cdf56d2cd2fcce939ed3e7ef20" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.12.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "34ab923306c4452d3f8b568dc5a679b2" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c8d5faf3088a589b204ab11aeb1ebf29" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "69411658bebd796cd43eaa37a710adaa" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "006398e9617842be0bb11e9617e9d617" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33080832, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2097152 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2097408 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2105600 + }, + { + "name": "model.layers.11.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2113792 + }, + { + "name": "model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3162368 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3162624 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22036992 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24396288 + }, + { + "name": "model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26493440 + }, + { + "name": "model.layers.10.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26493696 + }, + { + "name": "model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27542272 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27542528 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 29901824 + }, + { + "name": "model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31998976 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31999232 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32007424 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32015616 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32023808 + }, + { + "name": "model.layers.12.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32032000 + }, + { + "name": "model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33080576 + } + ], + "md5sum": "2ea02397406661cb684ababf3969826a" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.13.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7f1f01bc4ccca97df2761cc25a447569" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.13.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "51fdcb2a682dae7b5b33c02e64116767" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "9e9c3fe2081940de4764f65c9f818b3a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3a484efed9a56d4a7f832c1e82c1f0f8" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "520a57b72516b46ce6ffd9f13049fc0c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "c90c0e6b4a49da37db29226c465e404e" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.14.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f7c00428193b1ddbb8af34996bdfdabe" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.14.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1210d0f0578b42bcb7aa59643b0f8369" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b175857c9d2b2c6762c7604c498721b7" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cde13bb99b0043db26d088f0a970eb42" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cdded077d045679ac2c8fd71ea1934f7" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "90f42b5467eb02d37d6db45fcd91c700" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.15.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d376e4d793c895c7fa1bb47a40ae4e9e" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.15.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cf5cda0b5ed6f1382e29b7b6bc7f8707" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "90d3e8595e6f1f6b889b923f42b88d1e" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b7fa1c239dc8556124ffc8e7003c59c8" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "54aace7a88efdb3edf2c5e4434ad4192" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.13.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.14.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.15.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "81bd9bcbb7530a793139c6a5b875d882" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.16.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "700d6e83987e3dadd76accc290206835" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.16.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1b651fd0b8f5137ef0592a8e7ced71f8" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "792d0ef2b8a9f98bc3f877a9aed936ac" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "13b14228b8c39d3ca33cfd7d1a3f5953" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3498e1f2f343405d952e72397378741d" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1a253341b3f1a9f1129e16e6c614fae9" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.17.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "bcfc1ade5f2dd2ec80f2810642c313ec" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.17.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "013e7476547b0ecf7fe0775c609f2ced" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c65eb8c7bdd1e98bf2cf47466cc099a5" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "20551fdd048a008087ecc24e49c97c0e" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9f62f8669c29057848728709241a72ba" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d97c04f4d737b3c47b16f6e56522938e" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.18.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2f50124b6f9b66fbdfdecb5c4e4f7801" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.18.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9cd3a8f2da2484d14a0f8c80d4759f5d" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "ec9834ea811eddce53859b5f5a469230" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0dc13913b4c73034143872739ad6acc1" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3ee5711fcef5b3ca940447f1ac950857" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.16.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.17.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.18.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "9a28fff298231675e9a5a7b4ad1eaca2" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.19.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0277c79221f0528841235631996f6ae4" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.19.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "15b4a108eef3bd5371d483d53aab4f4e" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "1de5e1be0f0fe27b40138811eca51f7c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "44da44185da97e7a6486f97a267c6d2b" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.20.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5641766fa3e489246ad5f60f249d5267" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.20.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "905aae6bf9cde42fff264777fdaf4811" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d25ac5a995acf84a0f4e73ab59403698" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b745fe6e37808ea868154af32ed9de7c" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9c8f5a10b14594e51efecdcdcec02791" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "9667a323de6a69b2bf0c9111ea7b5b1f" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2a7b4c55b8f3fef7b00080fe0e7caca5" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "bec1b16401af586f5741a0d4e35003ad" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.3.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "629ad30cc1c8f1b593e10acf13db5fa9" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.3.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c10dd8a99f3478940aa5722e176ee46a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0574e9db8db4147edcd416633416c752" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8f1fa8d93571ae18ae11e5b60d92926f" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "332ed4906d7a664b8e56b50a557c53e8" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21250304 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21258496 + }, + { + "name": "model.layers.20.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21266688 + }, + { + "name": "model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22315264 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22315520 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24674816 + }, + { + "name": "model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26771968 + }, + { + "name": "model.layers.19.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.3.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "e55c6d34a5ba3de5c836f77bce075950" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.21.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ca1b22535abcb03936d2e6712f8011ed" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.21.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cd628f3c0ae4262d83ac6947f337e050" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f302af2d1c7c234155005ce5c02b18b0" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "159da4a92040b84a4e94e900050a3336" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b4c6f17a11c486bb7a92006e40e8f816" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d9653322480e21fe965aec029800d999" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.22.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "a6bc553d41c252c092ac1ea615b44527" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.22.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "82edc857cb8a49526eae7007d8157511" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0e048f47ce348949ac6b11724e67dac1" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7e789cdfece66dee6dc0f8984d415a8d" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2d12638f940f561111f3df1879685811" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5ffef5612b306ed206a392080cd2bf00" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e27dfb996c4b6edaf3d4346a5a66b6e2" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32294144, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.21.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.22.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + } + ], + "md5sum": "21609c9b6fee318d882f9a3eb71ff032" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.23.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c5bdb1279560e163233475a78583f12f" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.23.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8202393134643b721ebee0fc26a1d551" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "9fe058b2e9932d385b6479c2c3cc9616" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d4b07442e32938c6bd17289089d6311a" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.24.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1419099aa5e154d3ea5ca9b34c1617e7" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.24.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b685f328a63ad3d6eb8fdec3acd3b8d5" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "a035dc2714222cbed8463c9b0e0b06c1" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7fdad350a013b0f24f5ec2bb6fa66252" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "acbb764e22e27c70c591d87126a7b63c" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "31c3d6970a040aeac955ef58c7c11d3d" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.25.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ca6bf27078b81ae6071f0c241b3ffbcd" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.25.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bf12cd07bb188046858634606d780f36" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "e5d910734db98d3998e10fa97023f9c7" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a2538f46fa31980f522182a94ca83a0e" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c5e4099cb9657bdf9ff6aab1b4b49168" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a3c6b42693742594cae4d43a5105b0e7" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33326336, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2367488 + }, + { + "name": "model.layers.24.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2375680 + }, + { + "name": "model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3424256 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3424512 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22298880 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658176 + }, + { + "name": "model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755328 + }, + { + "name": "model.layers.23.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26755584 + }, + { + "name": "model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27804160 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27804416 + }, + { + "name": "model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29901568 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29901824 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29910016 + }, + { + "name": "model.layers.25.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29918208 + }, + { + "name": "model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30966784 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30967040 + } + ], + "md5sum": "8c778bb87c295204e39a79a1d2cc2eb8" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.26.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "69f0f8d26e567d9f56322326029c2514" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.26.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7faa31e2563f0fe28e9ba94407698815" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "db4dd7be6af5b382094cef8ca4ad6690" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0781a544b62fd35d8693f6524ee16f8b" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1c77dcd94cfadea6156744b17d4f4fc5" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5f4f69d141ac0667c57c5c43fc55286a" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "6e72fc5f8ee56fc5697b32e59e86d2b0" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.27.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5154cfab16204e9b71080e97aeda200e" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.27.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a55bc23d4942027859f3eb9aab08dc6b" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.27.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f949f92a144e2cae320b452547938095" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a8e770c3b044cf75c0f4666e53280d37" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.28.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "349ef4e3b817b1d0b48c491cef01ed69" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.28.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6553a8b15149cc8156b9142eb83bf129" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.28.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "90201da63053f54d32d674e02f9a1856" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "94d9f1da224a95d4fa487b4167b30cb7" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "77cdc8c09bdc4deb999d925500961e64" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 33080832, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2097152 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2097408 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2105600 + }, + { + "name": "model.layers.26.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2113792 + }, + { + "name": "model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3162368 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3162624 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22036992 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24396288 + }, + { + "name": "model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26493440 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26493696 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26501888 + }, + { + "name": "model.layers.27.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26510080 + }, + { + "name": "model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27558656 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27558912 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 29918208 + }, + { + "name": "model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32015360 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32015616 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32023808 + }, + { + "name": "model.layers.28.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32032000 + }, + { + "name": "model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33080576 + } + ], + "md5sum": "f934c26c2f5e9ef950749c257e680139" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.29.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ede0a10491dd0b9f9f0a430778fcae98" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.29.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "95e71a80beb8e2bb6f0a1de230860f33" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.29.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "84b214223244e8977e58966b71e704a1" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9908acbb786119cc7519c38c1a8a9274" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ac7fc9f48719ad0cd428aad2edcc560d" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e53c1e1056d5099f83ecf2ec446aa988" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.30.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "4973c663fb57c89c2e70a7704ff8d458" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.30.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a4eba21d3179e64b7d3fba11ac9558b6" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.30.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "35cc0c813ba0c78bb885c622bcdc144c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c1fc1fbbe2f79cb741b2fa7d46670cba" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cc578f665f594e4b66125502ef94c805" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5f9173962938728ba91651c4ea31e298" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.4.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ebf6b221bb080b70f04e6c3c5dfee5b4" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.4.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "28499e064889c5b47a453615edefdad1" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b388005a6319591e4cc4ab6230724141" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c470071127d43835b31853fe3e6499ee" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4d2b772df5011ff8d9cdda77d683fe16" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.29.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.30.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.4.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "56caa5a25aa8d847397ca68f5be5606e" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.31.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "882f137523bf9293f113a8ba4993869b" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.31.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "414eeb0818d46d8403c2d9bc0b74f78d" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.31.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "ea1a196d395e3a88d437ee79211106a6" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "04af4eeda1e9d785ec31175c338e5d47" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "19c5a48111293c755936232d7ea3af80" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "219133a8d78868ca3eab74d691cb179c" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.32.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "bcc4ba3022d264ee1e3ac65d1f236149" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.32.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3e51f57f2afa34cfe56531f83f3611d0" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.32.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0e22cce52d6580d14c701639cf9bc919" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.32.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "de8ce4cbc17b217f50ab5a047d35ee49" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "123e5a44bad7a5622dca4f8b5d336648" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "22623179ee18b03c91ddb309f6e0edfd" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.33.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d788c17b9d874f8d0fb21efa43b771f1" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.33.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "66a9fb15a9ebf402f168d42d15f5a75f" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.33.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "eae49b3250bc6539cac280ed71580119" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.33.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "acf4cb253a88a47a34dc5de93a5f7960" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "6d31a0d0dffd23fd86a1997396b0a821" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.31.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.32.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.33.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "a8c77508b2f4d22a8fd9e827b1f117e5" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.34.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1fc5e5fccde3cc58a90ac71792f79ca5" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.34.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "48b995dab26736b9895a9fd74d1f17ff" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.34.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "e12c69cbd5f78db6a9825597066453cb" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.34.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a2017894a1e4331f63a37f8f0206cbdf" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b6816347e3290747fc5febdbc26936bf" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "67d4e81b20a833979902f2619190e40b" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.35.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "081ed3d637640b084334cc1903a16b08" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.35.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2b06dfae638602320e50b26c15ab3ddb" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.35.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "08da7d1490be76ac476b1bb5f1b2a553" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.35.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "33e97e8267287332c82a3b7f3a99e57a" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "71207bc1a338d1779f79c33dd1cd4a6b" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "0fb3d43cb6601d67563468c331d01392" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.36.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "12ccaaa711018ca4b095b33302124644" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.36.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c861e93d19632db3df7ea5a113c672ad" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.36.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "be9839737f674f32c90f52fe5a18eb4e" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.36.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "935bafd32459c8c4cb34439a6e90b75e" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "51d350b820d566bcf6482807aded6324" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.34.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.35.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.36.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "fd26bae9c20ece533cf08efbfbb2a49c" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.37.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7fc95edfd284f39ccafa6a8cb2259872" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.37.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fc73f9f9a36af054698d51460437e485" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.37.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "30309c1e9bc1a66f85ec71a5c4ca63e9" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.37.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "32d0c77914f5eb2007f29d1be5c7cb7e" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b7b8dbaf476c3c0ec1dc50d32271e6de" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "73ca4ff4a8f259ffb480471a8ec7716e" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.38.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c3912a18a8a21c21d63d87866cc929f7" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.38.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1426bb98bdd4d7690a511bd6b2c4f362" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.38.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "68fff7bcb2bead08ac50d62769695b71" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.38.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3613ca08241fb0599c0ade67ff5b8e59" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0685ab38e3357522ce91fbf8890fd265" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ac7bb93df129f9ab9c478d847d9ba548" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.39.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "cabc7a4c29c9b74919c56dd88f13c474" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.39.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0fc30a5a2d62eadd67f0368026f97174" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.39.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "1f3bee5416306daa92fdee9139867dd7" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.39.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b1d2d47a2b8726cfbf54e12d66ac7265" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ca382d80116eed53a3dcb1310b5ef3d2" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.37.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.38.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.39.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "c5f8739d6e13eb36028d9d888d6a9080" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.40.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "20ff9d1faf3c61d94221d6531fbdb395" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.40.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "04f6ac3c6e95187ee2afb4d2555d993f" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.40.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8515fb10b86ca9f1fd6e21f71d4ac2cd" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.40.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8f9b240ee5f81b113e3c26d14518173d" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "32b506795d489a897fb24aceaab4641f" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "2ebf5a53b9c13fde30cc187c5c4b727a" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.5.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "78110ae4984a8ad74214b110b627e376" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.5.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "92e2865d3ab678b57cb90341f9bd00c9" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "877ee055f84a75a5471b889ad712f3f8" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "52aa56ce64a33417a15c1cc8447e4ff0" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e5a512fb10f8b07d300cfe389cd20c5e" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b8808089a267bafabfe2b4f93add2761" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.41.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9918af051dde08e546a91512a071f1b5" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.41.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d6d1220221508c6d11ee9e0a56b59cf8" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.41.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b59512d04438f415bc980da9b3746798" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.41.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3f0f830f02c7553324409853d9c57ec2" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d8eacd9b3671cd710eca4c1a4339536d" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.40.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.5.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.41.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "a1fa1c3c7895eca27b6a64a0c3ac15de" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.42.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e78b8c5eb3d27cec6ef3fa757e98a36f" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.42.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "28e9880b4c36926b9a5009cd984232ca" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.42.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "03fc70f04a6cd0f0c75875a588f5f4d5" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.42.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d4f0ed5c8edcd991e37848da0211c90c" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0e4ab065bd0e17ef380cdd0a450ea76b" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a1abbce8a0fb8e5d7b4f481fc8047c17" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.43.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d9c4818ef07902ea0db4d762aa6494e1" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.43.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "48614cec572a38dc7669315b5fbfe0f9" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.43.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "65192a7688bcee2b797fc1829197bada" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.43.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fa593cfe15167f092209835f28afa696" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f28151533932dc403b8d1a500b9f228e" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "a85b5b40b13229322ef902c7249febee" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.44.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b86425c6c8eb25581ce2067559a0e30e" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.44.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5af765539e8b2ddd8182bbf482c5c3f2" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.44.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "c7cce45ba60307ef85fa5af10e4298d4" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.44.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b0c51b283b12db598cfae034a0dea1c8" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0f7aabea29578b8ccfd9641e52379347" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.42.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.43.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.44.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "30a6decfde25b6f13a8c506078fa5196" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.45.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ffb7311ad5aa3284f20372c6e56adbd4" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.45.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "52303d667c170fdb30dcfd2d5a0e3bc1" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.45.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "10c503ecf6354cb0326de21db4861d25" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.45.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e8500afb1b26830c3c476cbd82a68636" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4d9d25cc24463f82cfe690fe51c25721" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "7e63c4ec65789082ff7e340aa813e46e" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.46.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "da57448197d768de1cbe782852f1e7dd" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.46.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4bb64f36eb20b2052c7d98709e37e0ae" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.46.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "81fcf8bc795ca1a1629b0153b9569170" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.46.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b232c989ce7025540a7ed40de8733562" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7301702e04cb7fb6fe0c5ed864ff8025" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "32a9b09925d1679cc599bc484326e1a1" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.47.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "938ba593c5c35b71c065bd2b61d2df09" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.47.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b35b48ed57eb81761f45f5d22de79364" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.47.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "7e16b9b1e7469a3e4c7c16fa9981897c" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.47.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7a25cd91321536032f61f5e462055e5b" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8188bc9eeb6bfe001e5fb91b39b62136" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.45.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.46.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.47.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "ce390bb4a7f08e20cd894e374b8ba568" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.48.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "196853257bb440c5eebbc1af5983ad12" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.48.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "dc648bfab11d9119ffeefce2eb19f48f" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.48.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "b897a2fd9033854ae7497568ed1af864" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.48.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e6d67bd675fbe16f684278d4e0c438c6" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d93bf54322de4c9c5057927690cd0412" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "127af0b08e887a4ac5e886ac3591dff0" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.49.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "dff523b59345eccd43f11528ad05a385" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.49.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3ff6d0b3fba287a0a295336b14ae9e4d" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.49.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0bcf570a36f404880e1a2f5b4a4049e5" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.49.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fe183e45d7041652c3f9d0a64eacc63c" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a7d19bde5cd46c67622a910326632b3e" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "41ba57ed359c0e5fa5cd71b2134698ef" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.50.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6909c91fafd3ea0bc7e784e1bbee306d" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.50.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "027636ff110b5fdb8bd2fa61a00ae030" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.50.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0ede3e9ab1690654a5079f5baa18c4d1" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.50.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1033a02ae539d5394b38e4fd7b238536" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bb6b32c9c8e8c2bf2a16972d47bbd368" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.48.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.49.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.50.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "33d891a384e49b99f3fc9b0f2354109b" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.6.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7f82fb64eac105c1058c6e22deb66562" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.6.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f33d5af42e9a0b2e1b02b3c30815fac9" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "33712dc572723f9be6e5aa9ced658a42" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cc88beb6d1478b9e2ebc9034781f8df6" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1d03bc90f41b87bba407126b9e725ee8" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "df6cd12208b791ee37222232430babb3" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.51.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ed5103ea502c730b9e499127ceccea5a" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.51.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "11e0f417311c2f2eef46319dee4f9a47" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.51.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8ca4024190b0ca7c9105162141371538" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.51.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dc08df6079f1528f8c5fcfb1233951e0" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "2f33c9fc4d948b991333babe984ee85d" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "671befc9df60b1955ec41a036e72ab7a" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.52.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "61a033695f1f48ed44c131a896a1e6ea" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.52.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cb840a007909a6282e8a0422a8d65a05" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.52.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "04bae73ca885a20617bac663656513ff" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.52.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1701ef3e0931a116f38eac09d83cd014" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "15f71758249f22bc456bf7c613dbf2fd" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.6.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.51.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.52.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "24e2be609f4b53ab9ebf876e8dd659b4" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.53.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c5a3fc5c0d06fa3e81d5833fb5c3b649" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.53.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2aa0818b8a3ee9694ef9ef8a4b3e5040" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.53.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "45d8e0adf1ce7af45ab5c8720800bf85" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.53.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "48c3df8b14b0d8ee164350a68f847359" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e08fc202daa58984fec629e971819093" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5263a4f23e819d5189c294fda8ed9375" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.54.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e056f50cb9f87355f0d3f53080e53001" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.54.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cc7e9047ba591710205543bc39d9f1fe" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.54.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "4200048f12e0370baf0b9a8ecc5384fd" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.54.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "30302c8a96f38bfb9b49f979d0a6bbae" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a50ad1cab808cc0ce15f2a8e8cb81572" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "3b1d980e31462b758447fa298e4051d9" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.55.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "8f49c803a2c7357452406425ed8f500d" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.55.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "35551943094f0b6d2e7ce2809c3e51d5" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.55.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "aa239436d8266e80c0bf3041162c21e3" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.55.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ddd46a4b52c306b1a7a0b396fc51cd5e" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3a76734ddb3875974055fdd94bf41afd" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.53.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.54.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.55.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "4e7a991edd9ad0905de6b5a9a7b6fbfa" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.56.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "558638976deea622fe73b8e046af12f8" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.56.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "492a50e5e4d51a4b90442bc2b54295df" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.56.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "5d52ac18165df6b65dc6442bc0b485db" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.56.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3ddcafcb8aaa1dd45c03286ac38d3f49" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "908330b993f3093698843bc40fd7a927" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "6a3a521762d6a23d40cb9cc6460a8ec4" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.57.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ca078b2cbeac59c324702927cd952d40" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.57.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e5198d88a32adaef32543df07dfd9c29" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.57.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "1e83aff99b2f97b2d80341ad028bab49" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.57.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "52c15025316096f4bd0cbab24cfb2057" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8f2184360690877a6b487391f17738fa" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "cd374a18deddac16d57e2d3b4d9ca159" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.58.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f84068edb8d103aea3014be2d34d89d7" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.58.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1b010b022f66ee375e5e0fffd97db79e" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.58.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "d926a4c46b509b180c1a143d5c6c14e0" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.58.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5cd7eeb3e3222064a47e46f9ad56a3f6" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e073a526566d7f3f3b36070a2c589425" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.56.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.57.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.58.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "5a521a0697abbd7df68065871d1d88da" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.59.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0bac5fed7a0f9ac0fe562a10e8fb90f6" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.59.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "814cdc802369ad4b61a3d0a3d0a50db8" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.59.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "a5299804d9c2810aeff0968acc27386e" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.59.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b9238b2973bfc74b1d4dce739e3741b6" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a156b96fab88b2c903d1df06d74da2db" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "c7949e14535e7bc6fbed3a8dfff21308" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.60.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "aba827073b7bd386d5d9af9c2377911b" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.60.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "07875154acc05674eaa5c909f5dd96b3" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.60.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8cd1d39e03a0d7ceff97dad13bed3813" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.60.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5f57dd46f9da560de0a32581d30f54e0" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "66628df84423854f04c92d0dc05e95a9" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "86f128745e098b132bd19f57fcc1ad90" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.7.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9377ebede57167ad5edc9b448d8dcba2" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.7.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6b4dc432d93a6ca5f06cc73c3ecc1aac" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "79ef7139c4e5f77dac5db38a36f6c068" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4ef70ecdca9da5ff62a72696b1f87906" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a59a126940059c8cb66aae41776c3e15" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.59.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.60.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.7.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "c7884aabe11e4cdf285417d005eb42eb" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.61.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "8368da4871ec1a93fa62028750c8f80e" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.61.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "46f134043c4d8aa01aea943edfba374a" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.61.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "beb6ba13266b186813becfcad01430b5" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.61.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d684a38d772976d2197d74c1987e4cc6" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "82b4aa3d68cc82ddec949f6b485eb895" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "b997f74d1159b97bbd8e1dede51c2946" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.62.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c767ed457fd31fc8c3890128b4920d3d" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.62.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5eb02828eaf438e8d184b57e93fda67b" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.62.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "218947d3dd467caafc8a3844b0bd80aa" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.62.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7fc7c9f4bcde650ef105421536dcd087" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ffc331ff322388f2cb6462c6fba69097" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "79963455e31ea4f045c8b014e0f7584e" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.63.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "26d0c80443fd3efbd51878611ea0a4d0" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.63.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4bc327bb13b5dd472a84f9c6f4e12538" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.63.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6c3e6a496c9a0ab379c526991d0712dd" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.63.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b0df3d6faad0507c1430008be85bfc69" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f71b1d97277a3029de57fb26d7daddb1" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.61.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.62.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.62.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.62.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.63.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.63.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "72e07719c566fe537d65231435b7751d" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.64.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "a9b0dc8f6e00c2bd1376f722fe29592b" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.64.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e1b3f2b35722c395322dbb537d3ce332" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.64.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "bef4f4f46822dbadbce1a688516c776a" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.64.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e2b3da75300a7aff5bb39b8f62209c3c" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "ebab4ad46a282ac8f47deae654ebc0e7" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1ba0907555c3b4299324be3186b4d1a8" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.65.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "cfa8c2f578d954bc76293cc3391e7560" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.65.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9987ad9e4c1e47e06ab3301ef5a26813" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.65.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0369eaccfb6be9e2aec8beea4eecaeb2" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.65.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "93ca04ba3dad800fd515bd0b6080887c" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "97f23eaa4a128dbf235dc5ee988b4b64" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e90341f866885c90427e9f1b685b7fb2" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.66.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "29babffbe63fa005f13edfd2ed1fee38" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.66.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0b64718fa6cc8c1c4047bdd657860339" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.66.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "41f1fa4d80bb1c9a5701f1fb34828929" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.66.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "25e0131a2db6ed7ccd44832023363303" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c2aba1e9aa635aace8e2ed74c02229f1" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.63.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.64.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.64.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.64.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.64.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.65.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.65.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.65.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.65.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.66.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.66.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "605e6b7673d2405fcd2dd1ae95363f6e" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.67.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "011000359610fea816a22b6fd9449d7f" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.67.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "34a9e8aca1cfd955e6a16270e984a4f0" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.67.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0a4ea455fe5d076150c36bef66c55179" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.67.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3fcfb38e70a83b0e216fe34de7c5af6a" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "571e7e3337479624b95e332bfbc09f25" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "993b4bf5fcf95966f035123af9a05c33" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.68.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6a0322da90d71f605f2e1a6a1a660801" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.68.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b76bd32d431ceff27a1523afa22263ef" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.68.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "9b0c1b3b98789ae724c9099391b97091" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.68.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "517d4b0711129b0e9a419bea796104c3" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "4f0aa2a97f85e5401fcf02ec648b1dca" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e403a3e46ac404454b5b32b1848efd54" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.69.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e840e40d35071388a2f5cfef0e176be8" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.69.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ad9311d1cfe692c130e0f3f89f0c55b2" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.69.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0af7b69080ce40cf75ad5986561b1364" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.69.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a691c71ec610a2d59b1cde71fa79ec2b" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "21efff74d65058373bfc51ff1aae8679" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.66.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.67.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.67.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.67.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.67.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.68.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.68.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.68.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.68.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.69.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.69.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "f83479a1f7e328fea30404ac6b486ffe" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.70.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ddded80b6614b34639dc63265cf20f2a" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.70.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "6b9f613d9d84f6a4036d7076deded009" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.70.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "38737901bc1c8f73e62f674ccf97a60d" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.70.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "42b7f0501a320f0aa58ed70bd1c991ea" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d5bb0550da5f7eb7552fb908357f5db4" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "ef68390b7a3ce606b7fb7ff5a1edfded" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.8.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "79ef2c1f47fb3da3b8546f02621fe11a" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.8.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "22f396af6474f0fef44f555cfd8082c2" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "67dc5e95e50bfda3cb500d107581cd75" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5c1921267180199439c295aa12f2759b" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "deba617c5d21becc91d43c52ebfa4b92" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "62155ec8dd46296a7609623dd0b56b7b" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.71.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "3046b8a3a883d4b19fb076f000ff684b" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.71.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "01fac5473c29654d05acd9f64d70dcd1" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.71.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "41e3f9afb2547c449a8c916e1d2940d4" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.71.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e69d2dd7e3d9f6e71e13be29026c42ca" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "90d956cafb210e688ca8c6bd1785c2a6" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.69.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.70.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.70.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.70.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.70.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.8.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.71.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.71.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "5859775ce6ae30ce1e6a8fe9f9e6aa83" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.72.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9b47627e95b5fb7cac5e2d9ee32f89f0" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.72.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fdac1098b11c77e5bd7f77a6b7fe49cf" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.72.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "8d1ab2621ba161361695454961a051f4" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.72.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c8b6a03203df9a2e1c8973d83e9422bb" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aab01b61b015f354c4c13e3ce019f854" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "35ba2369f292c95da33c63b9310368b4" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.73.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2a3059f0bc6b4f1b2956140570181a49" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.73.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "69f94b3a07ecb8dd7ac9dd840c0206a9" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.73.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "73e62316687090c3eba3bf966aa3dcb4" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.73.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a67858097d59d5ab0199e850fd9e6f1f" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "873dc6209b3b34625e096e9572813e0b" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "5c6c982621717167060fc7247ed97b0e" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.74.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6e93af7021aa4ffce5bd585465abacdb" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.74.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "aa1a37fd57a7984ea69134190fae94bf" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.74.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "986f14cfbf76201119022dcbc4e88b53" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.74.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "afe5a56236fc43fc3313b4ddc107d66b" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3108e5d21f6476b88ff6381620cae1fc" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.71.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.72.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.72.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.72.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.72.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.73.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.73.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.73.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.73.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.74.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.74.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "eee3b426d63f8b92d116f597fbeaad4f" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.75.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "87cc9d2954581654f2a1fc8079a5dbd5" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.75.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2fb2389b1ce2ce9c358f8729fb7ad3ba" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.75.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "e35808f37300322a82abc5c43565f311" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.75.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "22b64b082a0a2812d0241ae2fb5349a8" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e79f5ea95a53ade0a14297a23f0b925b" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "c83aed8927145606d3f276e7ed8a08b3" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.76.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "984dd48fc54340da60d9e9a95a2a1eb3" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.76.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5d9360453482b0cba82e5e8565fc3261" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.76.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6e80fe2c6c951623df40d486c64015bc" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.76.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "75d985ead7f17e79d48306f30be0524d" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c6038ec30f0580368cc0cb4f96096dbe" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "d83775a9c9fab0f772b46959e33a04c8" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.77.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "4f6c5dc64984abbd3b04302ef32d5ae7" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.77.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d5b08b779e9447c2897997148ea89332" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.77.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "933ab4629649f8681dedc8dc8c73e77e" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.77.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "814f041f1354aa5f2ff736c7397fcb56" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "e2b91df2d3777d65cd6da4730db12787" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.74.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.75.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.75.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.75.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.75.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.76.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.76.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.76.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.76.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.77.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.77.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "6d1a973d50415e100f6ad72edb25fd3d" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.78.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b09018ffb15893d130746f6952dc171e" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.78.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "08cb6b0d19938ad2796374671f201f12" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.78.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "e606a54043fd9815b0ce099396e6720a" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.78.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "486de45b38490f3725f30dd504489805" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "fc958aa7b87a1f13b12c009ffc5ab8d7" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "51b0b6c9488b28855ae85dd02273e9fc" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.79.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "16192b2bb468a659ffd8c026cefe66c7" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.79.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "031f90387fcfbb065f184c9f83ad30f9" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.79.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "3721a36d5ed6508c5ae3007dd2132e1b" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.79.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ad3d774a8c7a25d70c3883b6d1746f25" + }, + { + "dataPath": "params_shard_483.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b1caf6f5764114825ac80fb9bae7e2bf" + }, + { + "dataPath": "params_shard_484.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "65f5bd6ee4331cf3bc3860ce93959109" + }, + { + "dataPath": "params_shard_485.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.80.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "4eef14c77f8df4d9ed7412efefe70821" + }, + { + "dataPath": "params_shard_486.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.80.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "09de32d34f8c6000baeece3d86f0a130" + }, + { + "dataPath": "params_shard_487.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.80.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "05596be2d600f69793bd2ded746a09a9" + }, + { + "dataPath": "params_shard_488.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.80.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "02bc36a404aa76c060808b1de36c9243" + }, + { + "dataPath": "params_shard_489.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.80.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5df67c1e6fc25a0fb6a303c5eb6bc80e" + }, + { + "dataPath": "params_shard_490.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.77.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.78.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.78.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.78.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.78.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.79.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.79.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.79.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.79.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.80.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.80.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "26a9e745268753e64b3a46fe46053b68" + }, + { + "dataPath": "params_shard_491.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "265317a70fb385d46edcbb812c39e17c" + }, + { + "dataPath": "params_shard_492.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1182d45627da0e71846de409f942dceb" + }, + { + "dataPath": "params_shard_493.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.81.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b9297a31fd6dda7fefe2ea2920679a06" + }, + { + "dataPath": "params_shard_494.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.81.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b1b32b3954172991a3d029193c0ded6c" + }, + { + "dataPath": "params_shard_495.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.81.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "04ba50f79587bb80f9a4783d8734a244" + }, + { + "dataPath": "params_shard_496.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.81.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ecd4aac38555ae5d8ea7ca7e78e6dab0" + }, + { + "dataPath": "params_shard_497.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.81.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b780d6582de8e8af7af9975a807d07b0" + }, + { + "dataPath": "params_shard_498.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.81.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "1adbecdf23a4220c77f6bd129fcc58b5" + }, + { + "dataPath": "params_shard_499.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.82.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "212bd80423c411b2cfecf812836e3c3f" + }, + { + "dataPath": "params_shard_500.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.82.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1eb8986d590f751dbe7039e0b4022e8e" + }, + { + "dataPath": "params_shard_501.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.82.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "6ac883583579c396e5a5850af0df9f5b" + }, + { + "dataPath": "params_shard_502.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.82.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e8a2dd6ccf7c34d00bf87c89267f3e21" + }, + { + "dataPath": "params_shard_503.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.82.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "1fb33570c19c645d8a017b64a1025466" + }, + { + "dataPath": "params_shard_504.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.80.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.80.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.80.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.80.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.9.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.80.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.80.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.81.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.81.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.81.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.81.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.81.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.81.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.81.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + }, + { + "name": "model.layers.82.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.82.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "a55feb4e828e68f53cb238fa6cf33338" + }, + { + "dataPath": "params_shard_505.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.83.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1219a21c481ba2127170e0f2ac8c7e9f" + }, + { + "dataPath": "params_shard_506.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.83.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "41e95a29cbfb807eb4b905261fdc81b1" + }, + { + "dataPath": "params_shard_507.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.83.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "17a7c3f3bfbbceb26c5186b9b6fc72b1" + }, + { + "dataPath": "params_shard_508.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.83.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d00b6e4281788ba9b351559852d71e25" + }, + { + "dataPath": "params_shard_509.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.83.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "7fe007137b95dcff88e3719b862229ce" + }, + { + "dataPath": "params_shard_510.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.83.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "f3218577ec1b2ee7b0bac929683ef6f8" + }, + { + "dataPath": "params_shard_511.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.84.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7be192c5697a10e2491b27306ea56cba" + }, + { + "dataPath": "params_shard_512.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.84.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "73dcfcd5a7194a478e9cc8312dbfd1fa" + }, + { + "dataPath": "params_shard_513.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.84.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "72cd6516efa4b2c79ab8fb7410277385" + }, + { + "dataPath": "params_shard_514.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.84.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "258ae54d491c5670b114cd145a138e81" + }, + { + "dataPath": "params_shard_515.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.85.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2240e47fa8ea616d898a4263aa6a33f6" + }, + { + "dataPath": "params_shard_516.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.85.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f37153c779021c13be9e042934c3cc19" + }, + { + "dataPath": "params_shard_517.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.85.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "f2c88644641effa619ef7449f96213b4" + }, + { + "dataPath": "params_shard_518.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.85.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "b59d90447911ea8e84d9afdd01b95973" + }, + { + "dataPath": "params_shard_519.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.85.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "3b457b753fe0b54c32ecf3bcb84532e1" + }, + { + "dataPath": "params_shard_520.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.85.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "4b531da74e59a022f31ec94c68e335d8" + }, + { + "dataPath": "params_shard_521.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.84.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "b17c96e36545f148a4848f2c2bbbeee3" + }, + { + "dataPath": "params_shard_522.bin", + "format": "raw-shard", + "nbytes": 33342976, + "records": [ + { + "name": "model.layers.82.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.82.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.82.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.82.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.82.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.82.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.83.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.83.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.83.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.83.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.83.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.83.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.83.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.84.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26772224 + }, + { + "name": "model.layers.84.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26780416 + }, + { + "name": "model.layers.85.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26788608 + }, + { + "name": "model.layers.85.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27837184 + }, + { + "name": "model.layers.85.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27837440 + }, + { + "name": "model.layers.85.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30196736 + }, + { + "name": "model.layers.85.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32293888 + }, + { + "name": "model.layers.84.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32294144 + }, + { + "name": "model.layers.84.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33342720 + } + ], + "md5sum": "93a6f98a7aa4dfc8a645dba01f182b2a" + }, + { + "dataPath": "params_shard_523.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.86.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "7032efda6bf4cb73d5037ab66b6ba745" + }, + { + "dataPath": "params_shard_524.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.86.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1490c992a46a4f480174e06ebaead314" + }, + { + "dataPath": "params_shard_525.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.86.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "0b7da9d3baddf7f57c567232c243c3b5" + }, + { + "dataPath": "params_shard_526.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.86.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "46f2b8b18be9b1195473a1af290b7bde" + }, + { + "dataPath": "params_shard_527.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.86.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "973d85f642a175c347054f8d16964c42" + }, + { + "dataPath": "params_shard_528.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.86.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "99e7fed59de6c9a4574e840951653531" + }, + { + "dataPath": "params_shard_529.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.87.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "07dee75d27babd858b071bc336e24983" + }, + { + "dataPath": "params_shard_530.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.87.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "011097798874d52f43a7e132b78349df" + }, + { + "dataPath": "params_shard_531.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.87.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "98d5f36e3b3af0a078ad2847759b0c36" + }, + { + "dataPath": "params_shard_532.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.87.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c7f8eb4432982413b85abc315443ad1f" + }, + { + "dataPath": "params_shard_533.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.87.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "39e5a28fdfe8890cd0d561478b967d4a" + }, + { + "dataPath": "params_shard_534.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.87.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "dea73fdf08661dd9596c2be0f851f668" + }, + { + "dataPath": "params_shard_535.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.88.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0ffc7c0f366917a4516de6d6f070151f" + }, + { + "dataPath": "params_shard_536.bin", + "format": "raw-shard", + "nbytes": 32294144, + "records": [ + { + "name": "model.layers.84.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.84.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.84.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.84.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + }, + { + "name": "model.layers.85.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233920 + }, + { + "name": "model.layers.85.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21242112 + }, + { + "name": "model.layers.86.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21250304 + }, + { + "name": "model.layers.86.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22298880 + }, + { + "name": "model.layers.86.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22299136 + }, + { + "name": "model.layers.86.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658432 + }, + { + "name": "model.layers.86.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755584 + }, + { + "name": "model.layers.86.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26755840 + }, + { + "name": "model.layers.86.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26764032 + }, + { + "name": "model.layers.87.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26772224 + }, + { + "name": "model.layers.87.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27820800 + }, + { + "name": "model.layers.87.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27821056 + }, + { + "name": "model.layers.87.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 30180352 + }, + { + "name": "model.layers.87.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32277504 + }, + { + "name": "model.layers.87.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32277760 + }, + { + "name": "model.layers.87.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32285952 + } + ], + "md5sum": "879bcb857305d3871849a0100fd054f2" + }, + { + "dataPath": "params_shard_537.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.88.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5067e718387c13dbfbe18c933973e7a2" + }, + { + "dataPath": "params_shard_538.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.88.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "db5bf828d25d1401b4997fc2541fe1e9" + }, + { + "dataPath": "params_shard_539.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.88.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "ca824894e4c08ac34ae9e4c3e8b4583e" + }, + { + "dataPath": "params_shard_540.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.88.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "12bd88f068131928cd4cb69b847c8471" + }, + { + "dataPath": "params_shard_541.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.89.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "350e8d912ed8337d98213b8fa9c0bc11" + }, + { + "dataPath": "params_shard_542.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.89.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4cd3e4cb24cb6a7f2822aba01935feb3" + }, + { + "dataPath": "params_shard_543.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.89.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "9428af54d94ff0eeb2384ab60d68a70d" + }, + { + "dataPath": "params_shard_544.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.89.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "6461a51cc69fa18a9eea7cdd898bd292" + }, + { + "dataPath": "params_shard_545.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.89.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "3e2cc6e0d681cc20645d74e69bd01f7f" + }, + { + "dataPath": "params_shard_546.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.88.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "135ad66f6869c2fb3ca1fa90fd2d87e7" + }, + { + "dataPath": "params_shard_547.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.90.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "49684b1c788bcf011f3255a0014594a2" + }, + { + "dataPath": "params_shard_548.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.90.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0386232e021fa69b2b070fbb6fa388d0" + }, + { + "dataPath": "params_shard_549.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.90.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "189032a4ee37c3c1d633d267c81f591d" + }, + { + "dataPath": "params_shard_550.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.90.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "90b9519c0151371ca3625b2bd659b5df" + }, + { + "dataPath": "params_shard_551.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.90.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8bcd1ec4fac7f8f444593112135d6c8b" + }, + { + "dataPath": "params_shard_552.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.90.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "093baa21fbad23fd7f89831345e48873" + }, + { + "dataPath": "params_shard_553.bin", + "format": "raw-shard", + "nbytes": 33326336, + "records": [ + { + "name": "model.layers.88.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.88.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2359296 + }, + { + "name": "model.layers.88.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2367488 + }, + { + "name": "model.layers.89.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2375680 + }, + { + "name": "model.layers.89.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3424256 + }, + { + "name": "model.layers.89.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3424512 + }, + { + "name": "model.layers.89.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22298880 + }, + { + "name": "model.layers.89.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24658176 + }, + { + "name": "model.layers.89.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26755328 + }, + { + "name": "model.layers.88.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26755584 + }, + { + "name": "model.layers.88.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27804160 + }, + { + "name": "model.layers.88.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 27804416 + }, + { + "name": "model.layers.88.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29901568 + }, + { + "name": "model.layers.89.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29901824 + }, + { + "name": "model.layers.89.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29910016 + }, + { + "name": "model.layers.90.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29918208 + }, + { + "name": "model.layers.90.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30966784 + }, + { + "name": "model.layers.90.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 30967040 + } + ], + "md5sum": "643f64b69f4dcc04bfe4558720b12849" + }, + { + "dataPath": "params_shard_554.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.91.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "799c9b173ea7f4e9c92565ce08fd46f3" + }, + { + "dataPath": "params_shard_555.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.91.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f16e46b90f3b4c0d45f5ec898433c2a6" + }, + { + "dataPath": "params_shard_556.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.91.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "eb0dbf73b51fa7315d55aa83fda0e07a" + }, + { + "dataPath": "params_shard_557.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.91.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5c23c345feb1097839f64c2a01ec4dc3" + }, + { + "dataPath": "params_shard_558.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.91.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "e873b847591bef8142aa24d38e5a0af8" + }, + { + "dataPath": "params_shard_559.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.92.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "542b413501fc49230b352414b4e41edd" + }, + { + "dataPath": "params_shard_560.bin", + "format": "raw-shard", + "nbytes": 16777216, + "records": [ + { + "name": "model.layers.92.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 0 + } + ], + "md5sum": "8cd67e98b4fc001f6fc742cf3fc484f1" + }, + { + "dataPath": "params_shard_561.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.92.mlp.moe_down_proj.q_weight", + "shape": [ + 128, + 4096, + 192 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b5535767bb03d45c4f84f950a0218950" + }, + { + "dataPath": "params_shard_562.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.92.mlp.moe_down_proj.q_scale", + "shape": [ + 128, + 4096, + 48 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "26566f06eaafca603c111587c2cd30c9" + }, + { + "dataPath": "params_shard_563.bin", + "format": "raw-shard", + "nbytes": 805306368, + "records": [ + { + "name": "model.layers.92.mlp.moe_gate_up_proj.q_weight", + "shape": [ + 128, + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 805306368, + "byteOffset": 0 + } + ], + "md5sum": "63c2a1d3717261a1e16da4382b207170" + }, + { + "dataPath": "params_shard_564.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.92.mlp.moe_gate_up_proj.q_scale", + "shape": [ + 128, + 3072, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c8cbe07aebfcba33f7d31be2d26cf106" + }, + { + "dataPath": "params_shard_565.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "model.layers.93.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "59f8973c9c8d36019e42a647cfe63a6b" + }, + { + "dataPath": "params_shard_566.bin", + "format": "raw-shard", + "nbytes": 33080832, + "records": [ + { + "name": "model.layers.90.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "model.layers.90.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2097152 + }, + { + "name": "model.layers.90.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2097408 + }, + { + "name": "model.layers.90.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2105600 + }, + { + "name": "model.layers.91.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2113792 + }, + { + "name": "model.layers.91.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3162368 + }, + { + "name": "model.layers.91.self_attn.c_attn.q_weight", + "shape": [ + 9216, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 3162624 + }, + { + "name": "model.layers.91.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 22036992 + }, + { + "name": "model.layers.91.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 24396288 + }, + { + "name": "model.layers.91.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26493440 + }, + { + "name": "model.layers.91.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26493696 + }, + { + "name": "model.layers.91.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26501888 + }, + { + "name": "model.layers.92.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26510080 + }, + { + "name": "model.layers.92.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27558656 + }, + { + "name": "model.layers.92.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 27558912 + }, + { + "name": "model.layers.92.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 29918208 + }, + { + "name": "model.layers.92.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 32015360 + }, + { + "name": "model.layers.92.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32015616 + }, + { + "name": "model.layers.92.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32023808 + }, + { + "name": "model.layers.93.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 32032000 + }, + { + "name": "model.layers.93.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33080576 + } + ], + "md5sum": "4572cbe6d2a09b7212038aeb793c84ab" + }, + { + "dataPath": "params_shard_567.bin", + "format": "raw-shard", + "nbytes": 21233920, + "records": [ + { + "name": "model.layers.93.self_attn.c_attn.q_scale", + "shape": [ + 9216, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2359296, + "byteOffset": 0 + }, + { + "name": "model.layers.93.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 16777216, + "byteOffset": 2359296 + }, + { + "name": "model.layers.93.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 256 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 19136512 + }, + { + "name": "model.layers.93.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21233664 + } + ], + "md5sum": "16513a2d97d2661e2a93a0781d1cef5d" + } + ] +} \ No newline at end of file