{ "metadata": { "ParamSize": 1227, "ParamBytes": 147102545920.0, "BitsPerParam": 4.071739133529121 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 311164928, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 311164928, "byteOffset": 0 } ], "md5sum": "49c3b0954736eb6a33ff3dec7fcaca16" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 38895616, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 38895616, "byteOffset": 0 } ], "md5sum": "345e9f36ec085efd96ac768f0beb1042" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.93.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "fd8225ed2a42829150e751f0002e1ecc" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.93.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5e8d593f2114523c5e1cd8fe505dc2f6" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.93.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "ebb49f6142625849d6b928fb985d62ed" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.93.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "219e8807d6787deceb3712f378354312" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 311164928, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 311164928, "byteOffset": 0 } ], "md5sum": "8cad912eaedfefdf597a6f99cc382148" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 38895616, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 38895616, "byteOffset": 0 } ], "md5sum": "adc47ce7e1a766a7f87164b59252d91a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d36693a6053dfb9f898e69d548fcffc6" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c8c36f853bf9509e033656929b553b79" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "30412dac1343f9d79676c6eecdcb1c8c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8bdca16a3181822bf5be6df86bef3ffb" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "caa017fe25a224ce5288a23358f6bcca" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "8a4936af6bfcbf4889b694578ba8258c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "89e8a1f2c575d32e636c1aa5b6443e02" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "7af20ffa343610cdf62738aad092aa9d" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8a56f3a0e359e1b2cbb4cf68ab45ba8f" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b9578ce9f00da1f66912fc9f73a35ebd" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "12d43cfa57cb8a4aeadcdd0769e05a98" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "838fb84680ee50526e3c02c8b7fd73b9" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bf1d8bd3767078344bd10694b4149799" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "3b43af65dd340031faa235cc1b3bc90b" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "62258fd76c508818cd0af4c5a914edcc" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4058c5a2eee516ad7054755556758f5d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "fea14b862b67aae448d5295cc241d567" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33350912, "records": [ { "name": "model.layers.93.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.93.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.0.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24576 }, { "name": "model.layers.0.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1073152 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 1073408 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 19947776 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22307072 }, { "name": "model.layers.0.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24404224 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24404480 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24412672 }, { "name": "model.layers.1.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24420864 }, { "name": "model.layers.1.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25469440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 25469696 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 27828992 }, { "name": "model.layers.1.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29926144 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29926400 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29934592 }, { "name": "model.layers.2.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29942784 }, { "name": "model.layers.2.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30991360 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30991616 } ], "md5sum": "379cfe4eb8409de529d12841eb9fef32" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c576a3b51ba8717fc4780dfe8b71bb2e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9d831b22f30ab42f1ded997a750efa6f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "cf78e958559217ad67bf73f8d7e804db" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "907159ed022363ea79728ef3840355b0" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2b5e7376a5a8c10dd76f3b1ef801246f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ae52f5d95f711aec9cd661650f96e20d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "2237fac4ce3b5c0187ee9453517343cb" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "25842ab122f315362a74960fc5d1247f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "0ff5347f8096af9427a3d853fa3ab14f" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1e765749a3af3f8b85f69a112fb16daa" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "9e1e36b8a9c0a5657ff13b8b0b9bafe0" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "10c459b7519fd0e6868ec63a94383026" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "781f050e77b5359dd94e6e1581200803" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b4ddd104ff51a6a5aefb643caea93854" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2f1906f2647a5eb78e9ab8d97ad9f73c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "223b09cdf56d2cd2fcce939ed3e7ef20" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "34ab923306c4452d3f8b568dc5a679b2" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c8d5faf3088a589b204ab11aeb1ebf29" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "69411658bebd796cd43eaa37a710adaa" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "006398e9617842be0bb11e9617e9d617" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2097152 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2097408 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2105600 }, { "name": "model.layers.11.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2113792 }, { "name": "model.layers.11.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3162368 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3162624 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22036992 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24396288 }, { "name": "model.layers.11.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26493440 }, { "name": "model.layers.10.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26493696 }, { "name": "model.layers.10.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27542272 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27542528 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 29901824 }, { "name": "model.layers.10.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31998976 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31999232 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32007424 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32015616 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32023808 }, { "name": "model.layers.12.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32032000 }, { "name": "model.layers.12.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33080576 } ], "md5sum": "2ea02397406661cb684ababf3969826a" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7f1f01bc4ccca97df2761cc25a447569" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "51fdcb2a682dae7b5b33c02e64116767" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "9e9c3fe2081940de4764f65c9f818b3a" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3a484efed9a56d4a7f832c1e82c1f0f8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "520a57b72516b46ce6ffd9f13049fc0c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "c90c0e6b4a49da37db29226c465e404e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f7c00428193b1ddbb8af34996bdfdabe" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1210d0f0578b42bcb7aa59643b0f8369" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b175857c9d2b2c6762c7604c498721b7" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cde13bb99b0043db26d088f0a970eb42" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cdded077d045679ac2c8fd71ea1934f7" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "90f42b5467eb02d37d6db45fcd91c700" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d376e4d793c895c7fa1bb47a40ae4e9e" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cf5cda0b5ed6f1382e29b7b6bc7f8707" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "90d3e8595e6f1f6b889b923f42b88d1e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b7fa1c239dc8556124ffc8e7003c59c8" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "54aace7a88efdb3edf2c5e4434ad4192" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.12.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.13.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.13.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.13.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.14.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.14.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.14.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.15.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.15.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "81bd9bcbb7530a793139c6a5b875d882" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "700d6e83987e3dadd76accc290206835" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1b651fd0b8f5137ef0592a8e7ced71f8" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "792d0ef2b8a9f98bc3f877a9aed936ac" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "13b14228b8c39d3ca33cfd7d1a3f5953" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3498e1f2f343405d952e72397378741d" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1a253341b3f1a9f1129e16e6c614fae9" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "bcfc1ade5f2dd2ec80f2810642c313ec" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "013e7476547b0ecf7fe0775c609f2ced" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c65eb8c7bdd1e98bf2cf47466cc099a5" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "20551fdd048a008087ecc24e49c97c0e" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9f62f8669c29057848728709241a72ba" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d97c04f4d737b3c47b16f6e56522938e" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2f50124b6f9b66fbdfdecb5c4e4f7801" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9cd3a8f2da2484d14a0f8c80d4759f5d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "ec9834ea811eddce53859b5f5a469230" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0dc13913b4c73034143872739ad6acc1" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3ee5711fcef5b3ca940447f1ac950857" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.15.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.16.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.16.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.16.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.17.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.17.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.17.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.18.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.18.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "9a28fff298231675e9a5a7b4ad1eaca2" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0277c79221f0528841235631996f6ae4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "15b4a108eef3bd5371d483d53aab4f4e" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "1de5e1be0f0fe27b40138811eca51f7c" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "44da44185da97e7a6486f97a267c6d2b" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5641766fa3e489246ad5f60f249d5267" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "905aae6bf9cde42fff264777fdaf4811" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d25ac5a995acf84a0f4e73ab59403698" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b745fe6e37808ea868154af32ed9de7c" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9c8f5a10b14594e51efecdcdcec02791" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "9667a323de6a69b2bf0c9111ea7b5b1f" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2a7b4c55b8f3fef7b00080fe0e7caca5" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "bec1b16401af586f5741a0d4e35003ad" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "629ad30cc1c8f1b593e10acf13db5fa9" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c10dd8a99f3478940aa5722e176ee46a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0574e9db8db4147edcd416633416c752" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8f1fa8d93571ae18ae11e5b60d92926f" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "332ed4906d7a664b8e56b50a557c53e8" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.18.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21250304 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21258496 }, { "name": "model.layers.20.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21266688 }, { "name": "model.layers.20.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22315264 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22315520 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24674816 }, { "name": "model.layers.20.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26771968 }, { "name": "model.layers.19.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.19.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.19.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.3.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.3.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "e55c6d34a5ba3de5c836f77bce075950" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ca1b22535abcb03936d2e6712f8011ed" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cd628f3c0ae4262d83ac6947f337e050" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f302af2d1c7c234155005ce5c02b18b0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "159da4a92040b84a4e94e900050a3336" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b4c6f17a11c486bb7a92006e40e8f816" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d9653322480e21fe965aec029800d999" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "a6bc553d41c252c092ac1ea615b44527" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "82edc857cb8a49526eae7007d8157511" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0e048f47ce348949ac6b11724e67dac1" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7e789cdfece66dee6dc0f8984d415a8d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2d12638f940f561111f3df1879685811" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5ffef5612b306ed206a392080cd2bf00" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e27dfb996c4b6edaf3d4346a5a66b6e2" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 32294144, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.3.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.21.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.21.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.21.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.22.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.22.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.22.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 } ], "md5sum": "21609c9b6fee318d882f9a3eb71ff032" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c5bdb1279560e163233475a78583f12f" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8202393134643b721ebee0fc26a1d551" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "9fe058b2e9932d385b6479c2c3cc9616" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d4b07442e32938c6bd17289089d6311a" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "1419099aa5e154d3ea5ca9b34c1617e7" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b685f328a63ad3d6eb8fdec3acd3b8d5" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "a035dc2714222cbed8463c9b0e0b06c1" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7fdad350a013b0f24f5ec2bb6fa66252" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "acbb764e22e27c70c591d87126a7b63c" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "31c3d6970a040aeac955ef58c7c11d3d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ca6bf27078b81ae6071f0c241b3ffbcd" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bf12cd07bb188046858634606d780f36" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "e5d910734db98d3998e10fa97023f9c7" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a2538f46fa31980f522182a94ca83a0e" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c5e4099cb9657bdf9ff6aab1b4b49168" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a3c6b42693742594cae4d43a5105b0e7" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33326336, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2359296 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2367488 }, { "name": "model.layers.24.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2375680 }, { "name": "model.layers.24.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3424256 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3424512 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22298880 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658176 }, { "name": "model.layers.24.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755328 }, { "name": "model.layers.23.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26755584 }, { "name": "model.layers.23.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27804160 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 27804416 }, { "name": "model.layers.23.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29901568 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29901824 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29910016 }, { "name": "model.layers.25.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29918208 }, { "name": "model.layers.25.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30966784 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30967040 } ], "md5sum": "8c778bb87c295204e39a79a1d2cc2eb8" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "69f0f8d26e567d9f56322326029c2514" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7faa31e2563f0fe28e9ba94407698815" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "db4dd7be6af5b382094cef8ca4ad6690" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0781a544b62fd35d8693f6524ee16f8b" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1c77dcd94cfadea6156744b17d4f4fc5" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5f4f69d141ac0667c57c5c43fc55286a" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "6e72fc5f8ee56fc5697b32e59e86d2b0" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5154cfab16204e9b71080e97aeda200e" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a55bc23d4942027859f3eb9aab08dc6b" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f949f92a144e2cae320b452547938095" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a8e770c3b044cf75c0f4666e53280d37" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "349ef4e3b817b1d0b48c491cef01ed69" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6553a8b15149cc8156b9142eb83bf129" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "90201da63053f54d32d674e02f9a1856" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "94d9f1da224a95d4fa487b4167b30cb7" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "77cdc8c09bdc4deb999d925500961e64" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2097152 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2097408 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2105600 }, { "name": "model.layers.26.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2113792 }, { "name": "model.layers.26.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3162368 }, { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3162624 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22036992 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24396288 }, { "name": "model.layers.26.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26493440 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26493696 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26501888 }, { "name": "model.layers.27.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26510080 }, { "name": "model.layers.27.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27558656 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27558912 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 29918208 }, { "name": "model.layers.27.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32015360 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32015616 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32023808 }, { "name": "model.layers.28.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32032000 }, { "name": "model.layers.28.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33080576 } ], "md5sum": "f934c26c2f5e9ef950749c257e680139" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ede0a10491dd0b9f9f0a430778fcae98" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "95e71a80beb8e2bb6f0a1de230860f33" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "84b214223244e8977e58966b71e704a1" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9908acbb786119cc7519c38c1a8a9274" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ac7fc9f48719ad0cd428aad2edcc560d" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e53c1e1056d5099f83ecf2ec446aa988" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "4973c663fb57c89c2e70a7704ff8d458" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a4eba21d3179e64b7d3fba11ac9558b6" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "35cc0c813ba0c78bb885c622bcdc144c" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c1fc1fbbe2f79cb741b2fa7d46670cba" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cc578f665f594e4b66125502ef94c805" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5f9173962938728ba91651c4ea31e298" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ebf6b221bb080b70f04e6c3c5dfee5b4" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "28499e064889c5b47a453615edefdad1" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b388005a6319591e4cc4ab6230724141" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c470071127d43835b31853fe3e6499ee" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4d2b772df5011ff8d9cdda77d683fe16" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.28.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.29.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.29.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.29.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.30.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.30.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.30.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.4.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.4.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "56caa5a25aa8d847397ca68f5be5606e" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "882f137523bf9293f113a8ba4993869b" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "414eeb0818d46d8403c2d9bc0b74f78d" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "ea1a196d395e3a88d437ee79211106a6" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "04af4eeda1e9d785ec31175c338e5d47" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "19c5a48111293c755936232d7ea3af80" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "219133a8d78868ca3eab74d691cb179c" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "bcc4ba3022d264ee1e3ac65d1f236149" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3e51f57f2afa34cfe56531f83f3611d0" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0e22cce52d6580d14c701639cf9bc919" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "de8ce4cbc17b217f50ab5a047d35ee49" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "123e5a44bad7a5622dca4f8b5d336648" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "22623179ee18b03c91ddb309f6e0edfd" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d788c17b9d874f8d0fb21efa43b771f1" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "66a9fb15a9ebf402f168d42d15f5a75f" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "eae49b3250bc6539cac280ed71580119" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "acf4cb253a88a47a34dc5de93a5f7960" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6d31a0d0dffd23fd86a1997396b0a821" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.4.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.31.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.31.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.31.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.32.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.32.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.32.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.33.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.33.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "a8c77508b2f4d22a8fd9e827b1f117e5" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "1fc5e5fccde3cc58a90ac71792f79ca5" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "48b995dab26736b9895a9fd74d1f17ff" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "e12c69cbd5f78db6a9825597066453cb" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a2017894a1e4331f63a37f8f0206cbdf" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b6816347e3290747fc5febdbc26936bf" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "67d4e81b20a833979902f2619190e40b" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "081ed3d637640b084334cc1903a16b08" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2b06dfae638602320e50b26c15ab3ddb" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "08da7d1490be76ac476b1bb5f1b2a553" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "33e97e8267287332c82a3b7f3a99e57a" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "71207bc1a338d1779f79c33dd1cd4a6b" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "0fb3d43cb6601d67563468c331d01392" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "12ccaaa711018ca4b095b33302124644" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c861e93d19632db3df7ea5a113c672ad" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "be9839737f674f32c90f52fe5a18eb4e" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "935bafd32459c8c4cb34439a6e90b75e" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "51d350b820d566bcf6482807aded6324" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.33.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.34.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.34.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.34.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.35.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.35.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.35.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.36.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.36.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "fd26bae9c20ece533cf08efbfbb2a49c" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7fc95edfd284f39ccafa6a8cb2259872" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fc73f9f9a36af054698d51460437e485" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "30309c1e9bc1a66f85ec71a5c4ca63e9" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "32d0c77914f5eb2007f29d1be5c7cb7e" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b7b8dbaf476c3c0ec1dc50d32271e6de" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "73ca4ff4a8f259ffb480471a8ec7716e" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c3912a18a8a21c21d63d87866cc929f7" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1426bb98bdd4d7690a511bd6b2c4f362" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "68fff7bcb2bead08ac50d62769695b71" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3613ca08241fb0599c0ade67ff5b8e59" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0685ab38e3357522ce91fbf8890fd265" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ac7bb93df129f9ab9c478d847d9ba548" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "cabc7a4c29c9b74919c56dd88f13c474" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0fc30a5a2d62eadd67f0368026f97174" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "1f3bee5416306daa92fdee9139867dd7" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b1d2d47a2b8726cfbf54e12d66ac7265" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ca382d80116eed53a3dcb1310b5ef3d2" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.36.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.37.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.37.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.37.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.38.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.38.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.38.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.39.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.39.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "c5f8739d6e13eb36028d9d888d6a9080" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "20ff9d1faf3c61d94221d6531fbdb395" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "04f6ac3c6e95187ee2afb4d2555d993f" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8515fb10b86ca9f1fd6e21f71d4ac2cd" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8f9b240ee5f81b113e3c26d14518173d" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "32b506795d489a897fb24aceaab4641f" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "2ebf5a53b9c13fde30cc187c5c4b727a" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "78110ae4984a8ad74214b110b627e376" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "92e2865d3ab678b57cb90341f9bd00c9" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "877ee055f84a75a5471b889ad712f3f8" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "52aa56ce64a33417a15c1cc8447e4ff0" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e5a512fb10f8b07d300cfe389cd20c5e" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b8808089a267bafabfe2b4f93add2761" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "9918af051dde08e546a91512a071f1b5" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d6d1220221508c6d11ee9e0a56b59cf8" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b59512d04438f415bc980da9b3746798" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3f0f830f02c7553324409853d9c57ec2" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d8eacd9b3671cd710eca4c1a4339536d" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.39.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.40.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.40.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.40.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.5.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.5.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.5.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.41.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.41.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "a1fa1c3c7895eca27b6a64a0c3ac15de" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e78b8c5eb3d27cec6ef3fa757e98a36f" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "28e9880b4c36926b9a5009cd984232ca" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "03fc70f04a6cd0f0c75875a588f5f4d5" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d4f0ed5c8edcd991e37848da0211c90c" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0e4ab065bd0e17ef380cdd0a450ea76b" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a1abbce8a0fb8e5d7b4f481fc8047c17" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d9c4818ef07902ea0db4d762aa6494e1" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "48614cec572a38dc7669315b5fbfe0f9" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "65192a7688bcee2b797fc1829197bada" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fa593cfe15167f092209835f28afa696" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f28151533932dc403b8d1a500b9f228e" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a85b5b40b13229322ef902c7249febee" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b86425c6c8eb25581ce2067559a0e30e" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5af765539e8b2ddd8182bbf482c5c3f2" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c7cce45ba60307ef85fa5af10e4298d4" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b0c51b283b12db598cfae034a0dea1c8" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0f7aabea29578b8ccfd9641e52379347" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.41.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.42.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.42.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.42.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.43.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.43.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.43.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.44.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.44.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "30a6decfde25b6f13a8c506078fa5196" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ffb7311ad5aa3284f20372c6e56adbd4" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "52303d667c170fdb30dcfd2d5a0e3bc1" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "10c503ecf6354cb0326de21db4861d25" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e8500afb1b26830c3c476cbd82a68636" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4d9d25cc24463f82cfe690fe51c25721" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "7e63c4ec65789082ff7e340aa813e46e" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "da57448197d768de1cbe782852f1e7dd" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4bb64f36eb20b2052c7d98709e37e0ae" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "81fcf8bc795ca1a1629b0153b9569170" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b232c989ce7025540a7ed40de8733562" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7301702e04cb7fb6fe0c5ed864ff8025" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "32a9b09925d1679cc599bc484326e1a1" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.47.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "938ba593c5c35b71c065bd2b61d2df09" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.47.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b35b48ed57eb81761f45f5d22de79364" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "7e16b9b1e7469a3e4c7c16fa9981897c" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7a25cd91321536032f61f5e462055e5b" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8188bc9eeb6bfe001e5fb91b39b62136" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.44.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.45.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.45.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.45.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.46.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.46.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.46.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.47.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.47.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "ce390bb4a7f08e20cd894e374b8ba568" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.48.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "196853257bb440c5eebbc1af5983ad12" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.48.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dc648bfab11d9119ffeefce2eb19f48f" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.48.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b897a2fd9033854ae7497568ed1af864" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.48.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e6d67bd675fbe16f684278d4e0c438c6" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d93bf54322de4c9c5057927690cd0412" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "127af0b08e887a4ac5e886ac3591dff0" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.49.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "dff523b59345eccd43f11528ad05a385" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.49.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3ff6d0b3fba287a0a295336b14ae9e4d" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.49.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0bcf570a36f404880e1a2f5b4a4049e5" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.49.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fe183e45d7041652c3f9d0a64eacc63c" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a7d19bde5cd46c67622a910326632b3e" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "41ba57ed359c0e5fa5cd71b2134698ef" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.50.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6909c91fafd3ea0bc7e784e1bbee306d" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.50.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "027636ff110b5fdb8bd2fa61a00ae030" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.50.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0ede3e9ab1690654a5079f5baa18c4d1" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.50.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1033a02ae539d5394b38e4fd7b238536" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bb6b32c9c8e8c2bf2a16972d47bbd368" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.47.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.48.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.48.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.48.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.49.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.49.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.49.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.50.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.50.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "33d891a384e49b99f3fc9b0f2354109b" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7f82fb64eac105c1058c6e22deb66562" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f33d5af42e9a0b2e1b02b3c30815fac9" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "33712dc572723f9be6e5aa9ced658a42" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cc88beb6d1478b9e2ebc9034781f8df6" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1d03bc90f41b87bba407126b9e725ee8" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "df6cd12208b791ee37222232430babb3" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.51.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ed5103ea502c730b9e499127ceccea5a" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.51.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "11e0f417311c2f2eef46319dee4f9a47" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.51.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8ca4024190b0ca7c9105162141371538" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.51.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dc08df6079f1528f8c5fcfb1233951e0" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2f33c9fc4d948b991333babe984ee85d" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "671befc9df60b1955ec41a036e72ab7a" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.52.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "61a033695f1f48ed44c131a896a1e6ea" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.52.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cb840a007909a6282e8a0422a8d65a05" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.52.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "04bae73ca885a20617bac663656513ff" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.52.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1701ef3e0931a116f38eac09d83cd014" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "15f71758249f22bc456bf7c613dbf2fd" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.50.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.6.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.6.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.6.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.51.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.51.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.51.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.52.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.52.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "24e2be609f4b53ab9ebf876e8dd659b4" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.53.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c5a3fc5c0d06fa3e81d5833fb5c3b649" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.53.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2aa0818b8a3ee9694ef9ef8a4b3e5040" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.53.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "45d8e0adf1ce7af45ab5c8720800bf85" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.53.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "48c3df8b14b0d8ee164350a68f847359" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e08fc202daa58984fec629e971819093" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5263a4f23e819d5189c294fda8ed9375" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.54.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e056f50cb9f87355f0d3f53080e53001" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.54.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cc7e9047ba591710205543bc39d9f1fe" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.54.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "4200048f12e0370baf0b9a8ecc5384fd" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.54.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "30302c8a96f38bfb9b49f979d0a6bbae" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a50ad1cab808cc0ce15f2a8e8cb81572" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "3b1d980e31462b758447fa298e4051d9" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.55.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "8f49c803a2c7357452406425ed8f500d" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.55.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "35551943094f0b6d2e7ce2809c3e51d5" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.55.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "aa239436d8266e80c0bf3041162c21e3" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.55.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ddd46a4b52c306b1a7a0b396fc51cd5e" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3a76734ddb3875974055fdd94bf41afd" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.52.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.53.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.53.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.53.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.54.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.54.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.54.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.55.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.55.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "4e7a991edd9ad0905de6b5a9a7b6fbfa" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.56.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "558638976deea622fe73b8e046af12f8" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.56.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "492a50e5e4d51a4b90442bc2b54295df" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.56.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "5d52ac18165df6b65dc6442bc0b485db" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.56.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3ddcafcb8aaa1dd45c03286ac38d3f49" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "908330b993f3093698843bc40fd7a927" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "6a3a521762d6a23d40cb9cc6460a8ec4" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.57.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ca078b2cbeac59c324702927cd952d40" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.57.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e5198d88a32adaef32543df07dfd9c29" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.57.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "1e83aff99b2f97b2d80341ad028bab49" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.57.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "52c15025316096f4bd0cbab24cfb2057" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8f2184360690877a6b487391f17738fa" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "cd374a18deddac16d57e2d3b4d9ca159" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.58.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f84068edb8d103aea3014be2d34d89d7" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.58.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1b010b022f66ee375e5e0fffd97db79e" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.58.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d926a4c46b509b180c1a143d5c6c14e0" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.58.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5cd7eeb3e3222064a47e46f9ad56a3f6" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e073a526566d7f3f3b36070a2c589425" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.55.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.56.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.56.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.56.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.57.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.57.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.57.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.58.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.58.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "5a521a0697abbd7df68065871d1d88da" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.59.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0bac5fed7a0f9ac0fe562a10e8fb90f6" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.59.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "814cdc802369ad4b61a3d0a3d0a50db8" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.59.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "a5299804d9c2810aeff0968acc27386e" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.59.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b9238b2973bfc74b1d4dce739e3741b6" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a156b96fab88b2c903d1df06d74da2db" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "c7949e14535e7bc6fbed3a8dfff21308" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.60.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "aba827073b7bd386d5d9af9c2377911b" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.60.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "07875154acc05674eaa5c909f5dd96b3" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.60.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8cd1d39e03a0d7ceff97dad13bed3813" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.60.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5f57dd46f9da560de0a32581d30f54e0" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "66628df84423854f04c92d0dc05e95a9" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "86f128745e098b132bd19f57fcc1ad90" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "9377ebede57167ad5edc9b448d8dcba2" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6b4dc432d93a6ca5f06cc73c3ecc1aac" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "79ef7139c4e5f77dac5db38a36f6c068" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4ef70ecdca9da5ff62a72696b1f87906" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a59a126940059c8cb66aae41776c3e15" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.58.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.59.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.59.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.59.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.60.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.60.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.60.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.7.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.7.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "c7884aabe11e4cdf285417d005eb42eb" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.61.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "8368da4871ec1a93fa62028750c8f80e" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.61.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "46f134043c4d8aa01aea943edfba374a" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.61.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "beb6ba13266b186813becfcad01430b5" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.61.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d684a38d772976d2197d74c1987e4cc6" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "82b4aa3d68cc82ddec949f6b485eb895" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b997f74d1159b97bbd8e1dede51c2946" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.62.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c767ed457fd31fc8c3890128b4920d3d" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.62.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5eb02828eaf438e8d184b57e93fda67b" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.62.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "218947d3dd467caafc8a3844b0bd80aa" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.62.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7fc7c9f4bcde650ef105421536dcd087" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ffc331ff322388f2cb6462c6fba69097" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "79963455e31ea4f045c8b014e0f7584e" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.63.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "26d0c80443fd3efbd51878611ea0a4d0" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.63.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4bc327bb13b5dd472a84f9c6f4e12538" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.63.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6c3e6a496c9a0ab379c526991d0712dd" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.63.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b0df3d6faad0507c1430008be85bfc69" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f71b1d97277a3029de57fb26d7daddb1" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.7.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.61.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.61.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.61.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.62.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.62.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.62.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.63.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.63.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "72e07719c566fe537d65231435b7751d" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.64.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "a9b0dc8f6e00c2bd1376f722fe29592b" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.64.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e1b3f2b35722c395322dbb537d3ce332" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.64.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "bef4f4f46822dbadbce1a688516c776a" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.64.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e2b3da75300a7aff5bb39b8f62209c3c" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.64.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ebab4ad46a282ac8f47deae654ebc0e7" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1ba0907555c3b4299324be3186b4d1a8" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.65.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "cfa8c2f578d954bc76293cc3391e7560" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.65.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9987ad9e4c1e47e06ab3301ef5a26813" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.65.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0369eaccfb6be9e2aec8beea4eecaeb2" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.65.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "93ca04ba3dad800fd515bd0b6080887c" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.65.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "97f23eaa4a128dbf235dc5ee988b4b64" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e90341f866885c90427e9f1b685b7fb2" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.66.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "29babffbe63fa005f13edfd2ed1fee38" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.66.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0b64718fa6cc8c1c4047bdd657860339" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.66.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "41f1fa4d80bb1c9a5701f1fb34828929" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.66.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "25e0131a2db6ed7ccd44832023363303" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c2aba1e9aa635aace8e2ed74c02229f1" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.63.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.64.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.64.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.64.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.64.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.65.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.65.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.65.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.65.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.66.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.66.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "605e6b7673d2405fcd2dd1ae95363f6e" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.67.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "011000359610fea816a22b6fd9449d7f" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.67.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "34a9e8aca1cfd955e6a16270e984a4f0" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.67.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0a4ea455fe5d076150c36bef66c55179" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.67.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3fcfb38e70a83b0e216fe34de7c5af6a" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.67.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "571e7e3337479624b95e332bfbc09f25" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "993b4bf5fcf95966f035123af9a05c33" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.68.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6a0322da90d71f605f2e1a6a1a660801" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.68.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b76bd32d431ceff27a1523afa22263ef" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.68.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "9b0c1b3b98789ae724c9099391b97091" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.68.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "517d4b0711129b0e9a419bea796104c3" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.68.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4f0aa2a97f85e5401fcf02ec648b1dca" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e403a3e46ac404454b5b32b1848efd54" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.69.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e840e40d35071388a2f5cfef0e176be8" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.69.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ad9311d1cfe692c130e0f3f89f0c55b2" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.69.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0af7b69080ce40cf75ad5986561b1364" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.69.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a691c71ec610a2d59b1cde71fa79ec2b" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "21efff74d65058373bfc51ff1aae8679" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.66.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.67.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.67.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.67.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.67.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.68.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.68.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.68.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.68.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.69.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.69.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "f83479a1f7e328fea30404ac6b486ffe" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.70.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ddded80b6614b34639dc63265cf20f2a" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.70.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6b9f613d9d84f6a4036d7076deded009" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.70.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "38737901bc1c8f73e62f674ccf97a60d" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.70.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "42b7f0501a320f0aa58ed70bd1c991ea" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.70.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d5bb0550da5f7eb7552fb908357f5db4" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ef68390b7a3ce606b7fb7ff5a1edfded" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "79ef2c1f47fb3da3b8546f02621fe11a" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "22f396af6474f0fef44f555cfd8082c2" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "67dc5e95e50bfda3cb500d107581cd75" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5c1921267180199439c295aa12f2759b" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "deba617c5d21becc91d43c52ebfa4b92" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "62155ec8dd46296a7609623dd0b56b7b" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.71.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "3046b8a3a883d4b19fb076f000ff684b" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.71.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "01fac5473c29654d05acd9f64d70dcd1" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.71.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "41e3f9afb2547c449a8c916e1d2940d4" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.71.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e69d2dd7e3d9f6e71e13be29026c42ca" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "90d956cafb210e688ca8c6bd1785c2a6" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.69.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.70.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.70.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.70.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.70.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.8.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.8.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.8.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.71.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.71.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "5859775ce6ae30ce1e6a8fe9f9e6aa83" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.72.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "9b47627e95b5fb7cac5e2d9ee32f89f0" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.72.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fdac1098b11c77e5bd7f77a6b7fe49cf" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.72.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8d1ab2621ba161361695454961a051f4" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.72.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c8b6a03203df9a2e1c8973d83e9422bb" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.72.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "aab01b61b015f354c4c13e3ce019f854" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "35ba2369f292c95da33c63b9310368b4" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.73.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2a3059f0bc6b4f1b2956140570181a49" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.73.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "69f94b3a07ecb8dd7ac9dd840c0206a9" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.73.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "73e62316687090c3eba3bf966aa3dcb4" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.73.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a67858097d59d5ab0199e850fd9e6f1f" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.73.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "873dc6209b3b34625e096e9572813e0b" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5c6c982621717167060fc7247ed97b0e" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.74.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6e93af7021aa4ffce5bd585465abacdb" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.74.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "aa1a37fd57a7984ea69134190fae94bf" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.74.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "986f14cfbf76201119022dcbc4e88b53" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.74.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "afe5a56236fc43fc3313b4ddc107d66b" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3108e5d21f6476b88ff6381620cae1fc" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.71.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.72.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.72.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.72.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.72.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.73.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.73.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.73.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.73.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.74.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.74.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "eee3b426d63f8b92d116f597fbeaad4f" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.75.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "87cc9d2954581654f2a1fc8079a5dbd5" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.75.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2fb2389b1ce2ce9c358f8729fb7ad3ba" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.75.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "e35808f37300322a82abc5c43565f311" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.75.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "22b64b082a0a2812d0241ae2fb5349a8" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.75.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e79f5ea95a53ade0a14297a23f0b925b" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "c83aed8927145606d3f276e7ed8a08b3" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.76.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "984dd48fc54340da60d9e9a95a2a1eb3" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.76.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5d9360453482b0cba82e5e8565fc3261" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.76.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6e80fe2c6c951623df40d486c64015bc" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.76.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "75d985ead7f17e79d48306f30be0524d" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.76.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c6038ec30f0580368cc0cb4f96096dbe" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d83775a9c9fab0f772b46959e33a04c8" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.77.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "4f6c5dc64984abbd3b04302ef32d5ae7" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.77.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d5b08b779e9447c2897997148ea89332" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.77.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "933ab4629649f8681dedc8dc8c73e77e" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.77.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "814f041f1354aa5f2ff736c7397fcb56" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e2b91df2d3777d65cd6da4730db12787" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.74.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.75.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.75.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.75.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.75.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.76.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.76.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.76.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.76.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.77.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.77.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "6d1a973d50415e100f6ad72edb25fd3d" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.78.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b09018ffb15893d130746f6952dc171e" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.78.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "08cb6b0d19938ad2796374671f201f12" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.78.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "e606a54043fd9815b0ce099396e6720a" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.78.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "486de45b38490f3725f30dd504489805" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.78.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fc958aa7b87a1f13b12c009ffc5ab8d7" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "51b0b6c9488b28855ae85dd02273e9fc" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.79.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "16192b2bb468a659ffd8c026cefe66c7" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.79.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "031f90387fcfbb065f184c9f83ad30f9" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.79.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "3721a36d5ed6508c5ae3007dd2132e1b" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.79.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ad3d774a8c7a25d70c3883b6d1746f25" }, { "dataPath": "params_shard_483.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.79.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b1caf6f5764114825ac80fb9bae7e2bf" }, { "dataPath": "params_shard_484.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "65f5bd6ee4331cf3bc3860ce93959109" }, { "dataPath": "params_shard_485.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.80.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "4eef14c77f8df4d9ed7412efefe70821" }, { "dataPath": "params_shard_486.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.80.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "09de32d34f8c6000baeece3d86f0a130" }, { "dataPath": "params_shard_487.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.80.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "05596be2d600f69793bd2ded746a09a9" }, { "dataPath": "params_shard_488.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.80.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "02bc36a404aa76c060808b1de36c9243" }, { "dataPath": "params_shard_489.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.80.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5df67c1e6fc25a0fb6a303c5eb6bc80e" }, { "dataPath": "params_shard_490.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.77.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.78.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.78.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.78.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.78.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.79.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.79.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.79.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.79.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.80.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.80.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "26a9e745268753e64b3a46fe46053b68" }, { "dataPath": "params_shard_491.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "265317a70fb385d46edcbb812c39e17c" }, { "dataPath": "params_shard_492.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1182d45627da0e71846de409f942dceb" }, { "dataPath": "params_shard_493.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.81.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b9297a31fd6dda7fefe2ea2920679a06" }, { "dataPath": "params_shard_494.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.81.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b1b32b3954172991a3d029193c0ded6c" }, { "dataPath": "params_shard_495.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.81.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "04ba50f79587bb80f9a4783d8734a244" }, { "dataPath": "params_shard_496.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.81.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ecd4aac38555ae5d8ea7ca7e78e6dab0" }, { "dataPath": "params_shard_497.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.81.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b780d6582de8e8af7af9975a807d07b0" }, { "dataPath": "params_shard_498.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.81.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1adbecdf23a4220c77f6bd129fcc58b5" }, { "dataPath": "params_shard_499.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.82.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "212bd80423c411b2cfecf812836e3c3f" }, { "dataPath": "params_shard_500.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.82.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1eb8986d590f751dbe7039e0b4022e8e" }, { "dataPath": "params_shard_501.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.82.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6ac883583579c396e5a5850af0df9f5b" }, { "dataPath": "params_shard_502.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.82.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e8a2dd6ccf7c34d00bf87c89267f3e21" }, { "dataPath": "params_shard_503.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.82.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1fb33570c19c645d8a017b64a1025466" }, { "dataPath": "params_shard_504.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.80.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.80.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.80.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.80.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.9.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.9.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.9.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.80.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.80.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.81.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.81.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.81.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.81.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.81.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.81.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.81.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.82.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.82.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "a55feb4e828e68f53cb238fa6cf33338" }, { "dataPath": "params_shard_505.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.83.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "1219a21c481ba2127170e0f2ac8c7e9f" }, { "dataPath": "params_shard_506.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.83.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "41e95a29cbfb807eb4b905261fdc81b1" }, { "dataPath": "params_shard_507.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.83.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "17a7c3f3bfbbceb26c5186b9b6fc72b1" }, { "dataPath": "params_shard_508.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.83.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d00b6e4281788ba9b351559852d71e25" }, { "dataPath": "params_shard_509.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.83.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7fe007137b95dcff88e3719b862229ce" }, { "dataPath": "params_shard_510.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.83.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "f3218577ec1b2ee7b0bac929683ef6f8" }, { "dataPath": "params_shard_511.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.84.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7be192c5697a10e2491b27306ea56cba" }, { "dataPath": "params_shard_512.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.84.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "73dcfcd5a7194a478e9cc8312dbfd1fa" }, { "dataPath": "params_shard_513.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.84.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "72cd6516efa4b2c79ab8fb7410277385" }, { "dataPath": "params_shard_514.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.84.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "258ae54d491c5670b114cd145a138e81" }, { "dataPath": "params_shard_515.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.85.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2240e47fa8ea616d898a4263aa6a33f6" }, { "dataPath": "params_shard_516.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.85.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f37153c779021c13be9e042934c3cc19" }, { "dataPath": "params_shard_517.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.85.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f2c88644641effa619ef7449f96213b4" }, { "dataPath": "params_shard_518.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.85.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b59d90447911ea8e84d9afdd01b95973" }, { "dataPath": "params_shard_519.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.85.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3b457b753fe0b54c32ecf3bcb84532e1" }, { "dataPath": "params_shard_520.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.85.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "4b531da74e59a022f31ec94c68e335d8" }, { "dataPath": "params_shard_521.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.84.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b17c96e36545f148a4848f2c2bbbeee3" }, { "dataPath": "params_shard_522.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.82.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.82.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.82.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.82.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.82.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.82.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.83.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.83.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.83.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.83.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.83.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.83.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.83.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.84.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26772224 }, { "name": "model.layers.84.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26780416 }, { "name": "model.layers.85.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26788608 }, { "name": "model.layers.85.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27837184 }, { "name": "model.layers.85.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27837440 }, { "name": "model.layers.85.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30196736 }, { "name": "model.layers.85.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32293888 }, { "name": "model.layers.84.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.84.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "93a6f98a7aa4dfc8a645dba01f182b2a" }, { "dataPath": "params_shard_523.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.86.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7032efda6bf4cb73d5037ab66b6ba745" }, { "dataPath": "params_shard_524.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.86.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1490c992a46a4f480174e06ebaead314" }, { "dataPath": "params_shard_525.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.86.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0b7da9d3baddf7f57c567232c243c3b5" }, { "dataPath": "params_shard_526.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.86.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "46f2b8b18be9b1195473a1af290b7bde" }, { "dataPath": "params_shard_527.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.86.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "973d85f642a175c347054f8d16964c42" }, { "dataPath": "params_shard_528.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.86.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "99e7fed59de6c9a4574e840951653531" }, { "dataPath": "params_shard_529.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.87.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "07dee75d27babd858b071bc336e24983" }, { "dataPath": "params_shard_530.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.87.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "011097798874d52f43a7e132b78349df" }, { "dataPath": "params_shard_531.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.87.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "98d5f36e3b3af0a078ad2847759b0c36" }, { "dataPath": "params_shard_532.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.87.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c7f8eb4432982413b85abc315443ad1f" }, { "dataPath": "params_shard_533.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.87.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "39e5a28fdfe8890cd0d561478b967d4a" }, { "dataPath": "params_shard_534.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.87.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "dea73fdf08661dd9596c2be0f851f668" }, { "dataPath": "params_shard_535.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.88.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0ffc7c0f366917a4516de6d6f070151f" }, { "dataPath": "params_shard_536.bin", "format": "raw-shard", "nbytes": 32294144, "records": [ { "name": "model.layers.84.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.84.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.84.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.84.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.85.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.85.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.86.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.86.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.86.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.86.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.86.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.86.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.86.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.87.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.87.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.87.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.87.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.87.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.87.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.87.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 } ], "md5sum": "879bcb857305d3871849a0100fd054f2" }, { "dataPath": "params_shard_537.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.88.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5067e718387c13dbfbe18c933973e7a2" }, { "dataPath": "params_shard_538.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.88.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "db5bf828d25d1401b4997fc2541fe1e9" }, { "dataPath": "params_shard_539.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.88.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "ca824894e4c08ac34ae9e4c3e8b4583e" }, { "dataPath": "params_shard_540.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.88.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "12bd88f068131928cd4cb69b847c8471" }, { "dataPath": "params_shard_541.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.89.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "350e8d912ed8337d98213b8fa9c0bc11" }, { "dataPath": "params_shard_542.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.89.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4cd3e4cb24cb6a7f2822aba01935feb3" }, { "dataPath": "params_shard_543.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.89.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "9428af54d94ff0eeb2384ab60d68a70d" }, { "dataPath": "params_shard_544.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.89.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6461a51cc69fa18a9eea7cdd898bd292" }, { "dataPath": "params_shard_545.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.89.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "3e2cc6e0d681cc20645d74e69bd01f7f" }, { "dataPath": "params_shard_546.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.88.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "135ad66f6869c2fb3ca1fa90fd2d87e7" }, { "dataPath": "params_shard_547.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.90.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "49684b1c788bcf011f3255a0014594a2" }, { "dataPath": "params_shard_548.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.90.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0386232e021fa69b2b070fbb6fa388d0" }, { "dataPath": "params_shard_549.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.90.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "189032a4ee37c3c1d633d267c81f591d" }, { "dataPath": "params_shard_550.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.90.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "90b9519c0151371ca3625b2bd659b5df" }, { "dataPath": "params_shard_551.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.90.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8bcd1ec4fac7f8f444593112135d6c8b" }, { "dataPath": "params_shard_552.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.90.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "093baa21fbad23fd7f89831345e48873" }, { "dataPath": "params_shard_553.bin", "format": "raw-shard", "nbytes": 33326336, "records": [ { "name": "model.layers.88.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.88.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2359296 }, { "name": "model.layers.88.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2367488 }, { "name": "model.layers.89.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2375680 }, { "name": "model.layers.89.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3424256 }, { "name": "model.layers.89.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3424512 }, { "name": "model.layers.89.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22298880 }, { "name": "model.layers.89.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658176 }, { "name": "model.layers.89.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755328 }, { "name": "model.layers.88.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26755584 }, { "name": "model.layers.88.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27804160 }, { "name": "model.layers.88.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 27804416 }, { "name": "model.layers.88.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29901568 }, { "name": "model.layers.89.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29901824 }, { "name": "model.layers.89.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29910016 }, { "name": "model.layers.90.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29918208 }, { "name": "model.layers.90.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30966784 }, { "name": "model.layers.90.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30967040 } ], "md5sum": "643f64b69f4dcc04bfe4558720b12849" }, { "dataPath": "params_shard_554.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.91.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "799c9b173ea7f4e9c92565ce08fd46f3" }, { "dataPath": "params_shard_555.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.91.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f16e46b90f3b4c0d45f5ec898433c2a6" }, { "dataPath": "params_shard_556.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.91.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "eb0dbf73b51fa7315d55aa83fda0e07a" }, { "dataPath": "params_shard_557.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.91.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5c23c345feb1097839f64c2a01ec4dc3" }, { "dataPath": "params_shard_558.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.91.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e873b847591bef8142aa24d38e5a0af8" }, { "dataPath": "params_shard_559.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.92.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "542b413501fc49230b352414b4e41edd" }, { "dataPath": "params_shard_560.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.92.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "8cd67e98b4fc001f6fc742cf3fc484f1" }, { "dataPath": "params_shard_561.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.92.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b5535767bb03d45c4f84f950a0218950" }, { "dataPath": "params_shard_562.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.92.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "26566f06eaafca603c111587c2cd30c9" }, { "dataPath": "params_shard_563.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.92.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "63c2a1d3717261a1e16da4382b207170" }, { "dataPath": "params_shard_564.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.92.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c8cbe07aebfcba33f7d31be2d26cf106" }, { "dataPath": "params_shard_565.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.93.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "59f8973c9c8d36019e42a647cfe63a6b" }, { "dataPath": "params_shard_566.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "model.layers.90.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.90.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2097152 }, { "name": "model.layers.90.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2097408 }, { "name": "model.layers.90.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2105600 }, { "name": "model.layers.91.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2113792 }, { "name": "model.layers.91.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3162368 }, { "name": "model.layers.91.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3162624 }, { "name": "model.layers.91.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22036992 }, { "name": "model.layers.91.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24396288 }, { "name": "model.layers.91.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26493440 }, { "name": "model.layers.91.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26493696 }, { "name": "model.layers.91.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26501888 }, { "name": "model.layers.92.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26510080 }, { "name": "model.layers.92.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27558656 }, { "name": "model.layers.92.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27558912 }, { "name": "model.layers.92.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 29918208 }, { "name": "model.layers.92.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32015360 }, { "name": "model.layers.92.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32015616 }, { "name": "model.layers.92.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32023808 }, { "name": "model.layers.93.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32032000 }, { "name": "model.layers.93.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33080576 } ], "md5sum": "4572cbe6d2a09b7212038aeb793c84ab" }, { "dataPath": "params_shard_567.bin", "format": "raw-shard", "nbytes": 21233920, "records": [ { "name": "model.layers.93.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.93.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.93.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.93.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 } ], "md5sum": "16513a2d97d2661e2a93a0781d1cef5d" } ] }