{ "metadata": { "ParamSize": 1227, "ParamBytes": 132312161280.0, "BitsPerParam": 3.6623472527700525 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 311164928, "records": [ { "name": "lm_head.q_weight", "shape": [ 151936, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 311164928, "byteOffset": 0 } ], "md5sum": "0ea644e6a2d6ffe999e3436b3a99537f" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 38895616, "records": [ { "name": "lm_head.q_scale", "shape": [ 151936, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38895616, "byteOffset": 0 } ], "md5sum": "cef00afef23c0509d7021c40f1aa2337" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.93.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "cadf7e68b234ad8d2a84f49a7ca473c7" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.93.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b31ffd2e2f0e0df5313b8ec101cbf3dc" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.93.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "55d443396bda589d72aff5db3389cf82" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.93.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7f2fadf07bd80667bbccf88cd947c420" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 311164928, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 151936, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 311164928, "byteOffset": 0 } ], "md5sum": "aa2172ae902369ac56cbd18f319c716a" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 38895616, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 151936, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 38895616, "byteOffset": 0 } ], "md5sum": "5e642dcf66c7a6875c9bb6682181e215" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0bd32a1efc905750bd7b817279c0903d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ee92ffae888798470e9c9dd3759acbfc" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0b8c4edbfe636125d12a614a08c0cd06" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "577902e0bf8335531575fe1c5ddae005" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "aeb91bea3998d9d60cc3963e83b8338e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "545e2f6d347f8c01745bc81ed9c484dd" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5df62be4666329b30fe613672dd105ec" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "32bc6d871b9c8a82c3ddf91365208843" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9dcd7831db3409d4997f853bef2ede74" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "235788bb051dbdc56670f03bd0a6a843" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "63f17d6d1029d0a1fc72cd71a8c3b1b6" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b69b862ab5718b164848767a7856a389" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b1044bf5044af86bf46b14e24de0cbf5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "dace624c53c26d72b88761586490da5a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "01f7b7b1ca4b6890d52daa07971b5d22" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4cbec3326c230930a579deb730e32b7e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "0d7cfd8c43e85a5e4ac388eb8444b83a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33350912, "records": [ { "name": "model.layers.93.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.93.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.0.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24576 }, { "name": "model.layers.0.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1073152 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 1073408 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 19947776 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 22307072 }, { "name": "model.layers.0.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24404224 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24404480 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24412672 }, { "name": "model.layers.1.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24420864 }, { "name": "model.layers.1.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25469440 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 25469696 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 27828992 }, { "name": "model.layers.1.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29926144 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29926400 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29934592 }, { "name": "model.layers.2.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29942784 }, { "name": "model.layers.2.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30991360 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30991616 } ], "md5sum": "60c289fa1e9897e871b162eb46f098ca" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "31133479b128bc41943524cf4636fe34" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7bfa32bedde2e8e3642f100ffc4b75e5" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "baad333304432fe98ae42e936899877d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8839618b700689a2f161a9006d8934a5" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d16bd0251cff97b4ad71df7f46680c35" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "449b8410fb6cf6ead4699e3846560911" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "46390189b44b46a1fed5c15b41ac8d34" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e8c6514f92e3ac09c7ce122e2928400a" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "32d00a2817f566b7b3b634f3d354300e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5dd1b6ce007e46701f90cc54f964d688" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5751f382c2803e449eebce0cef124953" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "cc1df5c124ab24354b6859252e40e8b4" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dbb2abf6c17c9071b2985067b530a189" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "eded8ab1d0390d9b2d1dcbb07f294edb" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "83e2b67f783be9310a682fbf92b68096" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f85e08cb710842a01d82a006ce2ed718" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5447bc2228c247188e44be998be53d63" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "140da2e7af47799d10d793c09468e016" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d66ceea871b532dbe75814bffe0f6bc9" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bd51748506c93133689780f73b4c2620" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2097152 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2097408 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2105600 }, { "name": "model.layers.11.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2113792 }, { "name": "model.layers.11.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3162368 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3162624 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22036992 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24396288 }, { "name": "model.layers.11.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26493440 }, { "name": "model.layers.10.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26493696 }, { "name": "model.layers.10.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27542272 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27542528 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 29901824 }, { "name": "model.layers.10.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31998976 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31999232 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32007424 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32015616 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32023808 }, { "name": "model.layers.12.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32032000 }, { "name": "model.layers.12.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33080576 } ], "md5sum": "93541bd537065b75e060b55f86177292" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "cd80f0a9a931f588beefe075dc78d37c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "503a0ed2b7fe0cd42217d1f94c5d27f6" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "12df992e36211dfe2513efdde08d8335" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c5297760321df3cad165ed26b93dfa92" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d05077a672659cfdb1dc3df86d11d7dc" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ab1614facf03f8fa9bdba96ff99fbb49" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "1fc6bc644bc1969892b821f2d16bf241" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "32729fc9a029f0534bd94d29ca334c7c" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d204e7a0fe4ffe5462b02c81de85515f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fbd12f1d0e973ab826c4b1f3c9957bc4" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "dc4b2501a03b8b8aa19d332cb35c6291" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1395f1f9a32cfcb66dbfac93b64d1c61" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "feb2e520ca5458596ff731a5bc084422" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6ec5193d25da9fdb77c488d32473ea9c" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "68084e802b88fcd23bdf75360acd5b0a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f3e24caf5950759306d82ae0ad7752be" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b6474c1b5fca92ea6d445b7a539fafae" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.12.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.13.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.13.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.13.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.14.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.14.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.14.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.15.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.15.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "61ed234f15ff55882ddea52123f0b819" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "33ddd7ac5cc6512b4f693f3ab61d6b0e" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "66f9f127e7d99bb10b25996e4d689898" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "1f48cab117ee52b768d538e939a40525" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b3c30e47c3fa000067017375ce44e8a8" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "113a7311cb86e6b840f01211dfa01872" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "f8528264dc77713e902d3e11e8435c18" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b0ee5297d6d9917d03c3311218359589" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "fe30c15508bc5a4a61a32a4d89ffebb7" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "9eb84fc5da4f58a58c2f2b208d46d060" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "61ddc5910f2be1647e23a2c9e2d34dff" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "49407d230865a2b26eef119aaa1e4172" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "fe3724b5c287450c30dee1c969762950" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "80dd45d7c182a943d5201464e440b5b0" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3666fa5ee7debe4748191189e495d21e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f28b22ab2f75ac4345b2b9a01e9b759b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c787d3c5cb4763bc705797df79682990" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0cfbc6ea239f614d090cacb0285911f0" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.15.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.16.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.16.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.16.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.17.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.17.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.17.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.18.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.18.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "ca5bf9b9762967155427110fc6fdae05" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0c7a8e6e04efdf6976cd0d8eb556a154" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ae6a491b3259b519ab222f3323ab7704" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "23c228b31063f106661fdf2391012e0f" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8aecaab74bd390ddfb93249bbb76270f" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6be193b90c48c2e06d9713db7d733cbe" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2f5b3d7f2b7c8c606df205b28e2a4748" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "aea898715c5203d76b6d171ff976d8be" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "162a757540792ce4580602e1692c2264" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5a158d7f40a771926e3683c17a2091da" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "f41997bc91e005573e7144d83ab3c885" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "173d0c61984f6406a27910435bebaaa2" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b2231ad17f8d58b3bffe9734f44a3977" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5d592c99cb566ea66405342eff57bbb3" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2be8149c114443ea6426f2717514dc93" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b1b4170532ff3b29b022872c587becaa" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "922fb4c09e4c889d9c80717ba1fcd15e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f187e0b01fdefc525125a85502c53c68" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.18.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21250304 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21258496 }, { "name": "model.layers.20.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21266688 }, { "name": "model.layers.20.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22315264 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22315520 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24674816 }, { "name": "model.layers.20.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26771968 }, { "name": "model.layers.19.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.19.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.19.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.3.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.3.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "e5ea6b8d4ae00adaa79b4c04cc4b5fb6" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c3ef424b8a28f067cb6e524d2e300d8f" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d6f9131199ba91afe85e882eef41b870" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "ce87a7d3b8250c299f868fcd5ca988f9" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a65a998700945ee428e29748f13cc6c4" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "02c5a9f3315e875b58e0807ec833ff1f" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "bde0b1b2e58fde8233a969a7e6832ab6" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d466aa84f717900d1ba78e0ad964c62f" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1110a61e48383c846965a5f154dfd12c" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6e404a497568b2a2b9a91d4f77694f9c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "add0b4eda720bee42c69947c5ada3687" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "94b90311495ce1bb408b98b297e031b6" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b34cd3d81378bbc56eb1e122a2800878" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0fb976b8154d2dd405d3e8467469fb4a" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 32294144, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.3.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.21.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.21.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.21.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.22.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.22.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.22.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 } ], "md5sum": "362ebe7c1a3b8b553b1e528d81bac211" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e9075a4f6fa4f9cd3ef37535b15950ee" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "96772aecd7a25a1cc9612d72056ed335" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "a6b31af1334761f2c0a6379c07d42850" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "46a5f7c84402f47b1c7772e2b1b47eb5" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c3777473012aea81d70aab91bfd0c019" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a75fa45c2c90b9db3dffdff75cd7c93b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "a694fb30e517b5a5f7ec0eceea4d2f79" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "74f0ce65d0ac1451a8ed9e8bb8635020" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "143ff36fd6b0b619c77014064b5d9bb9" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "143b63abeb294e1c578a656315620e1e" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d09132c3204efa3806bc0af255da76dc" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9e001888e76fb56038828d214606deed" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "00bce5f21260016ac58d0c69e6d3d7e9" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "14087ba004a1253875cd49e1b2a7a25a" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "00a01730aef8d2d46c290173f352d5a6" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d2c6c50c01e7dc4413df9f872f2f0b1c" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 33326336, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2359296 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2367488 }, { "name": "model.layers.24.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2375680 }, { "name": "model.layers.24.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3424256 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3424512 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22298880 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658176 }, { "name": "model.layers.24.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755328 }, { "name": "model.layers.23.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26755584 }, { "name": "model.layers.23.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27804160 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 27804416 }, { "name": "model.layers.23.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29901568 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29901824 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29910016 }, { "name": "model.layers.25.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29918208 }, { "name": "model.layers.25.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30966784 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30967040 } ], "md5sum": "0d9ac8f4f7f9cef7ab753ef85f93980b" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "4b9d9e26969cd2ce561671d88d098855" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ab6d6cdc0ea650a1d9b067044358e14a" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f264ba5268e8c73e1fc8bcd59ba45af3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "56dda089e0b0e5432b175d153e650ced" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b4e5e66c646026b48ecfacb6bb60dda2" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f07067b0fbb7d96e52506a8dadac9661" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ec9d6cf5dc919d978013ffb98d9e4e12" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "9052c4001a9a659c3284867a02659131" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "17e7e73e4b5d319466316328aee12901" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "ea554cbafa6a89846c4a3fbba337e97b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0f029d9a5b0ead41c35b7ca6e2d786f4" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0eed9f85337b1ff2271d2aff1eaa98d3" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ffc00b484a74bc41720ff8e8885458c6" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d4fded3793f3ba6c78b5e333c05fe46e" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "15f6846daababbf2ea8c29b46328e75a" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8e54b4a920928d2f55545e6ec91ecb9a" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2097152 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2097408 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2105600 }, { "name": "model.layers.26.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2113792 }, { "name": "model.layers.26.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3162368 }, { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3162624 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22036992 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24396288 }, { "name": "model.layers.26.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26493440 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26493696 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26501888 }, { "name": "model.layers.27.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26510080 }, { "name": "model.layers.27.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27558656 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27558912 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 29918208 }, { "name": "model.layers.27.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32015360 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32015616 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32023808 }, { "name": "model.layers.28.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32032000 }, { "name": "model.layers.28.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33080576 } ], "md5sum": "2573eec0d2f7562b29d7d12d782c3bc8" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6698351297c3c85e0c41d4f6bb48a4f7" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "37bba13115ab0db0a45e62922aae551e" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "dd49137e11f91c534f1e5f675c533364" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7dd6d96a55283c24ff126b0b0b3995ca" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ed16d7f660f58a2d7025f61dd93c9d41" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "4a10b5b41fc427b3bacbf8a60009fc9a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "36164c6852f2511351a59c2b18632adc" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cb0318725c0abac491907cab775aff31" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d1b944c13ca3ac82ee2c394ca684dc4c" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8cd71eb703dbf593a35f3b6de5b59202" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f459dd7903680b7381fd941c9369843f" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "10a558a74b494b5f37aa8c736bd0a5f1" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f7c4a82472fea77abf86b07d150eea2b" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "43bcbe42710b76960a1cce2d32836a6b" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "478ca60bf1422d0cb48b60cdf3c638e7" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9eade7c431a8c2a25abb204698825b7d" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a7caed0dd2bd5ddfe3e098339b0e2c6f" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.28.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.29.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.29.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.29.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.30.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.30.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.30.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.4.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.4.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "439a8f36d09dfb68e6f0c3465decc365" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2206df6a11151c154b56c6613102a9f8" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "233c473ab64c68ff5e83622f54ecc90a" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0fcd628ee9a879c90110fa295ee52f3b" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4118d84d01403b505903f28fce953aaf" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "107885e989a5d9060d4d9d74e11ca16f" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "c85956a49d47c69a17d9fe6b4159f119" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "54fa02f05828be0ee0220b32f51800e7" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3846cdff742fb3ceb0373ff4050391b4" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "4c5c1f5042d2d6d3b8c8db7847676fed" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "397a9f239579b84b72976be13d1e625c" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b8c95db484793dc4dd7d08026c52f1b5" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a5e3e62639cd08e130135f5ca3c36364" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5d7b26dc40a184f40a1182e551cabde2" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9c91a631f7838ad15d4f3cfdb5a76fc9" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "cc68787b252ea23c04911be484543b72" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6c16f0adb3fc296ee8c2f607da1d2b24" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "340117e751ce0da7ad330a166c2842d0" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.4.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.31.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.31.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.31.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.32.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.32.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.32.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.33.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.33.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "87da2548e16cbab97186313f1bad9004" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b8d70fdb7caa802fe9e6ad5d26d9219c" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c468203b28a39d1574a15f27647393b2" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "5ec57479236f7b2ae6336723cd9de9ba" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9909670be62f90afc8ec1116782ab8c9" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e7559f7b382f1b970179a92490bd4e39" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "99dbb71db1736b8540f3a89d84ca8ffe" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "fe140c9cda6d22322479de56e6577a5d" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7727bff413119e165d2715f9c2b53512" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "363272eea43d8010182c42471dac9a02" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "23a606888a5466f009c1f254394d565f" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c71518763c5568190e5f843aae366c75" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d3c9a705ca442be201c1fc17b7763edd" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0994d8f895ab5ff4f108bff1cc988d2a" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3be420678d80ed5b39c20d6d19737a74" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6a496b49b82a09f0b8134494278153fa" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d348951345c0501bd1e1b8823e8ca60f" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d908336c9e4dfd29f4a9d6d4baa36625" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.33.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.34.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.34.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.34.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.35.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.35.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.35.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.36.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.36.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "dfe855331b64e4185ecc390a64379078" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "3d78161b7d381dbfbeaa9682d8f6829d" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "45d1097a84d78e1b5112b61cb2ee3dcf" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "4a96b5c4895a67e57a1aa11c9259d69e" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9a5b8b32ac7d9e64217808bd291470f9" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c1a87640440ea556556917ebcb99e386" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "fe3d6c64cef233c76735f7d98dbc1573" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "930dd91e0cb661985bd7c36632bc546a" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "843b85be76e723ff965375f94605b0d9" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "79b4bdc19d7d1c0fcb3a360fad38da2a" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4a306614a68ee85560fe1bb23b64408e" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "97b761babeabd716d0429a51c4c15144" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "f9ed56c5dd48be976b0d9be3f80aecc4" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "baab414742869a0c6b8e05b09dbc5e4f" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3dc682615e93409abce328d8caa940cc" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "cf81d853859c08a14bebcf94037e7e59" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1fa7d228cf23cd8296fb80111d7230f9" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0f5bf5f0c73b82a4ad564695a0027cc3" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.36.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.37.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.37.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.37.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.38.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.38.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.38.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.39.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.39.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "b17643de9503ebcb7696be74aca06003" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2c28af83331be37176d9bc08b535ed04" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e89a1010dfd74b06f48ac9eba6fd613e" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "91a0a2b2f403791aec1976f226f36981" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7e59f44c307f6eebc8ab852a4fbd37b7" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cfd609472acc638238d306d14fb08cae" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "cd045a93c964c928e3804fa1f2acc76a" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "89f0978e635ed0d1ed3244f33e8c8de9" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "165ff84a5f915b55d9ee98720d74456f" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "980058caf9db7afc8ca37f1e062456b8" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6c61f9dbc8fe2127789862f04a36f268" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "00b37f88fddf0831f59289fb03797681" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "648beb102201693edbdaac1a3b043aa3" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0208838ddd5ebe5eef2f790939f3926a" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "eb50d9c3f0a8b9bbadba33c045a1487b" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c5feffce4e1ffd12213a939ea9c88098" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fc538ab01834059dc39ad562589cb44d" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d8c08bacf19201ab410ac4a8055b6b82" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.39.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.40.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.40.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.40.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.5.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.5.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.5.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.41.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.41.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "7ae5be13fe596c0fac50be95cb0536d7" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "efde3a433c8e183bea914bad7fdedb0f" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9df5bc0c1314824faca7701828a80704" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f48006b4569cbca761e7e28f19680717" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "600c0fccdd496cc60615cc4b730a8c21" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c96bb2be36303e8e0b807a8d3ee42fd2" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e71ef8c352973c66a5ca005831981e47" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7aedd85575efbc20ca6bf6df259f5263" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c016814b90a9ecb6683bf6f428a59468" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "df49e865df9c2318faed54f1d70d82a9" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "04b123d0459931618e423da84c38b807" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8176365fa2d35d80f9f40fba9ae9814f" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "48b74d1a9322a413fd4c598bf0ab2db5" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "54fcc0116be099473f6b4572843f223c" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b42c6fd65cb81485ea686dfda2b15954" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "e099f9cc4dbe388a6375cb74e87b322a" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5f0698322f505f97781d2a7bb83f4db5" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "828643f99cb993eddd4aff9759974d91" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.41.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.42.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.42.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.42.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.43.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.43.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.43.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.44.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.44.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "7060a9eee7326acf07df1972fd2cba7d" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b3d38df3f7ddd45be10840d3831f30da" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "63988a26de6d71dbbd3c2250ce7b9909" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "7e419c013ff1ee12bef10e615b8f6800" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "185ddeead759bb4e069c7128d9a45c64" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "13d8478dee3cc7e60f9fbf5aac60a963" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "0050c3a7ad594f8cd53df82fd8fcf044" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "90e15691168f7a62410a57331213185f" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "89558b5bf0c15a533f2b3c8a76f94768" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f4728509ab65a59f9ddebae429cee577" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c240d05f2377a87fe00536adb80d216b" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1945c7c93bf4a00618d0fcb06aca8d38" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "a8f2e1a59c4545dc411f3d6709fb5a4a" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.47.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b889af37a5fb0105435ca153b2b74fa9" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.47.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6c3a81f74885f00c80004873e2c2b6df" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "1c31eda4ac3b3493a71bc86eea664dcc" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4c3588215d5ea68dc4f49bae7fabbad2" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9f38e8cb6030fcbceec7b8aa27f74c1d" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.44.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.45.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.45.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.45.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.46.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.46.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.46.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.47.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.47.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "702fc4d097d6808655c0f6b3bfd205a9" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.48.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "aa57f9f00740a1ba9e9aa660b95d167b" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.48.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "febaab83594b552040668d3c071974b1" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.48.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "841d509f8c715b3100121a74e76669d3" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.48.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bd3ac88ae51289d72bb4a7a269c20e60" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f0cb566c7a5d0cae3511dfee53bbc8c6" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "252ba19d15c17d8b72e2e5939d94f52c" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.49.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "dd58d256aa49ec4b0bc40e6a3a3466a4" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.49.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "499489aabf4fc68e1eb527d69d289c61" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.49.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "18af62608aa7a0e930dc8e0697063d26" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.49.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "959601adde6d3c2ba959267ccf86868a" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "faf128d1dea21e2726cc1939f8e918d8" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "89fe4b2e23495253aaa6667d62a6e8fd" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.50.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f0073a0e65b260dd156403fda8d9c566" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.50.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bf30c7f496141ba39463665b5dae2a88" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.50.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f45d751ea100e6e5c33f40edf42bb63d" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.50.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ccb54192f41c8b3df6b553ad6e7b924a" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "82c1d7719e9552a0579339ac88f0f0ca" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.47.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.48.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.48.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.48.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.49.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.49.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.49.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.50.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.50.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "deb94cb67ded22d933974ca05d62efda" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e588b4427e8871f9ccc46aed24360489" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "eccd3b279f6a68b979794e5ce174c42b" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "b55017fbb7d82c5d2903d6c29295fa81" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a522def4607bcdbf7d38bd4bd000eb76" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "66800f3f87233edad930dfc995af7c7e" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "7e3b1769c1fc53035e23681cde8912f2" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.51.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "73c0118d167d4f31faa2d6e388e8e0ae" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.51.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "af19f7cd3b8a5077a997c65f4ba3d88d" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.51.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d24ea0a3e9a38a7f427f6215f63fcb59" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.51.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "89808d4e5daf1927d69064f2e27aaf77" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "27def8419a317deddcce89e731442cee" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "8721d79c1cc363cd85e1dffdc15a0273" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.52.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "766e227be50fa27502a2e8d87d38fb75" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.52.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d938818fa936d80635a7e3a0dfe9e05c" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.52.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "96fe0c958aad3aaaf2850cb822be5ddb" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.52.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "674f645dafbf24ff1fd665d5e8ffecd1" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "db5d967ba79e5ee19b59af805768239c" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.50.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.6.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.6.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.6.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.51.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.51.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.51.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.52.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.52.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "586babec6dd22899c3dddf1a142b00f3" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.53.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "5e8219db441366da66d0e9ddc4dd59ab" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.53.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dfadd16b84cecc779eb2c4d5b626bcde" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.53.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c13860965457ff8d363e6d69b17ca8d6" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.53.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6cdeda34a164110c951aec36abcc3995" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0c55a69082f8fb1688610e3b8d53d84c" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "b5304345cb3b3023a3ac18934e002656" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.54.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "349227bbbef41ca9d9a71dc95a3b5b09" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.54.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e2c20a739165245e6f94f22f20e407e1" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.54.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8d75199146a4d18256dbefc26b212c29" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.54.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d499a9f27534d2599cb658652860a716" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4d7467e60cd5cd2084900865ae3d5d58" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "f8742935aa0c8dc280f6aa2defa581ef" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.55.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "a106b9bb89f384adaec172145dcf0219" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.55.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f44bcd5825fbc8111c08fc0dcb2bd756" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.55.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f763b9046fcc65bf730500a47ea146a5" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.55.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bd803a87f95ac8cfcda50ab02b00a7ce" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "92e6f3849375116068af0dddf812cc32" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.52.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.53.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.53.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.53.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.54.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.54.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.54.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.55.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.55.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "f3bc18c3e131d65b6f2380567d4f4619" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.56.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "dc173bdef57e57fe7367ad45820d7eda" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.56.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "196b6d5efdfb9b17d0ce2ac27967f75e" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.56.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "2fd70ad0fdd927699439e5026c8af900" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.56.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dc8100931a5cf66dfa51a75880968913" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "af52e3957a6bbf0ac329f570e4984d33" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "df709b5b9b5df2c042b863c4eba15191" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.57.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "8a05222436082e055c033e7b35a04503" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.57.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f9a8f2cb17e6fa6d2626fd0a15bb5736" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.57.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d18fcc4a823e38f23d21d0e44ec18167" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.57.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7698fcfb21edfbe28d2d87cf9fcb460a" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6ce0eadcad1ac97a29725cc7a836b45d" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "e67706f8d853aa23d0105af1179d944c" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.58.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b001f4db0cda0e7a947ffb081e25dca6" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.58.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "40eab6b7955e28d34257d0223028d215" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.58.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "729817000e3208bd7627e707c69013bf" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.58.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cec98c448eaaa894b4b648d259e07a05" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ab7b281d0230c0b77809675bc50fd404" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.55.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.56.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.56.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.56.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.57.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.57.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.57.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.58.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.58.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "f288cc50111b7585f1a73b21d4512da4" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.59.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "2a4dc6f25772f2696cec807ca9c89b67" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.59.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f3c88404956baa97d4b23cd45aa796eb" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.59.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "3e7d276cdd2b2c4dc00f50027486f6f1" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.59.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "da6b890427ab1af5192ad6d1e540191d" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "676a79a45e070c77434118a8e76b7c18" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d7f665e7435729ea8f6b343da68b234b" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.60.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "a0d22cb41fb04b5c16509b12cad32c1d" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.60.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5df380f6897f7e41fce478b61804d611" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.60.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6c01577a0ff90b459bf6f03235e40d0e" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.60.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "edd934f89c15e26c95d8be351071302c" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8df40a2d728a89081ed73ba793af5c0c" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "93663283e898af9b8c2fdf766d798ac0" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "08268c0d6b509f5faa6e061d651526a4" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "78004031f19a6a8c4cee492939e98a6e" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "50da49e7d829e08c547137cc551a46c2" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bed8e524765d45ac285594ea74a68c62" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cfd9afcf87f06476f51b14a11df0a6fb" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.58.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.59.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.59.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.59.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.60.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.60.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.60.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.7.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.7.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "215c3c3ae31840679bd618db5659ae01" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.61.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7f80695b20e0a25f9416808fb2c89e4a" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.61.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4f5ad0cf7f61d2f01d58d03ef9a3eded" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.61.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "e79e94d3e9ec72b74b1bda5b263bd08a" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.61.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e9285f678a5128d7524765bfac8b9fec" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "68d6e6d755a4898360c0df369d83c29d" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1259ba4cd9398ae8e79b9ce80b85fbf0" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.62.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "35980331f8b9e6956f17f04d5996245b" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.62.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2b1867b00c2e2160ed5439ea072aeb89" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.62.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "a883b47f16c1c28384379e1411447e81" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.62.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e6fa90e53cf160924c165ea831adae9e" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "237a35bbb0227c59f888d39894474ba4" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "57b4d8ed15975a6b75babbfb67cd5021" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.63.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "6ce9334286d457c26b1219b2f503915a" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.63.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1227dc2355aa2ccb839ac3d160f1f9f3" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.63.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "58f115b7d936962d7a9b4bc01aaff5fa" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.63.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0cd5f8585df367db0f8fcd8c52e2bf80" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9acc3e1ef207241b9ac6ea048325fc5a" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.7.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.61.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.61.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.61.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.62.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.62.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.62.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.63.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.63.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "b33ade8285b320097d4627edc30622f7" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.64.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "45a2cbb2425b9c73870657551ede48fd" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.64.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "addceb52c1b8ceaa5147b4c408f6456a" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.64.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "5fa62cd5261a830a1f113db92b8a78a9" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.64.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8149f2a5327d247a315a49160e0c8026" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.64.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "55a8d6fa8dccd8aae6e03cf8bc1a86be" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "cfd0334c13de89ffd4306dc1d466f1f1" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.65.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "80b32fbad9378428a0105bf61d479d5f" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.65.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "010bc540c411b25ac6b3cb399a57a24e" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.65.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "f4b5705244d3fcfd17ecf606d693e8fd" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.65.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9d1ae776846d5b9c722920a1ae57c17a" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.65.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5868c12686aef0c7fd2ff8e3757d7958" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "6984289678c5c36a8e7e13af9f0205fa" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.66.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "446c747cc7f57b675105ca378a0af318" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.66.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "92f8d078059e05e3c3c445d171298ac0" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.66.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "9180aba3845a6a3b6e89757a67f9a328" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.66.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c9ee65f3d2cb6e5b15f77ba030d8d618" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "998369ba772a6e196efab1f12cf80079" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.63.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.64.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.64.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.64.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.64.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.65.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.65.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.65.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.65.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.66.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.66.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "b8b2054d3d98d1f13e410f899abe5f5b" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.67.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "96ab247267e2194db226df9e6b83e79a" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.67.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5536735dae23fb6d69738c04f7dcf1c2" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.67.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "07dbbd4941f8b0cfdf3fc4360b5cbf04" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.67.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "104c42acdb235cd538a5e145b0e7427c" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.67.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d2cc1c5604c0626b2250cd6447629c61" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "da9576da9b6205724cedafd392cb7737" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.68.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "d764563dd3ec30ef9be7ba6e1c7defe0" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.68.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "45ad826e2c0d0c1dd234be258373c690" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.68.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "78ed2d73841c3d2e548a57e5ab9d1811" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.68.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e9bd0b555105c0a76583fa282ed3cae0" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.68.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "16dd398fcc920838d671b6c054846d2d" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "43576ab08ac07857cba12352582f1e8a" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.69.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "860e36eeacd705292f356815cb003ebc" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.69.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "44a8149aa493c3f74e1ad65bee2cfe01" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.69.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8c36f423a9c9d998b4c67e79c372229b" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.69.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1e96e466d583c7771a6fe025c6d493cd" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "96da3d37c294d35ffa8905f5d3b34b85" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.66.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.66.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.67.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.67.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.67.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.67.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.68.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.68.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.68.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.68.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.69.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.69.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "3ab8aa917f36c4741b022a73fc09ba68" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.70.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c188c60eb5d1d368dab4b59e675acabb" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.70.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "807445e1730784729d64f055899dfd31" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.70.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c98b486c3edf8f89b7f8461367057664" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.70.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3894c0d913c0fe604483f7ad5a444dbf" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.70.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3e7644b5776cc15b57303d71cd69c7b9" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "5b3627bac61ee2206d17e363d09e0147" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ea150c9cced68769c89e054dc6030969" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "73ef3d3df93b2831b303d8140d2acc23" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "327c9f8ecfc6a3614aaa0d4bd6b86507" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "842b483385fdd85c61cd389b031f61f7" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "589bcd6e7c05d2d933d9f2af072260dc" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "d0fb60acce356036cd522f64976597a4" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.71.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "92bdbae71f086e10d637d418ee64b4a9" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.71.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "96b33b7fb19439188f8ceb42bb99f046" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.71.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "02410f9e46bee2f5776890f55f932eae" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.71.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "7d6c316f01e39118ade41c41620fa169" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a355d3a22e4c761b421f467159d84f84" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.69.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.69.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.70.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.70.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.70.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.70.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.8.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.8.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.8.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.71.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.71.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "5bb24d1b9e515bc6ccbfc0e81c48f9f8" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.72.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "811feba0c4bb4346ae5dda3351d31f8f" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.72.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4df85d52acbfedff1415b1cf0cebaa90" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.72.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "5a87ffd9a51ba86184f0cc290a7cd192" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.72.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "881f2b9fcbc875a2a03ab793fbcd6ae1" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.72.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b70c2858db3c54388d7cebf9ffdbc90c" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "18006aa9313023c532ab840e641aaff7" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.73.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0b47a3ee399c7063d012b269d06b003c" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.73.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "91834d1fe783fca1bacc03fae7e6e061" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.73.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "14ef62e53ed09afabe7c32c05812242a" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.73.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8241d56b0a76b363f67c6e02ee43ebff" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.73.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "529728641d400d91d8375366aa500508" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ef87e8876282c3937f9308f382d44da9" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.74.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b5bcce72834d3d225fc940a2f44dd32c" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.74.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cfc7477a0ab1d0b1da17af904555e7c9" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.74.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "4f953097fc598dc91ec775bbb92d73fe" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.74.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0ea20fbd5f9e6f2904f5b790f6306e62" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e94833d5605b81ccf593a56977db141e" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.71.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.71.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.72.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.72.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.72.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.72.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.73.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.73.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.73.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.73.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.74.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.74.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "a5bcb383f00b6d0e5f4615f8cd79a90b" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.75.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "b4a7353362ef6b611a841237bfb9b1d1" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.75.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8362f6e99bb250902c336a250e013083" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.75.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6c350a7df540ab15aedf0615954e80bd" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.75.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d1252a0040614d6a82e80b6566dcf765" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.75.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "395feadbd5c6753a65e393d731a43a4d" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "89c113f9e1ae0b976420f07657dcb0c9" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.76.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "ff7583787682692d0bd0037d8b3aec14" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.76.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d994d25f1f0abc8457707e7e20ceb3d5" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.76.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "673fdbd09d90370bb249dbe0a4fc6c7d" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.76.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b5cd03c143684775f0ff37e800427ebf" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.76.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "532babc6d43f8d454f13f5818355f7e1" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "002c7937257f755579e593f7a070a0c7" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.77.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "316227d30a8702e2df80ea41f8a4fd30" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.77.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b71343f188906aa95490c366f7448aff" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.77.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "0752aa53e7f752ef2834c1255c6ac95f" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.77.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "80e1bdd4d41372c08ae1fda1c154ab3b" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1d906c087e97efffbd778969354f866c" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.74.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.74.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.75.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.75.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.75.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.75.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.76.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.76.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.76.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.76.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.77.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.77.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "1771d7a857451caa5102fdee34a199e1" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.78.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "18b5a5965e1d88ad9b482ef4f86b84d1" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.78.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8276908cafbdd3c6290d2911950aab2f" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.78.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "3169022dac93a8c2b81221f5842406a4" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.78.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2fefe279b372be7206a3d8592211b09b" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.78.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "be1e590b7228beaa090475c457769a6a" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "6641b1a00b3c1200d7087787af4c60f4" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.79.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "0a367a4bd34c7b31ac467a30d87ca4af" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.79.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1811d56d75a5501e805f6b9d084c7c57" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.79.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "6dabf02ed459d276d3d5ab844bb3b0eb" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.79.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6c14a1afddfef7a0419f0123acf9b6aa" }, { "dataPath": "params_shard_483.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.79.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2150846afe6e2caf501bdaaed620ef7f" }, { "dataPath": "params_shard_484.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "756ec023c0eebc57a130387e7c079e06" }, { "dataPath": "params_shard_485.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.80.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "9d666ca5833b4dda70ea5af5c36699ec" }, { "dataPath": "params_shard_486.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.80.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f0b9db737baec399ba5844764f21475f" }, { "dataPath": "params_shard_487.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.80.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "05830a64179ffe4e00edaa3fc4877f95" }, { "dataPath": "params_shard_488.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.80.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f25c15b16970242bededd3e699caaa89" }, { "dataPath": "params_shard_489.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.80.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "37a0f7bb0f46152c47507e1a3d14a938" }, { "dataPath": "params_shard_490.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.77.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.77.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.78.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.78.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.78.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.78.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.79.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.79.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.79.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.79.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.80.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.80.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "fd8ec90cefe89f6a1d1d10466e21f4e2" }, { "dataPath": "params_shard_491.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "73fc7086be658425e30b036d711f6ef4" }, { "dataPath": "params_shard_492.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "456a1c41205895933abeebb1eda398a4" }, { "dataPath": "params_shard_493.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.81.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "355d647c0bc6fd6b2875f6dd9ddcef1b" }, { "dataPath": "params_shard_494.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.81.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "69d178b2d8a9cb3ea9861e7342735bc7" }, { "dataPath": "params_shard_495.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.81.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "adcdb42018a5181d7cd46093ca0fd044" }, { "dataPath": "params_shard_496.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.81.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "46485b13cc7fc22c10e677896d88e6c6" }, { "dataPath": "params_shard_497.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.81.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f846339b02538a0cff51de71afcbf610" }, { "dataPath": "params_shard_498.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.81.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "fb3d26a8277c88243f54bad3404e0d35" }, { "dataPath": "params_shard_499.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.82.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "4cecfd49962fa4a5900b559aa5af2abd" }, { "dataPath": "params_shard_500.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.82.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "060d0c097a77bdbd6cdc3fef23f278d0" }, { "dataPath": "params_shard_501.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.82.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "defd719d86e87a80a9adcfe840a2d826" }, { "dataPath": "params_shard_502.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.82.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1b6471d3d8e7bd070812f4c99e70be1a" }, { "dataPath": "params_shard_503.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.82.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "34aaf8860e74e5bd91b228374a357c1e" }, { "dataPath": "params_shard_504.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.80.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.80.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.80.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.80.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.9.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.9.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.9.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.80.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.80.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.81.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.81.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.81.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.81.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.81.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.81.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.81.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 }, { "name": "model.layers.82.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.82.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "591ef88665355a9ffdc9ecfcf5ea0ce3" }, { "dataPath": "params_shard_505.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.83.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e2305f7af1f6ea33451c52607a70201f" }, { "dataPath": "params_shard_506.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.83.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ad6177b24fee9cae678c81414c5342ed" }, { "dataPath": "params_shard_507.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.83.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "08621a2c02db5798910f26be42b37f81" }, { "dataPath": "params_shard_508.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.83.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2575201363d1beca17f0cb3e226ddcb5" }, { "dataPath": "params_shard_509.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.83.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a18aa96516b3b8713b5e9f10b4e3f72f" }, { "dataPath": "params_shard_510.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.83.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "7cc4830a4ded2ce118f5b86a4b2b4984" }, { "dataPath": "params_shard_511.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.84.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "f50f9b1b2358891d6029a6de972a1020" }, { "dataPath": "params_shard_512.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.84.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9fa1aa70b0104ad89098503fe2b78dc3" }, { "dataPath": "params_shard_513.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.84.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "c6e55a7a41d4adec0ffb158a14c85002" }, { "dataPath": "params_shard_514.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.84.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5cf9890f67194487bf4526d09e3dc179" }, { "dataPath": "params_shard_515.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.85.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "527f1db05809b35b91fb41041dea3995" }, { "dataPath": "params_shard_516.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.85.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "889c23772eee6dc8d6ea66278c6078b2" }, { "dataPath": "params_shard_517.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.85.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "1b07decced9a97572da2bbded4f6139d" }, { "dataPath": "params_shard_518.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.85.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c3dd0df8236f9761829143602c544496" }, { "dataPath": "params_shard_519.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.85.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2692c0ed932d6bbae5f646855d6496db" }, { "dataPath": "params_shard_520.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.85.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "7532b96a1af5d74057e146818e876abd" }, { "dataPath": "params_shard_521.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.84.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fceb650b8f0f453c3fd7e46b93b8c88f" }, { "dataPath": "params_shard_522.bin", "format": "raw-shard", "nbytes": 33342976, "records": [ { "name": "model.layers.82.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.82.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.82.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.82.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.82.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.82.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.83.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.83.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.83.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.83.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.83.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.83.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.83.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.84.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26772224 }, { "name": "model.layers.84.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26780416 }, { "name": "model.layers.85.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26788608 }, { "name": "model.layers.85.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27837184 }, { "name": "model.layers.85.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27837440 }, { "name": "model.layers.85.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30196736 }, { "name": "model.layers.85.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32293888 }, { "name": "model.layers.84.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32294144 }, { "name": "model.layers.84.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33342720 } ], "md5sum": "83797c11e4a42d55303d3b1421a51409" }, { "dataPath": "params_shard_523.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.86.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "7e69a4419a258c510e0f3b527048fb26" }, { "dataPath": "params_shard_524.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.86.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "86f49b3138d00749d6d011b8e416d5e5" }, { "dataPath": "params_shard_525.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.86.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "d9aff5cbdc5959e3c1a95a1ca5537f70" }, { "dataPath": "params_shard_526.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.86.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8717ec9f8916aa33c78d995195ce545c" }, { "dataPath": "params_shard_527.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.86.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "dabbaca286bb2cc5a31e3eae1ba11a1e" }, { "dataPath": "params_shard_528.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.86.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1dc7f57d09c5e505877ccc9d9b60a0e7" }, { "dataPath": "params_shard_529.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.87.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "574282f97aaa9029d5b8e7f890a80456" }, { "dataPath": "params_shard_530.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.87.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "62f1216382576f1eef0e4812b32deab3" }, { "dataPath": "params_shard_531.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.87.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "8d795cd54ed3c0b47660d9b7ba6e36b3" }, { "dataPath": "params_shard_532.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.87.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "fb3d65ece1e9eefd7250f5ecd93be8cb" }, { "dataPath": "params_shard_533.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.87.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "111c591351c7021ee5b2ac8a71bf29ff" }, { "dataPath": "params_shard_534.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.87.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "ac153a107b0d361c47d8227649a9bbd7" }, { "dataPath": "params_shard_535.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.88.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "aef53a8787247586bb31bafee7b0b48b" }, { "dataPath": "params_shard_536.bin", "format": "raw-shard", "nbytes": 32294144, "records": [ { "name": "model.layers.84.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.84.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.84.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.84.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 }, { "name": "model.layers.85.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21233920 }, { "name": "model.layers.85.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21242112 }, { "name": "model.layers.86.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21250304 }, { "name": "model.layers.86.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22298880 }, { "name": "model.layers.86.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22299136 }, { "name": "model.layers.86.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658432 }, { "name": "model.layers.86.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755584 }, { "name": "model.layers.86.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26755840 }, { "name": "model.layers.86.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26764032 }, { "name": "model.layers.87.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26772224 }, { "name": "model.layers.87.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27820800 }, { "name": "model.layers.87.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27821056 }, { "name": "model.layers.87.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 30180352 }, { "name": "model.layers.87.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32277504 }, { "name": "model.layers.87.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32277760 }, { "name": "model.layers.87.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32285952 } ], "md5sum": "350459cd425eeda6700c14980076af78" }, { "dataPath": "params_shard_537.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.88.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "e55d3f4465167b577f06b4a9940e3a95" }, { "dataPath": "params_shard_538.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.88.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6da165d5ab02152d088a412bb11cab27" }, { "dataPath": "params_shard_539.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.88.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "68af7eff4b93209b32d8873454f52928" }, { "dataPath": "params_shard_540.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.88.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d216f3fcbb2b4bf1d0d479b3b9ea342b" }, { "dataPath": "params_shard_541.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.89.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "05ed46a801c80866397864c969793948" }, { "dataPath": "params_shard_542.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.89.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3fc5cea2ea44dc8f2e8893a78608f254" }, { "dataPath": "params_shard_543.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.89.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "7c07f682d9e574a372e8e94a8fe9bf48" }, { "dataPath": "params_shard_544.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.89.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "bfe23babe45c8875a0ed306691c79188" }, { "dataPath": "params_shard_545.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.89.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "f280a508fb7c7cbb7c4a52f94ee5d00b" }, { "dataPath": "params_shard_546.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.88.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "1299e32b34a3e80aaa76512dfffef273" }, { "dataPath": "params_shard_547.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.90.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "81b9d6888588377dfa5e13eb77f179e0" }, { "dataPath": "params_shard_548.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.90.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f6d1662190f1c0f6bb283a691d84bead" }, { "dataPath": "params_shard_549.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.90.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "80251503662745003e4910a456ff7b51" }, { "dataPath": "params_shard_550.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.90.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c463df0d45c452cf031e89880fec1eec" }, { "dataPath": "params_shard_551.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.90.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "41f8a593031e30d568c805253c4584a2" }, { "dataPath": "params_shard_552.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.90.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "daadb86da4e59e8472e80ed71a4d6935" }, { "dataPath": "params_shard_553.bin", "format": "raw-shard", "nbytes": 33326336, "records": [ { "name": "model.layers.88.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.88.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2359296 }, { "name": "model.layers.88.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2367488 }, { "name": "model.layers.89.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2375680 }, { "name": "model.layers.89.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3424256 }, { "name": "model.layers.89.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3424512 }, { "name": "model.layers.89.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22298880 }, { "name": "model.layers.89.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24658176 }, { "name": "model.layers.89.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26755328 }, { "name": "model.layers.88.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26755584 }, { "name": "model.layers.88.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27804160 }, { "name": "model.layers.88.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 27804416 }, { "name": "model.layers.88.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29901568 }, { "name": "model.layers.89.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29901824 }, { "name": "model.layers.89.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29910016 }, { "name": "model.layers.90.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29918208 }, { "name": "model.layers.90.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30966784 }, { "name": "model.layers.90.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30967040 } ], "md5sum": "b2f819debd99fe42fa470bbfcb872a6d" }, { "dataPath": "params_shard_554.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.91.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "c45c3de2f356bc80640ba6a3326cd605" }, { "dataPath": "params_shard_555.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.91.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "753a7bc9fb489fe771885d40e6ef5e2c" }, { "dataPath": "params_shard_556.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.91.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "803484fc0443df1fc70fd669290bdf25" }, { "dataPath": "params_shard_557.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.91.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1119621dc43c7f39e6e9f6aeb2221331" }, { "dataPath": "params_shard_558.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.91.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "85f1fb09bd3c74456391b2ea41c56ecd" }, { "dataPath": "params_shard_559.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.92.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "491651e433fb36906d50eef04d78bf4d" }, { "dataPath": "params_shard_560.bin", "format": "raw-shard", "nbytes": 16777216, "records": [ { "name": "model.layers.92.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 0 } ], "md5sum": "882fe900b62a682f15cb4bf5ca5331e6" }, { "dataPath": "params_shard_561.bin", "format": "raw-shard", "nbytes": 402653184, "records": [ { "name": "model.layers.92.mlp.moe_down_proj.q_weight", "shape": [ 128, 4096, 192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 402653184, "byteOffset": 0 } ], "md5sum": "9263d0f119930ef96feac9f9862d83a1" }, { "dataPath": "params_shard_562.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.92.mlp.moe_down_proj.q_scale", "shape": [ 128, 4096, 48 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "88770d9696e2962bee5afb8de0c8b6dc" }, { "dataPath": "params_shard_563.bin", "format": "raw-shard", "nbytes": 805306368, "records": [ { "name": "model.layers.92.mlp.moe_gate_up_proj.q_weight", "shape": [ 128, 3072, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 805306368, "byteOffset": 0 } ], "md5sum": "db9bafb9887fd619e67e4e1c707098a2" }, { "dataPath": "params_shard_564.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.92.mlp.moe_gate_up_proj.q_scale", "shape": [ 128, 3072, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1745c78e8a5ddbc0d851b9db2b5f580d" }, { "dataPath": "params_shard_565.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.93.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4bb944adcaa58c8b2c753162f945d861" }, { "dataPath": "params_shard_566.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "model.layers.90.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "model.layers.90.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2097152 }, { "name": "model.layers.90.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2097408 }, { "name": "model.layers.90.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2105600 }, { "name": "model.layers.91.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2113792 }, { "name": "model.layers.91.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3162368 }, { "name": "model.layers.91.self_attn.c_attn.q_weight", "shape": [ 9216, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 3162624 }, { "name": "model.layers.91.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 22036992 }, { "name": "model.layers.91.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 24396288 }, { "name": "model.layers.91.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26493440 }, { "name": "model.layers.91.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26493696 }, { "name": "model.layers.91.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26501888 }, { "name": "model.layers.92.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26510080 }, { "name": "model.layers.92.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27558656 }, { "name": "model.layers.92.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 27558912 }, { "name": "model.layers.92.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 29918208 }, { "name": "model.layers.92.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 32015360 }, { "name": "model.layers.92.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32015616 }, { "name": "model.layers.92.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32023808 }, { "name": "model.layers.93.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 32032000 }, { "name": "model.layers.93.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33080576 } ], "md5sum": "874098213ef8f8041fea24116e1632ad" }, { "dataPath": "params_shard_567.bin", "format": "raw-shard", "nbytes": 21233920, "records": [ { "name": "model.layers.93.self_attn.c_attn.q_scale", "shape": [ 9216, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.93.self_attn.o_proj.q_weight", "shape": [ 4096, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 2359296 }, { "name": "model.layers.93.self_attn.o_proj.q_scale", "shape": [ 4096, 256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 19136512 }, { "name": "model.layers.93.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21233664 } ], "md5sum": "6dd44a3bc4f69b2c9b322e003ca5d6d6" } ] }