{ "metadata": { "ParamSize": 325, "ParamBytes": 3631664128.0, "BitsPerParam": 2.6739310072364444 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 211365888, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 211365888, "byteOffset": 0 } ], "md5sum": "74c693f00b8f2ba35f34247ccef52613" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "9e29f8b9b20c9f07483abb746d594030" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "881806b9f10c44c7afb0b67d3891694f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29369856, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 26420736, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26420736 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 26428928 } ], "md5sum": "c0713d8a7d8611b57dc353e4d43bc45b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7dac7bc148bf1a4b5d78ab826a874a43" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "7c437c0095157449edf3d53bdb5fc09f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 5914624 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 12664832 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 13508608 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 23633920 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "d8a2d4243e437d4e9de05b08f0d72fe4" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 211365888, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 211365888, "byteOffset": 0 } ], "md5sum": "d4ae5780baecc9445e48e502f4866e59" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26420736, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 26420736, "byteOffset": 0 } ], "md5sum": "ca49e6f3357151c10d2c1141d3513a32" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "4c2c5aae19fced25ddf642a97b9ce137" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "96e94f66828327db30cc35bb3218c315" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 27856896, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24907776 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24915968 } ], "md5sum": "ce5294f0e1e403dd82d542666ce3264e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "b9138567609334a749320908c3698a8f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "46ac599ed73039141a8b995585846b26" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "bf880c2d5a3b3419a44df37707948854" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d4c47ef37b73a0feae4f674b628a3dc9" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "e0e8dfb252f81ff1e601c2e111771944" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "c47a3457217c3239264280a2796af6d7" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "8821de14848eb7d398f578cb56af4eaf" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "08eca2331dc7cc4a77a6ef9fea5ffb3b" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "090c0d142fe58b663526bc45584e7b39" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "a4c572c8d0f9b21a1bf228fb73cede0c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "39941071c72909733e067ab8824eae60" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "f415d56f93a4232f9829e0f218ac9706" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "bdae284530214727554767de19f2e11c" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "7f09fe75ec8846e0e6a70f8e2cf8cdd5" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "cdcd409de33deb42a101d26f37be788c" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7dff571b70f7790329fefd5d6d97c4d7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "47cf53b77720a634f93c1f12ceb54f51" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "606e75b1c355099ed953f029d3932e93" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "ba469c33dac3453515f4643d5e000e13" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "e397d9373ba1103094402a20eb2377e9" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "5045ba8734af9590de4df9d11136a621" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "4d8e1d1ad4b7f0f3064e02d6aaa1dbbd" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30806016, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 24899584 } ], "md5sum": "f71b107643d6fa289921be823a0a5232" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "57977ed3aff8dfd879db32229eb3f3da" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "91b3849df5511b37a7176579a90445a7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "a28689207a72932a90b2ce4197647d01" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "5bc4a9f6b85d7da8246ecdbd48f6a49d" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "eca886ed37e879ecf06e3a907f0b119c" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "dd142687b34920a58784140978d957dc" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "7dc3c3e67d95156d0d867da5f8ce43d2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "f161a16b7ab6656374a1e03091f31f01" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "f3299495c581a84986b3a4964cd51069" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "fc5cb5c4ed7a4fc455152e78927fb438" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "5327084916b361ed309a5d25b388e78f" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "28740240f4657743e7726ab0c76ffc49" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "ff39295e4119ee63bd02b0b68563694f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "e9152605ef87d34ea13ae3f8b3a26011" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "a41cf6329b1f814cc8e9f8c7801aec0d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "f4d59599390dd599e590a0a4eade8a3e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "dd4c47ef739dc57fb432c2aead9a3626" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "6e64e5951fa6d7c5206357302e5be61e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "aaa6bb1e9ae719587438c429ffbce784" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "72f1ad86d26669cdaf160a4a383083a8" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "37f44f491a96d629a75f0955db0a67c1" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "e8eb1a1b7966474fd4d2715839fb3397" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "41a5b423a85d93d41c79efdfb99df191" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "3a63b974356eecf8214afe31eb0b15c1" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "e2fac52dfdd18169c7e57ac62686ae9f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "12adc9e42cc91a8389b5f9db26292f88" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18984960 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 18993152 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 21934080 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27840512 } ], "md5sum": "a6b8a929b2243f1b9f183c1069ff622f" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30375936, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 10125312 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 11390976 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18141184 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 18984960 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 29110272 } ], "md5sum": "dde45d7c917c68446f725aa86102aa5d" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 31129600, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 7602176 } ], "md5sum": "3a786a8b38e1ea43bfedf4bd45328af1" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "08ff017411d7934f2db37f09682ec928" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29425664, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2940928 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2949120 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 2957312 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 26484736 } ], "md5sum": "a9fa4477fbefacad0ced3431fbc1f8a5" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d94c7c21b56fb3c15381b6b515bc811b" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "1633cd176a49763f7399b91a12b3ba21" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "e298b5ed2d2ed68882035b3b62a7cb94" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "3f8106fc02afcda677e9dc9098b39176" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 32391168, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5914624 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 5922816 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 29450240 } ], "md5sum": "b2133b7461f057761f5fe637183b531b" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "a71983edabc5c0dd830b7df7147c6831" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "dc98a14fd0629c44aa455e00880db306" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "b5c307131b9a6c90ef422392dd4b0694" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "31b410ab5c7b2a0af0d9f64d08979095" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "39a63620221823514390b4b6fc8d77dc" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "0ff9253fd17bd3eff9482f542c7dd976" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "99fd896efed7299043a7ae37b01605f8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "0b4badab19412a6d9f9dafedf34b493c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "de937dbeafe1cd32f51c61e58ecc664d" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "6cf17f449e196261c03cc6e8e7d661b8" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "d2b74221435345ade17ba884b8f176cf" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "372f18d368b6c34d940618a12de64bcf" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "994e6d137917c31a41a2bf1b33ee8987" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "ded82d26bcac1fca9cad0848c612e77b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "7765ec26c24b2fc584cb5e77f0d1315d" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "723a3d53c5eda451a0d9e01eea33e614" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "7343bf43372d68951ebd2390dc327500" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "169bcf5488214d6384e73d968586bb22" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "4943cada5e2af6d7be0427b85b6ae64e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "0da5039a9f4aad7e43d3f36ceba1fef2" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "2b8ec535dea52a8705d6fe6084a2ee74" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "d57acf76c316b0164705e807bcf0e840" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "a602461d6440a1357d5bc82e71b5e72c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "9bdf3221e36afd5be72197d4664f9c9c" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 23527424, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1436 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 23527424, "byteOffset": 0 } ], "md5sum": "6a0a2df8b68d3d69ff461cc32f1645e1" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 47251456, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 47251456, "byteOffset": 0 } ], "md5sum": "5fa117291153a2ed57fbf4f0c0d21528" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 27848704, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24899584 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 359 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2940928, "byteOffset": 24907776 } ], "md5sum": "c4521db6603a9dd4fe1aa2be6e8fd720" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 24899584, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5906432, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5906432 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10125312, "byteOffset": 5914624 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1265664, "byteOffset": 16039936 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 17305600 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 24055808 } ], "md5sum": "4fd5c98a121caa707605b09876c566c2" } ] }