diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,12543 @@ +{ + "metadata": { + "ParamSize": 805, + "ParamBytes": 38801408000.0, + "BitsPerParam": 4.353045149919394 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "f68301ef46c99536104a3e26a2f2aafa" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c129ac4f2894069da009a1f4d017ed1c" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b2ecd90c25c41e387ac5f9ea00c58e30" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7e3f57b099f49bc47e94b6a3873be32c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "68a49e5e6d583f4e2bda4820a6f146fd" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "48e737fab525b156c7d560ee411690bd" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c0de3fdd56ff785bdf4c9910a42d9916" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "77fbeca47d7ddbc9ffe40cf41ef70c85" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "82ef37febbb21b181375548562e5592e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "437008b8043d1ddee85f9dad997e92ad" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ac7ef3cb97777411324663f8a7548113" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 131137536, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32016, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 131137536, + "byteOffset": 0 + } + ], + "md5sum": "076b005efa5749ad67e0326b15fabd4e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.79.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 18907136 + }, + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "1911ca3a258714e6830abeaaa488d790" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d717539cc3e8a3acc1c5735e4bfaf3b7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "19d6039ca4f5e1ddfff29e48fb18fab7" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e67d70fb6a1b0956171315007fecd006" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bb2e7bffe749155852017d7bbfc7dc06" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31105024, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32016, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16392192, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16392192 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 16408576 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 31088640 + } + ], + "md5sum": "bbadf7307ef0cf03ba2fac7cb6a14d43" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2c2e70b8d58e49adbde5e65f528433c7" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "30f1eeafcb0896ad0eb6aa8dcf6098e5" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0b7d8072665c117c77d43ba12c05ce08" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8cab1e996e59c6e05f59f5092f8d5454" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45c76ce0f7c7ea3af33a5fc7c77ebbd3" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8a302dc27fe5c90ff76a9a1b9b830976" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "edc68eecfa137a013031079635241472" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7fc8330fe1b87104c32642abe6a41761" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "023becebf0dc7962df865f96b8f38f0d" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1cc93c6704594b0a365d9359fdf2342f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d3c3c9753be335c001e3cd19aaccca1e" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "46c06e078dcc2f6503740c86d1534376" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "29a9f71dff3dfbfb98eaa360083b5c5a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "81fbbe59a3a60dee668c8976eb3c6ad6" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c6a159b36ca7812a5f9757157da7dd9a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28344320, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28327936 + } + ], + "md5sum": "66f8438e5cbb0ce69379471734876f77" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "10e7ae1edec3903e4e57801dd18fe6f8" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "370dbf384ba569553133a5ecacd35925" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7035cff8362986fed36165a95f55d00c" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "73b4ae179f56bfd77a0c1ba56bb90f4e" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "70ec2c3f2ec1f787b5fe416386d2c3fa" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c6f5f01974a759cc8867fdac2f7b2a59" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "ed1001ab2f5ce2b664686dbdd244e2b2" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cfce0f87d63de99cdf162d1a5a8a0af4" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6d593e6b1dc6a93a70670afb750dce50" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "55f5e09e0adf25c54b924d224d85cf8b" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "84902848b7765e11e120a792fc7e25c1" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "8c6e62f069cbfe16e1a9bcca0fca4571" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "610eecd49d9ef4de0c84d6094a14a2b7" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "179d0011a19fe6396366e368eb8da854" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9cef4b49724acd5911b3bab1cc714fec" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7582fba33e880db99f8cf212fb2a1816" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "85877e878ae29cf3ade7575099a9a514" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f1da12159685da1e6e669664e0424ceb" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "b81eeaf4a9797a37bd4a4223fb96acb1" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b883ecb8d2f932c32cf961807e93a72a" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "be1b7fb378573ce4edda2072d1213d0d" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "38896edec1f933c3607c9493f2d41537" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aa6eaa9c7b554b843814f29952e4c585" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6e10852fda4fc6c7af706ad34c395b32" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "91c9768c7e7111a199010a9a36b8a9ff" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ec96f4fe136472a6877fa84d7c1a0d33" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "943b08fd24a632ba29721291d5f67569" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "25f4eb715132243eacc7e1547ff3ee4c" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "110ca1b2f10b28bc34da3261c58a3d81" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b05f79b3e6d183bded9920a2d901dc17" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "a799610f07ff2df672d6b82703676c6f" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3c9b66aa262bdd2158b4c3b992cf5de5" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "5ca81f933a218c08e091af66806ac686" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ea8fb3d22a19d95ce4af57a563e41668" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e6ffeb0fa91752ba4893665010cd2d6e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c81b3bb60a7892b72ba399cf3f8b16e6" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "1d8f004b922e8700a8bc4394157ff221" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cae9345fc6c31864835eca3e2200713c" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "66bdd12d1f33f3eb07c7a5d78adb3adb" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3c4a27bbf4cebf692d347dd448181c12" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a0e34ada0bdfaa7f2b59d12715cb3805" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "34e22e4baf35bb0c372cfee7d741a08a" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0ba81c19b12846747c292a254abb09f4" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "56380ae00d1681ab2d08651365fbaa7f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "574b306835262747979eb1ef05a515d3" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "5e8916f6144a289fbbf63ca5dfa350e1" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6fd11765541107afa5f4fea59254df7a" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "05860c8fec6d9778af2bd88f4c999c42" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "cccf064ecf16760afa8695972ccceff0" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "51703154a5e5ef9261354beda3849aa2" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "50b5a6b46488e724c5caba6a831ee9c5" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c422cce6b433e459f474e2d2c5864436" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "31432dc3d148effa17fc8338868664f7" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "5f21838cb200ccfdfb2ff8ebb7819eee" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "dd41f6ac34275bb9569da054ccb18956" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "72b680c25aedc0446a55193014af5a1b" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4e2b2e5379ccbd9ccb0c6451e7342ad6" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "276f45d5c9d4dcf334e3ab7e0c13884e" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "350a60af172107d86b196f60748426c2" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6de4618e18aec308ffda9d3a0acf7e4f" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bb21a3b3cfb7957686573e39d32dc1eb" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "dae9aa5c2cbff8ce864ea2e8396693b8" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ee9cf56e3f71cf12c000490ddd946f35" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bd060cf9299534c6b671d877d1d7c509" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0534ff466d9aad058d5e53dd892f47f4" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e592eaf3be0b08ea61449ebaea039cea" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "c0d6800256a8283f4a19806d3b01e6b9" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "2c6f16a2b173450118326c9e7ff4b1b5" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1b9b905ca02f5d0f59c83b42ae7b3a26" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "028ceb80ebfb8b288346bc1b671ac564" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6c14f43932ce325f5bf506b0747c5cbd" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "de2bdbb1c441574a6a51325471a2ed57" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "700989761ad7cdd1d0f11cdbb964dfa0" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9ee25fcbb8fc7acc71f08fd5a1d80546" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d9b990b6d3e30ddd6a5e9f4144e27113" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2361e06d35953c59ab47e4edfcd36cec" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "41105ec883671430eff7d4603df500c5" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "df344061ae4a36e28a38a8c5c506e50a" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "163c407019736c958cdad257b59eeb94" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "a987fd31e12f2ee2cbc63614f15bfc2e" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b5600fb3fa4cb4ae8ed8a137721796b8" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6d3ca717481db7d28324d2eb4fd04ebf" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ac2eb6048089874d423fd7a30e5df18c" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "4b35f08f837edc73c8b449b88233b8ca" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "cc9d4e85af3a6a3e63b35e2f8d8ce988" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "823981a51dbce9b57cb46e4e0c55ce1c" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c036d4aafd33a844c5ac1c46188de657" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "090034cb8c969e56fc06c86d12ca1758" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "afa053773584173882bf8b2419b6de39" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ba7209cabf148b1270d46655d1ac69e5" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "25635700abbfd9c80e514d130b5d2f05" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "f54882d7f8ff540ff4438cbe4be116a4" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "81938d589c96255d3c32646d16f73b56" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f0c2b5bcb15908cfb296ec4c2cdf4fe" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6c87b4f54dbd17921ac656ba218762d8" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3291acc343d290fb922be6226b19916a" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1cb0100e3713fc25ef72c7404ed7d433" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8dce0503fe55fc13e6b54ec1b6a97722" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fe5b90f840ce58f7fbeefd58ab4f4aa1" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 32555008, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13631488 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 13647872 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28327936 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 28344320 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 32538624 + } + ], + "md5sum": "a8051c9a5bf50fac301b7eb2d36b77dc" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4c289e280b1f443949d7e0629ccb9c42" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b1507f9fbe2975e4a24b0b7f1973a18d" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2aa28e8384986800bbb000f4ec2cc446" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7f475534a546b1026442007a8fd79f9b" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "aab3ed0637cacab88e59250f92c87be2" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f1e3611b1b5a53f12eaff059c1720645" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d2ecf6daf32929a04c0567f6dfcd22e8" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "082b77e75e2190ec3ad202ddb1f5432b" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "da9c9d240880469e8f72c32281b073c4" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2d7b39840691a76f3034254402b7a702" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "952a9db53715e21826299c7b66e3af3f" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "44fe9a612a0eb139f49e7d68bb0bb600" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "12399be75795eecee8c395e3795bcfa7" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1579386faea6e7c3b3f535116b622b15" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b0911b7c4b28279dc02665bec59efdc0" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f77c515962fcb775d573f1a56a2c286b" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b8b868441c9bdf7b022ed1b892aadf84" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "045b0cdb2bf98b80bc61e1f0d90bf8ea" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d4a18bd2575775830751ed79c2f8dcbc" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "8d17beb605898bf453fd092f629b52c9" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2985113eb7900f4df0e31de195ff2aa4" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "171f58347780ac93af5403aa3f1d1fca" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "396c795504a1272adc2ebb102d4df5f3" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "da256d907e5476e8ae80a2d473106775" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "65361f546d26c53fbb46578d4be01337" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "59e719fcf8051556fcea6d3eff9583f1" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "3b5230bf65a13ff7c83ff3dff46c6c29" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "fbc64e5fac572e5786ca96335ed723f5" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7956bc930175de7c2625f60027145914" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "12aa10852958f8efdea05a3b1d7c8537" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "5274da442ac09b8f11fe140524ff0402" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4b9511e12213cb7881b27ac2187b5c3b" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3e0b87f487baeafd90648b0d082f2977" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7134de978308803a5144ea7b1ccfdfbb" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6b7668634d6b5741e5a52d2832a71120" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "4ed50090119ebb82167907e272dc5e4c" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "6cf722ad7083174a18c73f2d5bbd9092" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d4b322daf6bffde3bdc83a7f27156b94" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "45aa23c7a118064051809e4acd0f34ab" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "33979a22e977211db2978e87cf4accb5" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "00aa9353f1b7a320edc28d55fa718d7e" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "361396e22479d49f49f9391a556d700b" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bdcaadb215260cdcf645a8a1c357af13" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1b5ea99e567be421938f27d4f76f381d" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "bd65e6ebadeedce6ce4c5f5530010195" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "778206e9fd2d1baef8f1b6f9bcdd9358" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "11adeb13096377cef6349609e32a32f7" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "10f17005698203907ea1388d80bb9589" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7e421bc35c29cc4469ce73e6a749ba74" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eea49419984441757d5676694b7a2f86" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c4681bbf681f0254a9366f927a215a6c" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1ff6d7e63d79eff3085202507db63e13" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7127e4f8aef58853409c492ee53e5405" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "d2d8dbef5a0dbe57c603afa755e8a0c5" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1a19fd99ada7a2a63a6853bf9fa4fdeb" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2fb8cd9171223197c120b6163cd327e5" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ccf2d4ad95ecd319901bbe62cd4dcdf1" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "339daff3ce9192688f02d32907842ce0" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6ea21499119811f373baa999e336d4d0" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "13e50edb95a97582135f2bb9e0472994" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "d6211522dbc27fe8dbd6768e279854fc" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "38491ca80d2d40d485b672e119875f83" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "742d9b0ee08fc8f94c4046f957fa16ed" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d5123be020e30334134c14c1eb2db67b" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f166b261af5835fbd6d6a3ea7dad0ac9" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "bee0b14a203e8f4bd0fb0af0087feef9" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "eaaf1c71c5fbaf9bdc015e0ec6ce521e" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e8aebad0a8455d11c7eb847c841239fb" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "09f04e0bdd685d4d53910a51b251f4a2" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f2f33daa04eef891d8b9dd74c2425f5d" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "4e09b0b2f9bde70e29383038dd6dab9c" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "0618b30f0f7a24486e184e321f7462e5" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "534befd8538670b7a37066c2d5c95dd3" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "badc319bdcc94e03c0d2c69f373ecb05" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0ab10e976d25646ec4baf5ec251aa284" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9bf9fbb029db24886327abfca60b8eb6" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cf5a7c8732f2764ca0d015ed3c26d202" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "dd62dd7ae2e663ce08752528a9e9c44b" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "2894b1a59f58c334e47b5c0f4ff836f5" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1b3e0ee8220227a2f6980aad49af97c4" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "f53458e6c8156b54033ee375b6573bd9" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e6ecafd7d4e4e041b4b22d769ac357be" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "23bf9e128fb516a232577ff738febcb5" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "c13b98f5e4c06030b193d0ab6d7df840" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ebf0fb05e20c6e992450f38d7a8be53c" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0d894cdf7b2d21a9e6b308a4088673b7" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f2ef77e54c11cdb1d712f08a8e7d836e" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3535690fc54837fbd4c09f8b8148343f" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "76ad20d28d3d76ea50fafdfdc7e64a7f" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f2e0cfa5a896a5c0990d277747693c7a" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f8f5a2f5d628b322254a6c21baa4abc8" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "79e65dc552352a1aad3a378b629ed914" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "abac8cdb2394eabfccdd3bd44316306a" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b02d55d5b30ca382b39b4929bb81760d" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "edecbb1a721cb1611685371b6b3d389e" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "42c202cb34044a35bd220879b804f1f9" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7c097c350085ec8e84b2fa6a36caffcd" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "eea4d5548857520c5e8150a772d05dbe" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "29c6fbe588dbe392266f92eac521f7b6" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c384f1967dfa24f11745e220c0fe532a" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3dd380c40c03fedf338bae764c96df33" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "5c08a9332c44e8172124f5c0c7a53c4e" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ca5efc925bf0b10cc380afc010f31288" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1b4c2d5e6352b1af71f19248022cb8f9" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8cca38def09e092d2a905a353930b012" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3d089a9b77d355a01fe889f5efdc9c82" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "94ad9cd55a427f04443c37d865fd9e2f" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d27f06c75bd618e6455a673b7eaf383e" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d3adf70c7bf02e229b68e7a960f2add2" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9e383f71b2da8370b08393140ed7ee1c" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.40.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.41.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "44835158f1e5202909c869fa213708a5" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "792df725935c28ebad05c374d98e221e" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0dd5257332a397da3556af4a8f9b6c17" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "bf7a7c1ce084c81e99c73149106c6c88" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e14eff2b156d94d5a0e671870fcf4335" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b89316616a0ddd4ac273d922ec1d6f44" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ddde9a430cf85536dc743000038b7eda" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "64fc405f6bda6c3316e64af45e424956" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a2f4340734f38722a2c2a8c6d1916df5" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.42.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "83c0d8918d6f6774e804d3f38883a13e" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c8be9911a21d30e0ee22b29ab40a7b61" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1b6c0774f86c9ba488f9c55b8086a199" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "86b4080389dbd902fb0e9284a3a555ec" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "459ca3065905f304f1f21329b7331db8" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4e276d8b09236227a32dbf68ac87658a" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0957bd974dfe9b177305a32accafb330" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "97aeaa68888054494325cd5acd965e4f" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "681e7f52cc807ba9caf3aaece0e9bbdf" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.43.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.44.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "26c0cc541b161c9814c00ba5d72daea1" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "98b8ae3d69ac7bde41fb2adba15fbf4c" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6c7c953a524f7654299d0bf9b0acaa38" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "68267bf6deae706ad2a93209b735feba" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7554b24898db17e7f518e8ebbe298fff" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "76202a3a2dadf3860544ed051421fad5" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c5f4a68e8f90975ea88d32b7e5d1d363" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ec1f3a78fe24361ea1b07c37f3b7c8bd" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "19432a04b85c11701359469845da9a43" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.45.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "4d3704f02ca3c78ee5c9053075e01142" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "73888af2985f9165c301695dfa44d50d" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0c9636897fe543fde5f8b166e264bbf5" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "25ecf7eb7a178b83adbd61e1843f4624" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c0e233c371602e6a460be84ab5327b76" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c4a6f9f0ec5fd948adfb7a7b8e3cd64a" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d677d089c9d938c3d15aa37607c80244" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.46.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.47.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "0f73745f5e1d67355c11293340d85f46" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d9d12ffeb61912868bc90ba7fc7d6554" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0f7c66fa6c5104564cbf16bcdeeaaf3e" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7aba5358043bd122aab2d318e94060d6" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b6340bb3b56c3f7db376253b40171244" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "8d0a9d492414b201565cdac55f41885d" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "027557c1faa6cf510ef7d79453a51944" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1a0c6aabe2f585d17f61abd75d6b77e9" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "34d0c35c13773916db5ec038d04b97d0" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6e46949b91bb29440ffe3f62c4150285" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a84792956bde9cefe6ad447af8363fff" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.48.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "c03edeb9fc7643eeb20eb8846606e208" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0cfba93a6f76de5f5062eb025ab90bfa" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7996d4ce6343983a8e5db69358cad335" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "99d56fb203a4dfd651930887442f0468" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "db39c08dd269241cd9122013a1440847" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ddd7df9ee041953a6370c64096fe585e" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "88fa040f0c5f8bb59ce780249644d842" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.49.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.50.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "4c92abd7c3ef3324f3a2279635bf6b95" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4f334421289384e5b0ff151187ecf1ce" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "09f03342ef2c5c080d19e65cd377b2c7" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "43d7d31f1fcabe701c87deb2efdcd752" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d7ca7688d2446b4e14a0c84fb17ed293" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "20e438d0c6aff6b33856a13fc4f89121" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cab942531a8f36d2be4ba9e333846fdc" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 29409280, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14696448 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 14712832 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29392896 + } + ], + "md5sum": "da9a283684efbcb0480df94d12452ea4" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6c700a1e80c8500ff58cc010014455c3" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3e6f7e6c53b79024c8b45fab7285c789" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "606cd2aaa2fe7d38a369e58a4f305277" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "360463d7e0c639c3e918651c54955490" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3e014b6f7bb72fa311900c5c2018e919" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e2452df6e6bbe2b9a0b03aff48a73dee" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24150016 + } + ], + "md5sum": "c35197eaafcf4655ddd571f12d62e108" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6ee1475e493412cfe7563b1130479143" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "36933659e3d98e8fe2857751e6e4ad3d" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "5cc619fa1aa6d5b44e981347eecb7a44" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "39c561402da1c2cd8ea37643d2c20a11" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e9ff5d4d80e5fc4af4938870fc7db44b" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e3ce09abe9061804bfa686f15e990bd1" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 28360704, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4194304 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 9453568 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24150016 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28344320 + } + ], + "md5sum": "65da55ef23cbe203d41b6ba0b9256a61" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "82b288c3572dfb5ea84e27ff322037d2" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "15a4dc55d296e60176cb3355f90a3cd6" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8024db08e975b53ec8f77e5384871c3c" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dfce8243056d607f81a90d2860e83d75" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "80aeee8f7b4436f34ee53af6085f1622" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.51.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "26e88aff09fa336272c0be8d07b7a0be" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b89c49874a7a8902acbb2406c15264da" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c8374c0c41588e0689245af2cbf2da60" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "54083bdf12ddfaa77df1e62f9aa968cd" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "45e75cc29ae8e0aea3b8b26b08e2f0c7" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "246c712e1f3ff137358dbb04e411a994" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.52.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "1c6039d8a87f6828aaa3f337fd7c718f" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "5f18828ecd6d819b19f7056c8ef5626a" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "7eb6089d47e8e0483bcbfade8957603f" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "868cf484ee66de9ef25c55cd32a8d6a5" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2a4d4fb5654bc88431cf188e3eb3f20c" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "17f84aa5d152a2d33f633e9b69181b86" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.53.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "3ad19a75c68eff63908f3cccaaf64661" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "3df6ce5ec451a6e5bb20b3bacd131928" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "28f209082177c9cc722dc7730aaa558a" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bcb10fdb9fdda9a1605731ca00cd0a7b" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ec09de7f33d7d3e48f4500f083f041c0" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b37ec873a0c3fb38d378d822f8cef3bc" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c2b0d026a2c3ae138c7b7f5025611708" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cd76c5d78faa52ce4a2f03133762d70d" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dfb9b575281764c84ee10fc19e550ad5" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.54.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.55.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "34b0030440d8d7ac2f51a7227cdc51f4" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "982ad2ab08ab4b5ae5f82605e6beb32c" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bcb5ccae322885fc3671c93cfd5c565c" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "04f15af8da92a0cd643faee1c1ef5200" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "22fe62dcc7e91ab89c907a3565b4badb" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b279c8900d3dcc289f3018f2135ae184" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e5e5793f0ce397c0c723bf7f495e00c3" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9e979d92ec30db5d00660afc1c41e4d1" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c6d7f5dc6949dc9892486d2a4a19c260" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.56.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "301bedb3bf3c6076b22f70af7af6a88d" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cbd155f17fe5ee5fee21466492e797b0" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d746f361de393bf7b2bddb3986820491" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0b2860309d0df36e092c71952a597cf4" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8763ce7c38aca082218cbe03f09be81a" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "61405657e89d712a39a0979725f94580" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0c00161f4a8a23f54eae977f22241ee0" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3e9a8c2eac1b47c6aac99ae9979cb2ce" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d1ba2915ef2a3561df9cc29ea2e22528" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.57.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.58.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "77068a102f523e351755f10546639d6e" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "dfed2bb6b7c74adf1cbad348a7478458" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "83a5ef0ffd442b1513b9b389c4e72b56" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "76325eef9428146b5dac0958265b103e" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7c14aa8333b7b7de9ff9c7f4990ca01f" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "adf9354b4e4080318216b91b5f23b78f" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "891d4441079463df0da197a8fee27014" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ba066f00a0d1197946ecd8f5017d710a" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1a1eb9824c02189cb17c5d3ec0140cbe" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.59.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "a24b16a605c204efa0dffaa9b9065d83" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d3486b2bf90b3400f0d1cd112c6bcc63" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "20b9ac67d44a29c255bacdaa85c63d4d" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b0116c69f33b8b04132a242d61dc2bda" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "87b2cf301b10e60a53149467f638cf46" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "32e99960c91c90938234ed0a6bafc04c" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dcdd86e6c720ace03b40a2dbcee05fcb" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.60.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.61.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "90bee1ca4402497984d054c4eb0a4a81" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6d1028badd991e87f1d854567b9c3544" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "62b404ddd4dec7e3468c7e0e39e338cb" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "61bc2f2d573c518e0df9a051008a0197" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0ff878404228014ddf51151942ff1776" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "4066405c7168401adebfa19981a79968" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c992720c03708c22b01323e89c1b8d31" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "614e4315704b3f7ccaf087fe1677cac8" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e1f52641dccd86972b88a8591cd7eef5" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3029644f9daa0601479ca43fbbe31e49" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "cf478133c7fe2de5ed8483d053ee85ae" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.62.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "c45e675f053b37324dcb7364ab3288a3" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "dd4be38696fb39389a93b538af56e003" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "83d1362632db77509069fe4cc194fed4" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e96740402d63995941da33bebf52d9a8" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1e6865adf9273750d4f5b83af6916a7e" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "38190de66b2434c6ae1c77fad5e3f41c" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6d5f98d657a7d0780d6b7b3f04040e86" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 29392896, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.63.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.64.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 29376512 + } + ], + "md5sum": "92795ec4e40dc28966f3af6ba1050437" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "72d99c207eff9c7866e947f10437b19d" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1f3c47a035a573bc491b1adf98543ef6" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ee77c503dc919297178f91144561eb26" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "730affed71c78ac24d75203bcae881ac" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 18907136, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + } + ], + "md5sum": "1ef55046055ecd0c2157df255e8b1654" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "da86ce433ce69a65c6a6f4c3e63278db" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "1265b9e767603fd22148ee38305e8500" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "640e3e7e9f8a1732974569b1102bc2f4" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d387a1a4ce0a005be553964dff6695c0" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "ca2b2f61501b7ec635ea91f0f8ecd6ef" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.65.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "435c83591aed21e40953eb84a32869eb" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c6f151f6303652af92359d21c314e31d" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "b6787b73f9846e9104af9a53a4ea9a1a" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "20e54f80b420df5f97f122f3ce05c2e5" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f740a394bf0ce04df4e3ebac93a37cc8" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e6dcf972e94a9bae21346dd0494bf9e2" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.66.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "c4df3647083a0434c23943f18935d991" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "078b8b6c56f9e636080cd1a7ff17c0cb" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6b21f5f72a1446c6cbc35b6acc2f5ec2" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "52a2dd195839ad7ebddf375ed6488092" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6468037a893886d4546983d84b193253" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0dab0cfc15f0721455db7abef0d4f8d9" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.67.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "9ebd485d7f2066c5674efb37c4cd960f" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d8e88c2b7bd0886abc49c86cd5a693aa" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "e5bdb8e0c0d86e00c5a398a3985d9d19" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "fdab17818eb36733dc193377ee14959d" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "342bf9715e6c015f1221495b017556a4" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "0491145da1b6016bbb792790a55a0eb9" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cb387678317fa6636ce9efade182fc4d" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8402246e5baa1b64774f8603f6162cfc" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e9b72b82c6b5838e5f1ad3629eee29e9" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.68.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.69.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "57278665bf3004b170620b14deb0eee3" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "27ced10b94f190d62031cd4adf6eb3cc" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "00dd54c33e7bf60ebc7ba02cf73f1ad9" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "4384d2c5bbefc929c821c759e094662b" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fbdeee4eebcdd48a9479fe3465bb8217" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cf8872f74f4a6af5a5396f4f2220b959" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c3b491c85c42fa193c10158f96c9f594" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "54c8c9c9a23b933cee2c29cb2cb77af1" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "290f3795d597b85f6b1c70feb4d9361f" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.70.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "71db32570c1cd251ec95643aeb9f6f27" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "16af5a0f5b0d7d40790db62b82a95c31" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3196b58071b3d666dc8b3b99c8d4f8f2" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f2ca8769c5dc688bb63abe9018ee16f3" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e529aa15277b6f49a9d0ed705e9a49c1" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "371c7018abb4a7da38bef56c2c130b42" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a30ddc9187c2289237e137a53d1e69e6" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "934af5daa25e4ec2b6b466d1c5dbe374" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6d3cec1ea39c85bfbbea672e2870a0e9" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.71.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.72.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "12809b0f99ad2f661f7d8d19153a9ec9" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f006489f9cece4396fb34c3577709d8e" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3f541f457bf15f6bc5194878600023da" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "3710616c273fcf0ab9a43f5c3b9b1e57" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ab84729be519fd2fc174868a22929798" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "3c59ded060417638e73ae830c7e92ad0" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f55584cc44d50fa9e7a12027439aec2c" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8d30f20c4cc9d25bc9b938f0930cdc9d" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "89eaca988615e44fb3766ef1441d1c31" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.73.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "1685aa67fb8b0fa067086f74ac4054fd" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c858168ba609f6b213719331d0303900" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "db049bfb0eec25fc0b9389f2d2e357d9" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "d5aa2aa2627c9c28bce25ea986c90175" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1b2b2ef5d5253ad9d5b89b13009af957" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7ebb0acfa73b68e349272c81c2fb9b85" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "39f124156c1d4bd1f80380a67739b5f7" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.74.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.75.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "00230d733d36e0de0647019d02d0ffa4" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2d3ebb388052f9e333def58cc9f1ad7c" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c477e4a0cfd9868485264704a27030d8" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "a567d277e73a29591d1515e8429e2eb6" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1f835e3c82970feb8dcf19242ef7ee20" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 18923520, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 4210688 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18890752 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 18907136 + } + ], + "md5sum": "032b7b206b28c4e525a9281dcfdf2aef" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "25589f2421cf47b56da880cf14bd3505" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "c43495b819713f904ecf9b9e0a7f5ff8" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "98fb62884dad3835cecaff290286d5c8" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "77f9a1c6e6ff311d07c76f562729ccc7" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3584 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0bb2f00f763fb3b42ad31dae4637324c" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 24150016, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.76.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24133632 + } + ], + "md5sum": "f38e2f147b715e98bd2cc250ce2877c5" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 57344, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "69a7803ef8bb9ce44bb36e4ff4d12fbd" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 57344, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "03050760332e70e5d3f2dc5c6b39ccc9" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f7b63171bed5a7895cb452e478f6f038" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e50c8889e11a4a1352a6143627676bee" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.qkv_proj.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3d0c265fbb0f80e6c0b95dc7ff8cd179" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 29376512, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 896 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14680064, + "byteOffset": 0 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 14680064 + }, + { + "name": "model.layers.77.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 14696448 + }, + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 19939328 + }, + { + "name": "model.layers.78.self_attn.qkv_proj.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24133632 + } + ], + "md5sum": "2d203dda4ee0d5e60cffaf0fc9cd3fd3" + } + ] +} \ No newline at end of file