diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8545 @@ +{ + "metadata": { + "ParamSize": 563, + "ParamBytes": 145412407296.0, + "BitsPerParam": 14.781631589720977 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 2491416576, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2491416576, + "byteOffset": 0 + } + ], + "md5sum": "14d764c6880f37a4807f92ea485f1a0e" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "441b344007cfae1cc7a32ea0a79dc419" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7b7a89e70c024ea2e26f64a5785d1cfa" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 2491416576, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2491416576, + "byteOffset": 0 + } + ], + "md5sum": "aefbe69dc0f57bd8324e1c5ca3f43073" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "40721ea9c9a79bb0090c2a6b5b66cbbc" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b256902d4296c6c0d56dafa2f4adf635" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "487b5576f42b97ae884b7b746a2d001d" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f1f1f2889f0b615f9dfa87b771a31be7" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "bf4f78c5a6e9cba9a331dfc566538229" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "db47751125f5aa7ca9e888c96b977db3" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1a275bfee89f3041a2c1f312d0902d1e" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7d9033afe8a6ff6238c8deee063d934a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d7e87e75e89fad2a209526d8d940ef7e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d3e92a3a6578bc1c7ca3778ca4b962a7" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "799989f1cbb1c57163c4ff3cee2cd01d" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "7193dadbeca208e6f11a06c150f40f97" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7a5c7593e8a881e3eb33c3c5c660abbb" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "68f75010c93376ce5b1a71144282031a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "f2d8cd64aa358477b0f5d0792eedc0d8" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c163c49afb03f314e7794bdc14893ac3" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a32841d5d965a60d150bc8a6020ba128" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0ae3db3dbc1ed68c8d7766d33e981144" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "65fec1ceaf1030773b8b4e4bd35a7974" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "18e907de8da94a592f736363bd20aa10" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "57cd05f0f7c10ffb52a6f11144c07152" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c3b0f0a6d04e9f761214a5f8240ddb84" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c5e36035ce58323f5c464ff1200fa90e" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "64bdaccb09a42a0e5309e1040e51411d" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c670620d4eab25da823f4a922d064960" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "cb9c06105bf63e1a635cc143c764bda8" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "9ca545c992c80794b507c809a6331a78" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a8184ce1581a6bb02ceee984b670f753" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "db97ec76e22579356c650ed09cc5cb3e" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e4201b82e1a12a26909f7fab4be8e8fa" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e61acc3174ea23178160dc8d6ee567a0" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "eb5ce4cac98ac90ca3375bdc900328f4" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "5d478b689190a83fa95f2484a6a59023" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8b4f894e6576e8aec4ccf240a6c01eec" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "fd72205d816d25f9214de923861dde09" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8bd218519edcfa6abc16150969dd5cfd" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ff93068515d6e3772bf35fb9d12d0ce1" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1a83d58d28c49ea4a54160eed491d904" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "9e528024ebf20fb96ff09f0d8e9f79aa" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "132e5aebabfa6cbfce207f2170024aab" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "9e4279bf0cc6b180be6b3a50eaf0e5b4" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "4d47ce0359e63e46c8ca2b800ddd2c2c" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "025372a3fc64084224e78b8971023db1" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0169b4981c09f7a1c5876ee9f7cfc16d" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "194cc23b83b3d2004a47df8b86079021" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "cba6b11968ceec995a9700300dde2dd9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b62ef0bd01a348702c8068fb56a30c66" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b92bdefa87f00b17756bb705a45c08fd" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3bb82c743e4006de80a61134b412b67e" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b44056b808af9a25a1e5a0552e4da49b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "89c4ddf8765e63a81f3dfc8b0378a19a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ab38f7131fcba7c71a1113dd2929c569" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "3b562c82bee44ad16decd208ac54ea06" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3a2bd6b879ea26f4dd8806287dfe287d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "079f7c8a85f1598a1e8c4c07e88bcb2f" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a1afc8cf6ff6b6527d5b53ca6f867e9c" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "15c280051676aa26b6db21d524e1e7b1" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ea2fca425eef7a596504554f6a4334aa" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "bc695080349fafcc716831da6712fd8c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "1349255ba0c0b984958ee6d6dbade040" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9e4262c74e2bf2b83a4bd1e468ed98fd" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "98bb1efa08bd9a94c3a85f7409b1a186" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "75498c2a02c48a78d95b254e9ca2fd71" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cd055c96c6b4693834dbaec5eab1bfdf" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "652ac8767019ca77c3e546c163fe166e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "f3adb3762d53dc38dc37a21c6ceb7252" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "53aed4d0d7005b6d0215f12905c32c56" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "bfa9bb343c1eedf20ff96d2b686fe68e" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "921f525b50c896efff58ca8012685243" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "fc481bd6da15a89c0411c8c032567516" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6ec050ee239fd2731fb1646fe5beff05" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8063c2b53c18d8ffffa7f3c5d8601905" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0dd95fdb5599781023809e564f3ba484" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a5cfb860d13b151899700469a034e96f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b040aaddcb8aadbc51c39316c4d488b1" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "99372d83a936cd2c7bc92e7f621cb870" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f454fc23f1ddd6d1523082fa0dd3481b" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b36d03367395d7804f1384a714ae1a54" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "25a6ea55f094c2b79deda77f0bd3c593" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8f6fcfca11fef010432be08bbf4cec21" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f8db7877f937927dccb41ec63268347e" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a2852e725eadf7d149f91db349f2f8c4" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c05d9c3b753347fac5c0d429ef8edb34" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e5111f2d04cd28ca1e4af1715d27bb14" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d12f5f89621a2a8887d6fe942714d8ac" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "96b6fa68a0a4f66b1459564aa3bf9107" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a1dfd460cd65ab606b27fcfbbe0f1634" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ca352a9951fdce8c5980609db3f96748" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "02ee30d1164ed2af1dd489a7838abfbc" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b33213fcffa050a78c7acf4d6a46616d" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "acd58b41a335f547d77f05b4c3f5b757" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b5bea61014f64c65c61b9af230a8eb7c" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0b119b2da004d97e10fbf0910ed10667" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1f8ce9c48ece8b6d2cd859adaf88e966" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "008beb17c4c3537796064397bc6f1659" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "6350ed5778c75d61394a757dc91d89b7" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c109678fa93f166130778f3d6a9af0ad" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5f8e8ddc69af67fde2da19255e235372" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "045fbedec95d362437c86f5f3b485972" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "509b29143ccc80e43b7035403489fad0" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "c30eee056a617f00564add6a6a1eac34" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "142daa467f0c8a1e742da827a42e3586" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f5b73c43d5f0ac4e9c58d4dea18bb3bc" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3e74e542ba76bb0e9e6e9d44ae1de074" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7f614ee9b93e54ddebab81490cb8d037" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e4b2c15c8e24ad4d489922e8377cdaa7" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "8a510bd5c2ec37b3cadb8bd7d9e3b9aa" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6409d1380d8a5ffcdf1e693d60917b4d" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e1b84c5c42af4bb9e799f932a2707027" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cebc1c6ab31ed62dcf2c4eab343ee800" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "5bdc51f19454d40b937eb6c2a1dc2a40" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "9e6ed9f2f638f1c99ec6414761786959" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2f8723c896c8d95708c21b2df005143c" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c57abd3d616bdb18f2c5165bfd45b839" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "eb601188732a9ea8e312a27ba72c299b" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "92c5e3f6d9208b681bf0603105601927" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "785cca45a5b645bd7d64cbfd5a9b50bd" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "57c92b2557b110fdb04947885018d346" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2020499cf53369c502dfc406384f445c" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "60ea24e60fa04ee175b9958ebb0fe1e0" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8c6a540473f148700fdb02799ecbce30" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "72decbd3a8fd416fe2a7a0ef2dc8625d" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "7a4caf8a96fa94a35bc59bf595718bae" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c54e8c9c04d8a9bb335bfbc7f66214f7" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1ccc07672e5766ec868f44613a0c21e6" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "9aa69ea005434b087cd96d9254a5f1da" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "96e0582f58b09ef58573230964d037ba" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "94f71eba8914fb3667bceec5ad8638ed" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cd7c2b7606db103593ddb6353bc083f0" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "474b39ea880fd31e0125743a890db2bd" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b6fdb53fd852536d22f0bd76d0e6dbf3" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c124c1933e5e9ee8657a02fe855daa21" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "5402ee372bdc7d92273d4bf78c201fe4" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "22ea62a2fb547263322bfdb67f9b26d7" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9f4bc08b80c89c6aec1abb8ab80b7531" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "0c6e23331edb94cc1303b1fb9e1d66b5" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "01be5ed85d204b36c6e050ddf89a4e2f" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c1c5184ea85e8e5ee39d2dbc7df7d4e0" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "486d29a91dbcc66e1ae62de10d8c5edc" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "6386849e20e1841eed2f7a0246a0cc61" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "157571065a1cce9516c99b9b246afc44" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a70ed87d945e277a46a605a7900a232c" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "28f0232651334302a6743e6f785ae2be" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d1d9a281d98ef76aeaa3f538f8f13e64" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "15b0a0615b66fc294ebb824c7410474e" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a0e8e3b464a7fac3d3fbbb2a2dd40ab7" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9e2797019107e5117fa92161d1449f5f" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c2708ea86553bc47a457cb2a6d9e2eb5" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "279c3573fe4bac04ff53a3b12418d4d6" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "30bd5514d4c950be3fa7080c330bd09a" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "97047d94415c2aedca46f82db0b6ffe3" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8a9ccf558348009a94b1855887b41436" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7b73e0b6791ad05dfcd6cd22c1c63952" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "c84a0abf708eda36c8d3450e91eb68ea" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "6ecf266855ad22708713ae8c9f7e6b08" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c415bf6608d116bf14925e86cc2abbd6" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "11eb93e1055e7acdc24b8e27ae1e6b4b" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "de1c354c59452fe09c4319e83914837b" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "61040cb8aab7ad1b402100a1c3e465fc" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "977e88dc7c790c13b7483c7c869fc459" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0b3761ecd918f341a7a686e8a9925d1a" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "7139026bc4fa834fc4c9ebf81f3c54d0" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "d150186dc490cdf0c3c24824a45a8ec0" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e46a1927c3cc6773df178737d4d6a33d" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "282ee3bc94c03a78895cb703719b0523" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "83eda8496e49a3675388060df1c5db60" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c86ae162ed3e077e5b64c6d41af5719c" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "b9a3c4463bb23e29ab83450c886a58d4" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "849738915fb1608f121e8d3009f680c9" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "58b01846b9912eed18cf19d8901dca06" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "ef34456a5e8077b7daffd5c28ae5fde3" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "35d1a75564274fd8ae95c585a8f6f1ba" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e5a6e065d5e0c243dad00c5a0de71785" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "2d87b238e00cf823bf584da1583b9e5d" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "4357148993d4bcc6978346cf0beb16a6" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "040cd0dfb7198f114e88c34a8cc23651" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "9bbd91d4f7f66f5c0feadd0a514b42bd" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e4169e3196c13ee62337e6ffb87aa6b1" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "795f973cf306fa4b5d2ac0ec7d395646" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e4ce0e42c591195a61b5ab793a1ab1ed" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "91288bc7cebccdd3937af7bed6e32780" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "d2a2b3f5f3c60b815c3ddf0376121fb3" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "bb1efaf9ed29d112ac43a8eb30de0dc5" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0c6296a65f99bc496c0c99f6a78cfa14" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "630e34878ff9dff0c6c7b48eb7c634be" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "5c84cb57b025bdbadd1347ba6fa786d5" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4aa8fbbef46ca7947c01c1bb4559e9ba" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "040c8eeaed159adb3181d9a7990b7646" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "73a29504631477ee0b8d6a95e8640f2c" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "75674601b2f7c75e30d77460bb57a8dd" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "558e85480c7405d67591303e26c58cf9" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "0f55225640be2a94bba67f16288fb184" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ca3be6480cfef23ffacdce9faca7bb2b" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "a46106737e7c80aae932309241a976bf" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "fdd0d7cef5b653ca1fb0bc3982d22faa" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "012b0c72ce7d4030ed47ca7b85abcb51" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a5beee559eb15e02599fca77be94ccb3" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "bafd668c35ef41c22d2903bd75b9fe64" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "12597232482b67c1381d24d9c2d4fe84" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "78d498632b89826f9c6f7d9e583eb3f4" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e72bc6fd763bad0ad7b1b7c2d3964147" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5dd6ba10116117890be0b220c5825138" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "61939eea2febd742a18842e4c27688ea" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "2764fe72be7e02ca952c3a6f79568134" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "629368a52d33f4331e69d046f31ac568" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "40637132f17d5b60d3d6b95f5ff4f97b" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e7ebded1b80a50d74023cf6d8e262b4e" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e098f2cb89bfa07e015ba6ba63ea3e97" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "bea0a1ba7d49dec859b9e36860c07e84" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "3a7d3eb6990b2142dc61187fd9f96ab0" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "96ff2a8b3e550dda18fb82b422e6aa21" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "51d0140800e71be2d35ff6589028016c" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "23c4278bfe70e801996e42b589c7479e" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a7985ff4dabe165f35d20a49b9ffb1a4" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8370e0e95e5051501c8145300f2cc045" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "40f3a44befeb28623daf696e05ef849b" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "3f5b2d138c86ca7ba5812573c547859f" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "1093ad29b0d016a23f0dca073492d906" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a0553d95161571af7a4fa858d07e86de" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "69eda2d395025b29395e9146d5910bad" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f422b63d213b7b3917afc3009d0ceb3e" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "45640c943876aa0a62c90c92b5e3611d" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8029f94384c2cc108a8120c753339535" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "13df791227553e0f4c7e4a09dd591170" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ce8700312286e1db2758aba0f069ff87" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d9b31e383af659953619baa6f623920e" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ad1e9605909da056842007624baf60f4" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "aaf09649cabe44126b674c13dfdb81a2" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0f577151ef008177df8defbb898d51e9" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b916888e1eb4aaf8fc81fb8ff6ae8842" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "35c9e0e270299f02dd539147c00a1213" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e124f0a901b7ea882df2950c8f3c09eb" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "47e2cbc05ad6ef462627cdb9cf415441" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "041ffc519cbe5e212a53237cfb94e27b" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5d8e5b0fc546ac52c83fc8c479dcfbba" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0f6a1c0b2dac63d598fe2ae998cfd1a4" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "79356200fb92e1fa5c1d10fdb603cabd" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "fe886dc2743c38c54d9c171c68aae813" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9d133b0f8c5fb6e2d3d609a6b087a330" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "67032fd0d1b0bcf51fb57947c837f672" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ac5ed35423565f631323cc2ad54cf9f0" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "2befb410c0ebaff86885809150e756f5" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "92c6be51dbe25dbccc72d9f1a2d17cb3" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "e5eed548e9a1ab136f2895179b5fc4dd" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "c3979bc159198f9d88267153b19be3ad" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8bfd01cb30e0cda175751f3636475816" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "67794bf1887ec0c421d6ab2ddcee07c1" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "8b60f4aac7d2fb9be0c0187b4503629e" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "66749ba879e1d4b6c773f1863fd6981b" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "31720a5203beef093c5f809c0f75cce7" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "ad32487b33ee37791c5b57aa551173ae" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "00d66f0ca8f64c8a134e79a9bf437579" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d32cc3516fc7b5722fdf97ff09101f62" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "436d41a7b46dd50ac8852f9e2c810495" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "cfa538217114031e9e0bbfa55118794c" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "0f748fa82303296fb44fd701b8e13bfa" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "cc3de7f22c7c67d1aff58288b6466bd1" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d6adb1e0267946052a0419c296afd16e" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5ec377eb8d9ef269a3760a0e9554390d" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "fda50be1b13c28bbafade02832f0c39b" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "a233d6ed653cbd8e6f1f006c0b9d2ca6" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "3c3f2a11d829bcb0cfc1b404254f6d41" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "11a5b211b7783d43f1da68651025e865" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "175b81fe6e5bf2b3a4028679f20983fc" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5897015191d6b8a25030008f37f8a5ae" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2211b4a219ffbac8736eea25a1feb9b4" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "145052cc917add033ea0326c7a6573cb" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "35da03e83e25862bebe87def06dae5ca" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "c122ea194fb79fad0b081548a0ca2748" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8e37c567a8f4ad62570aa9639a26185a" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "4854f115fbf4a41fdfae599f836413fc" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "2cbf61213347aad46aa29d9bec1d7ad8" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "4c545740f6d988b91e221fae3d4fca30" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "678df97bda283b8980d849d4b720c14d" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "45e5a7b2a7a9d50c858adb3ab778adec" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "d0968f671d3989bf89dc76724d512b7c" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "420800e88f07272533604317cf8ede24" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "e6b21c1881387e0d5e360dd59c07c2ac" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0ebe7e26bfef6ada42dea79aef4547e9" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "56c76258d8b84d77cf91c9591e4b4e65" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "504e229f44f2f6153735f5ea8d2cc882" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "f58c8a3f112678c000139e730c15067c" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "993df2f5fc72ffeccd3ea3e5ef9b71dc" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "effb6fccddfcb6e87d601a1024de0167" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "1ffea0abd085e3442fb39e16d0ae3f61" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d8c37ba1020bbbdc07392e57ed3c0dcb" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e5aca5d62a9b0b950d547d412aed573b" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "357a4b2a0d21fe05b4b77f5f3b7fe749" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "a3d9b956a240e29e5c2b86911c403cd7" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "faa4f6e0604bb55cc7970b4969bd68bc" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "8d558fb0ed587151b01da686cb9d9609" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "2bf2f09107316a62934b350247b226eb" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "2894e75e0b4c2176c2f12b825199b4e3" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7bc9d667345f8466c94d6b0c4dacad0e" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "a798b5ec49d5450fd59de46c4865e80e" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9ed0a40534bfe95c7df6f51743235011" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "71cfa989d52e360ad9eb9f542a581d12" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7bd52bdd0be3782d3bc9f61c13aa8af5" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "5718e80aecf28aff0dccee32fd524c89" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "74259eb64475640c76a225325f709ac2" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "47f597260cb8cf87938609d29e50f33a" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "d4052ce4027b6cc855da1cf26194822a" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "8a174e0ddc50be397cf2dd7920274377" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "bab1e89a756821531516823465a8e000" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "f025bbcfd419c136691e6c48dbbee363" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "6f3f5a5e78d636d7c0041ad66dc74323" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "b32bff96c87bc06494faade95c84ca46" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "b65fe0476e21f058832bc705e3da83c8" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "0809a8ad01771e7dbb4da3f1aca5f962" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "f858ac75e76a1ccf7886cf637e67e821" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "e4d493e762ce1797daa3eccef5e8da26" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "8d48cfb5e9e25ae7f3e8fd92c5ffa954" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 484442112, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.weight", + "shape": [ + 8192, + 29568 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 484442112, + "byteOffset": 0 + } + ], + "md5sum": "ce474355f278fecb4dace8910ccfe600" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 968884224, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.weight", + "shape": [ + 59136, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 968884224, + "byteOffset": 0 + } + ], + "md5sum": "7eb2d8ce567f538341d00f3dcae34000" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "d15e319b93808e2212cdddbf58ebdeff" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "96a450e8aba8aab584d5af51ecc54a71" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 167772160, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.weight", + "shape": [ + 10240, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 167772160, + "byteOffset": 0 + } + ], + "md5sum": "e3e74b670f9e4b2ba8cb4a67d91630f3" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 134217728, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.weight", + "shape": [ + 8192, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 134217728, + "byteOffset": 0 + } + ], + "md5sum": "9befbb1fd452790768d92b7c305caec7" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 4276224, + "records": [ + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 16384 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 32768 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 49152 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 69632 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 86016 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 102400 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 122880 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 139264 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 155648 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 176128 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 192512 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 208896 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 225280 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 245760 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 262144 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 278528 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 299008 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 315392 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 331776 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 352256 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 368640 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 385024 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 405504 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 421888 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 438272 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 458752 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 475136 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 491520 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 512000 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 528384 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 544768 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 565248 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 581632 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 598016 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 618496 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 634880 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 651264 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 671744 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 688128 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 704512 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 724992 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 741376 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 757760 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 778240 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 794624 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 811008 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 831488 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 847872 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 864256 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 884736 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 901120 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 917504 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 937984 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 954368 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 970752 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 991232 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1007616 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1024000 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1044480 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1060864 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1077248 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1097728 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1114112 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1130496 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1150976 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1167360 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1183744 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1204224 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1220608 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1236992 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1257472 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1273856 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1290240 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1310720 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1327104 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1343488 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1363968 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1380352 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1396736 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1417216 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1433600 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1454080 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1470464 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1486848 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1507328 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1523712 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1540096 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1560576 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1576960 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1593344 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1613824 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1630208 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1646592 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1667072 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1683456 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1699840 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1720320 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1736704 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1753088 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1773568 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1789952 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1806336 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1826816 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1843200 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1859584 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1880064 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1896448 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1912832 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1933312 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1949696 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 1966080 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 1986560 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2002944 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2019328 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2039808 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2056192 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2072576 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2093056 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2109440 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2125824 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2146304 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2162688 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2179072 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2199552 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2215936 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2232320 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2252800 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2269184 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2285568 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2306048 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2322432 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2338816 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2359296 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2375680 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2392064 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2412544 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2428928 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2445312 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2465792 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2482176 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2498560 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2519040 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2535424 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2551808 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2572288 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2588672 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2605056 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2625536 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2641920 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2658304 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2678784 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2695168 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2711552 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2732032 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2748416 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2764800 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2785280 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2801664 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2818048 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2838528 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2854912 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2871296 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2891776 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2908160 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2924544 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2945024 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2961408 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 2977792 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 2998272 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3014656 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3031040 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3051520 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3067904 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3084288 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3104768 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3121152 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3137536 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3158016 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3174400 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3190784 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3211264 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3227648 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3244032 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3264512 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3280896 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3297280 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3317760 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3334144 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3350528 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3371008 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3387392 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3403776 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3424256 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3440640 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3457024 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3477504 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3493888 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3510272 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3530752 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3547136 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3563520 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3584000 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3600384 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3616768 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3637248 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3653632 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3670016 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3690496 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3706880 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3723264 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3743744 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3760128 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3776512 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3796992 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3813376 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3829760 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3850240 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3866624 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3883008 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3903488 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3919872 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3936256 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3956736 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 3973120 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 3989504 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4009984 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4026368 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4042752 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4063232 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4079616 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4096000 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4116480 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4132864 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4149248 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4169728 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4186112 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4202496 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4222976 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4239360 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4255744 + } + ], + "md5sum": "0f7202328665846034cab834f1c75d8b" + } + ] +} \ No newline at end of file