diff --git "a/tensor-cache.json" "b/tensor-cache.json" new file mode 100644--- /dev/null +++ "b/tensor-cache.json" @@ -0,0 +1,7489 @@ +{ + "metadata": { + "ParamSize": 515, + "ParamBytes": 65524246528.0, + "BitsPerParam": 11.789048988991775 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1555824640, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 151936, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1555824640, + "byteOffset": 0 + } + ], + "md5sum": "3d5bb74cac95af1c2801cb727bd8b00e" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "7c0d033b7869b4fd3800e42ac8414e7f" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "b080d39d5fbf502e476b5a07c3716f2b" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "a4a56438b3c20a2c046b875b7039cade" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "19b86d9ddb04b97ab9351529b04ee19d" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "6bebdcecc06af82529148708daeb5420" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "002bda043373c2ba4c5a859d2c30a5e1" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 1555824640, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 151936, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1555824640, + "byteOffset": 0 + } + ], + "md5sum": "7a86dea18cd4af0d9c4fb7de2e9fe931" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "36aac9da67ad1fbe3d63f4e48b18b8bf" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "7ace92cda5bdd70734a36ca10d09c074" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "9fe9b8a154d292fc5d0aea5d2d96261c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "b249208b00f0a046a04ee78b4a27d887" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "6e5aa5d9652c0a1765ea4aea04dbe5b2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "e5c36433e9b9acaecd1ed6dd39ea808f" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "d60ea5d1870cab509bd4744ee2783049" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "f4980406ec17dd39abafeaa54a404222" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "d790e02c517a5d1c8238a3ce30cce093" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "a088b8268675a49be25da2b5cf28cc26" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "06d095f6315c704e86b94358d09489d8" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "6e98853f1e471398630a77e5e9b9bc79" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "8ae3f5aa5834127f6552e34fda54258e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "c08d719052fe517862b03f835e671c6a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "9295a82fe0bb000003d030b5f2766d0e" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "f780870148dd0c55ee20b7453f79b257" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "afde3d84a27e04540e48ea2f45a15ddc" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "1eda71f3d668b9e462da8c106e6e8ee9" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "ea2597b9bb3a50d48f947f55d1aebd19" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "2f9f1a80a263cb577e9e60a14ff86926" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "bcb1e5802cc0057a01c81daaab9fe54a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "0a1ef334dd5dbebca413f1eb38bc0aca" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "03ec36462b435d59cdcefd557847823f" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3d7ca27969331d2db361e5352df5f5af" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "8e4435afb0b3b7babbed289defb33f5b" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "3385bf32b42711ccf2cf0de03238b23c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "82b331c61c2d7dd64455cfde58be858d" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "a39c9d0990719aa9466ae1f32a584a44" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "09238ecabe6dbd78b56b36a12f7ee3bf" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "07b2582e9b8b88a57865dfbfd52e72eb" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "c8a07fe7fd9b0e1d00bca18966d17fcd" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "630a04da31b5c9e138be8fac8f9fc433" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "8058e6ad5387bd7c482773548d11c68e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "115e5dad8c40484d19ce085753014321" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "0d4b4897db976a7b3de1bddc68ab7aae" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "42c3c01f74c11980f48f263ef64c9dd0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "b10ad9257cf8748d161ca508474ab06d" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "34897218922d29b7836f71521780e5fc" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "2dd6efd882a305041c4dbd861989e760" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "eb5d641a8b74473d15a8639defafab1f" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "fb6f8ca031d769005dddda4a2a1aa8ec" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "382c8273881d170c4e20e05286b68902" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "7d6f6bae0f660d63bf2ae8550c7996fa" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "99356840a7619df7e9b2abe399969c0e" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "64a457b9eff2258fe62dcd94038c1eee" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "75f4950520c54091962b6ca9c14f1384" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "b9f0f2e2e68dea49de9f91dac7dac506" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "a306bead50c70ac28fd2713fa9419f0c" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "9812201e92f7859e01c647a8048e2a59" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "1af4ac29a71117d8a0c405cb93141b90" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "87f01b291ca424dc4425db0f631baa38" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "c01c94d4786f6a6d289b78a24922d82b" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "426e73563c8187e9d546cb797fd62f0a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "752043de60b45c66f4272f0a0efe3592" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "7966dccdee681dfdd70627169f07d670" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "cb1690d4a2e29fe34801d5b83049e719" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "60769a41ee345f5c27c6c7212d0b78ae" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "aeb0cbe91474ceec26b305dbef856fbd" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3d44bf86d954deeb1a0fe08c36f7846d" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "eb668397d02346d23417a47f4c6e5c23" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "d4139a3c71a6b8988b30797b17fa769f" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "a6d4cd630cdb69b6a40d125fd24ed5a8" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "10d2ff6f5eeac8c20adfab29bdfcfe24" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "9b01d19b298d9e673aa14b7104476026" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "0062e44a314385ef6b0473e1b7ccd20b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "7e0511ac634faa50430e502db1b4e45f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "729f2bbfbbadc5817d22fc264a89a23b" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "7db8c0d5cc3d09fd1d2fae0062026b24" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "1ff660beb10dd9842cf112a4151f2940" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "a45fa859fa3af727a13419bf9671c05f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "7ce72a3d457950908a4adf099ce2b381" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "d9a52b75741783bf7053f5a7952a4bd7" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "fadfe0c2778657599c86e51ad3b1ffa2" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "65f032d4dffec8f3975bc47cfedcb646" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "927b65678cf3524a46ec89c94cabff2d" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "3ee65f67fcdf5d671cef7068bad87416" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "616ec37ee748bf8baaa86ff84e69f4c1" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "fb809c2cfb93fc173576da164612832c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "9f5e03368a1a87083e3a0245230eb1cd" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "8d348482db98bf276e171178a74122c0" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "21452afe5b496d08e0ad81c83167c18f" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "b6379c7167d66214aa925ff47c782b18" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "78803a785f8995f8f89892e66dd113e6" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "8639b5636a0f6fa34293103a8b4156b8" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "abd923a9c5b44cd1e0f84ca988b34b8e" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "e5c0c11b52832d6f9ed05353db1bea1e" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "1ba5723642501ac97a31ee3275be18b6" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "c81a311f5b3996227910a4c9edaeafd4" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "f28684b207b9ffc8177e0c22eb611469" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3124e0f4664a604caa7018850bba18d5" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "24e819cca8e9e60d6a5f9a89c912fdf9" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "4e8d28ffbd30db0b68cd15232bde2e28" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "676b9bf0643ad97f3a3622f5db4635d5" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "16cff334d4ec950e40978bc2a8b5b18e" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "9b770228e9a792e309ffcf35a0fd2c19" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "dd811542ea2b5831ab371cea83f6bf3b" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "77f9613972fa70a34d3c1afdeb3f3742" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "2ed383f1ae82b5dbde06a0cc5bb5e848" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "97bc0ea6425a49b1078495d9b9bb519a" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "bb3149727af804b5f0fab3aece8ced0f" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "025dbeb3d95804712b416f16dd71fcfa" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "2293cf88265bc512d5c972858759f830" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "bcffc384a4d0a27b545d7132df2676d9" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "f8238819664fac41c771ee76323572f8" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "03b0deb3fed4365c71893a251ace85eb" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "9ee8632c899374cc36f7c533bd5dd84e" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "8ff5eec85b5be49fad3595cab51fd0c1" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "b1dac71020c3f246cbde5ea2f6112b72" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "c1dbeaf586463c0982329bd7c8d23cf9" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "67cdb5606b53029e84a126af38b29e2c" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "1ea8cf768948173f2553530edac2e8e5" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "3ecc2abee0040974642744db330c3e81" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "9172f82576debf98c98671e40689cb2f" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "f50d1274f634dfe80f3e04fab974d4b5" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "860dca220827db5f2f1bb71bff25062e" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "a77ed18ce2d4e193c98994ea3a716b6c" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "7d17d166f0d2c8320e854996c4c965e3" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "0f1f1d90ae646d4f88a25a024ffc0047" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "06dac4b1e211abd5c36ffdfd496b4930" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "903fdeb30cc8d8b5230ebdd38788d38c" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "07132a0146b9058e82e9772e180d1465" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3000ef53483d2dd185ef5f8065aa9faa" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "0451dcc810fbb9529c7e8cf08adb3d1d" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "cc2942d5e888325bb34a04a68159205c" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "253aa91b707e3cf4fdb5579addca29be" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "dc4687ced64088cd9324c3a605bfbc4c" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "87b0f3b371294e611bbea8af86946e12" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "36d352dc86cd99b8763f180f5fcc8de6" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "2759e4c4e2270cc89e2143817e3b44ae" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "66aad82108a576de158d46c4aac92f3a" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "1d72fc7b2a38a5fad2a58e78c50bd857" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "3d34f3c09052b2a9286f70e9ebc4a1c2" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "f77a05a3ef4f33ac53586481a21c1bce" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "35fc9cc56ad36028de3391fd47c205b3" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "6c42204d14da099ed5e38e1ab06e78a8" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "72d348ed1989ea72912c264afff038ba" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "0b9c79406108990612a6ce9fd80ec860" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "a407552f53c24ab0723784fc41ff0899" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "87ece3075c189b77a0c1808853f96d56" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "b2006d6ee25b3547f6e8fa4a7000c75e" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "34a31c9189e998358a3b7b78142da93a" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "96cafccac89ca2b2e186170b6be9b9de" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "7118228cc3bbe5b5f37edddb82b9ebbb" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "493cb237803989a6feba6b4f5051ac1b" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "41f71bf94708a57dba2a5efce5d7409d" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "ab69b811ac9377b92d6639deeaed6918" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "29709ada5940f2e83e9e5286a6004564" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "dbfc341f6b048d45351e5d7818e403c3" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "02b66faed984e9f93c09b64b509fb5b7" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "7fd6e516b659aa19ebc13964e126c2e0" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "d89fc37f511c99c1a9b7a60bf8af5b64" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "75e9502edab0ba791a866395ebd9b26b" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "84959d091ac6c4bee48d77d0b4d4a090" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "9ffbcc9f6a0f8de40cc4a5f759d5441e" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "1bdc2754b56708e53a04cac20ceb9301" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "40e47b3ad51ea2bdb8c6d1fba64cd355" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "16e4d35367f8f03fa95f349e8751165e" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "a7a085f0e329f8fb08a8e82ccf543602" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "251c8373cd83c748ea02b75f900d9e0b" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "6f42198b515050f37136958b66bf64a8" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "74aeb1fbe136b583fa76be09865741b8" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "9a0b9d3c27598fd6820afc8efc649fb0" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3a73d884767d57dd1b54b105d8bc98e4" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "7f63e07da5e60a93e2989782a62fe338" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "fc3259d445d947798a50aa274d0c6249" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "b92f5f3c3aad1a6f1112f5131113f5ed" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "1105cd8e2b5931c3e72fb16a8ecbb0e6" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "1686a130ca6c23a57658331a6359274d" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "4b1f0b05beb18f56e42d510a54724dac" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "7af12f9ed87c85a2dc26f3cc2bf2b36b" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "ee8d8685f3aaa83a5af9c2ecef9b0f64" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "c08d65bc8fe945c8cb326890e4c8a9db" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "15b902c2ce511a9b81a714c1f5f5770c" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "ea1dc894f6abb78fc7add6c031089834" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "105a167ef20613dd587d9396675f4c64" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "7b537a15257ac4213a2a12f6e100da28" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "d98fb9839cd88151a7f8e9897689007a" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "e3f8d9585ac28968967ec535dbf3e460" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "f367a545db81f366174837e9d1b86052" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "403b2bcf6066f79c10c9deafdb9085a8" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "eeca4e2acd87ea0845bc149fb8894ad3" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "e7eb7450b9422f7d04af18cb047281c9" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "0478b154f9f19de137058ef8c119c617" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "0b8d1620eeaf5cf1c672312159eba468" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "9f7f02485027e24e23fbe87052025c6a" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "c9be03ffdccdbd503c9beb97c49c6f76" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "9490a9b00dd59be4b2e676766050c67f" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "7bac26b499e3245f3ad144df760887c5" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "500a55c0ec2b8fbd6e2010af47c0c97a" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "4eb79bd5ae85490ce8a7acb74a7926c6" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "8d4e55d5d576c526b8a80ca41cfbe299" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "88d4cbf9aebad328bc996d6decafb0f2" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "905612853332da1f51f9f6ff17c351b8" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "373b056073ed2ce55ed6ded986a383e2" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "060a5fd45cb00a01850db7e0297f8a21" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "f0b79c7ea6f034292b23fee54e4b55a4" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "acd3b54ad1d2df18fbec679f06a8d7c7" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "9c20f6737c61ffbff22a2dc209490345" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "25d0b859e47cae61fad806dd824a9eec" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "d7143793fca5a7512c3e0fd7024b230f" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "6de0ba79398458f053bc15c20e07d2f0" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "510e818fc3f33914c5d061fcd872109a" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "9a72ae54f8006f6be7e7771d23a71298" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "c0e3ba46f596e9b478f59a66f2377b5a" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "201129849abc3e30f682934eae5d4303" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "03fc5b512626be46d83e825033385f78" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "c8807faea89229c47d0b2515b0ce8c2d" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "0f7303e9b640fb61ec4c68c381757930" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "249ae968569af1b8247043a48775b82d" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "9c6d238f3dac10cd4d164a47acb411bb" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "789869b0c18b21ee37871dd1d3c63eb5" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "da9943596b7425b783d6614ec9674112" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "7f0bbfa7215834d8d4e3c67972f84f9d" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "c0442d80d234f5b072dec4ccda8b9049" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "f5b764680031e92f0135a09bbb510fb9" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "0c19d93d0d012f9e1f6e7909eb1698ad" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "ae39332c400856f282bcde93bec696ee" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3019a0094d5cdd09615fe18a83f3275e" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "69837ed0265c5a7941118fefafb581f2" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "fa80a4b140b72ba2c7454c0145fa1e56" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "cc50756eeddf1c6edaa9e82a9f4771ce" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "6881b4b3ab245b60c7a89dd9b875b08c" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "9b01b73ea4dc0f6f8dc4398ac043bf3a" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "899166e65b89611814777eb6b3b936c4" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "5de89ac4e611f03db47298470f33af2e" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "43b49dcf83d4fe8065634c97b30475b5" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "c003994a00642f8356e2dad2cf29c0b0" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "e6099118b7df2d98c7b429aff78decfc" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "b7fad5fb4d7daa3e783fef2d34c71d01" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "6ad4422beec0bae1b0e5d49bc16b105a" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "438c4b5a1844753b5dd7f2db799d9e38" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "ead0345302c1dcc42a799c40d1800096" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "08f8d2d20dfa0837cbbf3dcecc9fdfda" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "3ecb52cf730c408c467c95af0d986db2" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "fb95cc1733e7ac4f5ce16b9b425f02d1" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "74b855a86b338280213244bdca12700d" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "b01fcfe9e10c0b7f8e59adbf675a9bb4" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "94a5df1079590c4bd6dbcc2a29400ed0" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "f6da87870e60656915a439a4adbc66d9" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "67739cfbd478e3ffaed25055347375f5" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "c83aa6f2b1cfc9fba3f164131291102e" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "ce17a83245b953f4e3a0724e7ded4bc4" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "4aeeac27b2cbfc81d19353b3ac410860" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "6aac0f9dac994b9cf93b1801857cc1e1" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 262144000, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 5120, + 25600 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 262144000, + "byteOffset": 0 + } + ], + "md5sum": "3b16cf05be0d436cfa460c92c2c6a8c3" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 524288000, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 51200, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 524288000, + "byteOffset": 0 + } + ], + "md5sum": "8c6c71c33a5ad171ce2adc65275bb183" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "5b0a25c6d6ffd8ce1239d4c27c40f13e" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "eb895cae563fd099311cf19ceda94591" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 104857600, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 10240, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 104857600, + "byteOffset": 0 + } + ], + "md5sum": "256482f86a6e32f506a0a7968d377b63" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 83886080, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 5120, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 83886080, + "byteOffset": 0 + } + ], + "md5sum": "e391d0bf2ae0b8358753a1ef62d43b3f" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 1353728, + "records": [ + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 10240 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20480 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30720 + }, + { + "name": "model.layers.63.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 40960 + }, + { + "name": "model.layers.63.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 41216 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 41472 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 51712 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 61952 + }, + { + "name": "model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 72192 + }, + { + "name": "model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 72448 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 72704 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 82944 + }, + { + "name": "model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 93184 + }, + { + "name": "model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 93440 + }, + { + "name": "model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 93696 + }, + { + "name": "model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 93952 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 94208 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 104448 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 114688 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 124928 + }, + { + "name": "model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 135168 + }, + { + "name": "model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 135424 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 135680 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 145920 + }, + { + "name": "model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 156160 + }, + { + "name": "model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 156416 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 156672 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 166912 + }, + { + "name": "model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 177152 + }, + { + "name": "model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 177408 + }, + { + "name": "model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 177664 + }, + { + "name": "model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 177920 + }, + { + "name": "model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 178176 + }, + { + "name": "model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 178432 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 178688 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 188928 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 199168 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 209408 + }, + { + "name": "model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 219648 + }, + { + "name": "model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 219904 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 220160 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 230400 + }, + { + "name": "model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 240640 + }, + { + "name": "model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 240896 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 241152 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 251392 + }, + { + "name": "model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 261632 + }, + { + "name": "model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 261888 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 262144 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 272384 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 282624 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 292864 + }, + { + "name": "model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 303104 + }, + { + "name": "model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 303360 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 303616 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 313856 + }, + { + "name": "model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 324096 + }, + { + "name": "model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 324352 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 324608 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 334848 + }, + { + "name": "model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 345088 + }, + { + "name": "model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 345344 + }, + { + "name": "model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 345600 + }, + { + "name": "model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 345856 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 346112 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 356352 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 366592 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 376832 + }, + { + "name": "model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 387072 + }, + { + "name": "model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 387328 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 387584 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 397824 + }, + { + "name": "model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 408064 + }, + { + "name": "model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 408320 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 408576 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 418816 + }, + { + "name": "model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 429056 + }, + { + "name": "model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 429312 + }, + { + "name": "model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 429568 + }, + { + "name": "model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 429824 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 430080 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 440320 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 450560 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 460800 + }, + { + "name": "model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 471040 + }, + { + "name": "model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 471296 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 471552 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 481792 + }, + { + "name": "model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 492032 + }, + { + "name": "model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 492288 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 492544 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 502784 + }, + { + "name": "model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 513024 + }, + { + "name": "model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 513280 + }, + { + "name": "model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 513536 + }, + { + "name": "model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 513792 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 514048 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 524288 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 534528 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 544768 + }, + { + "name": "model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 555008 + }, + { + "name": "model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 555264 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 555520 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 565760 + }, + { + "name": "model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 576000 + }, + { + "name": "model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 576256 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 576512 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 586752 + }, + { + "name": "model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 596992 + }, + { + "name": "model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 597248 + }, + { + "name": "model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 597504 + }, + { + "name": "model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 597760 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 598016 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 608256 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 618496 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 628736 + }, + { + "name": "model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 638976 + }, + { + "name": "model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 639232 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 639488 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 649728 + }, + { + "name": "model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 659968 + }, + { + "name": "model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 660224 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 660480 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 670720 + }, + { + "name": "model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 680960 + }, + { + "name": "model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 681216 + }, + { + "name": "model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 681472 + }, + { + "name": "model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 681728 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 681984 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 692224 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 702464 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 712704 + }, + { + "name": "model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 722944 + }, + { + "name": "model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 723200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 723456 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 733696 + }, + { + "name": "model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 743936 + }, + { + "name": "model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 744192 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 744448 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 754688 + }, + { + "name": "model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 764928 + }, + { + "name": "model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 765184 + }, + { + "name": "model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 765440 + }, + { + "name": "model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 765696 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 765952 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 776192 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 786432 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 796672 + }, + { + "name": "model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 806912 + }, + { + "name": "model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 807168 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 807424 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 817664 + }, + { + "name": "model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 827904 + }, + { + "name": "model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 828160 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 828416 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 838656 + }, + { + "name": "model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 848896 + }, + { + "name": "model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 849152 + }, + { + "name": "model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 849408 + }, + { + "name": "model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 849664 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 849920 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 860160 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 870400 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 880640 + }, + { + "name": "model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 890880 + }, + { + "name": "model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 891136 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 891392 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 901632 + }, + { + "name": "model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 911872 + }, + { + "name": "model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 912128 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 912384 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 922624 + }, + { + "name": "model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 932864 + }, + { + "name": "model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 933120 + }, + { + "name": "model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 933376 + }, + { + "name": "model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 933632 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 933888 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 944128 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 954368 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 964608 + }, + { + "name": "model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 974848 + }, + { + "name": "model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 975104 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 975360 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 985600 + }, + { + "name": "model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 995840 + }, + { + "name": "model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 996096 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 996352 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1006592 + }, + { + "name": "model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1016832 + }, + { + "name": "model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1017088 + }, + { + "name": "model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1017344 + }, + { + "name": "model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1017600 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1017856 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1028096 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1038336 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1048576 + }, + { + "name": "model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1058816 + }, + { + "name": "model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1059072 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1059328 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1069568 + }, + { + "name": "model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1079808 + }, + { + "name": "model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1080064 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1080320 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1090560 + }, + { + "name": "model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1100800 + }, + { + "name": "model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1101056 + }, + { + "name": "model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1101312 + }, + { + "name": "model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1101568 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1101824 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1112064 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1122304 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1132544 + }, + { + "name": "model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1142784 + }, + { + "name": "model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1143040 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1143296 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1153536 + }, + { + "name": "model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1163776 + }, + { + "name": "model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1164032 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1164288 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1174528 + }, + { + "name": "model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1184768 + }, + { + "name": "model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1185024 + }, + { + "name": "model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1185280 + }, + { + "name": "model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1185536 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1185792 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1196032 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1206272 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1216512 + }, + { + "name": "model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1226752 + }, + { + "name": "model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1227008 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1227264 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1237504 + }, + { + "name": "model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1247744 + }, + { + "name": "model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1248000 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1248256 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1258496 + }, + { + "name": "model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1268736 + }, + { + "name": "model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1268992 + }, + { + "name": "model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1269248 + }, + { + "name": "model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1269504 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1269760 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1280000 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1290240 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1300480 + }, + { + "name": "model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1310720 + }, + { + "name": "model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1310976 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1311232 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1321472 + }, + { + "name": "model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1331712 + }, + { + "name": "model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1331968 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1332224 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1342464 + }, + { + "name": "model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1352704 + }, + { + "name": "model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1352960 + }, + { + "name": "model.layers.62.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1353216 + }, + { + "name": "model.layers.62.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1353472 + } + ], + "md5sum": "58261acfa939795e4ae3cae09f7d11c6" + } + ] +} \ No newline at end of file