diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,12215 @@ +{ + "metadata": { + "ParamSize": 849, + "ParamBytes": 470187269120.0, + "BitsPerParam": 13.014593947301632 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1244659712, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 151936, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1244659712, + "byteOffset": 0 + } + ], + "md5sum": "14d53a10bfd68bfddb4a50db3f35d399" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.93.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "c10338566cf6479eaa11c76cd2ce3848" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.93.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "590dc6c447aab2e9cd11a271f7ef732c" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 1244659712, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 151936, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1244659712, + "byteOffset": 0 + } + ], + "md5sum": "7b25b4e6ee3efc5fcfc9884c1acfa6bd" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.0.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "a9e54bee39203214ec8aca464697649a" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.0.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "b0d3b61b49f9e50aa1ec360bc9b50bee" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d2bba38c33e9ceaaae28b3b60bb93b2b" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0b86ac96a36947c9fc95d633decc8b7c" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.1.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "6a79d4c49c69741d5b79d2caf163f6bc" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.1.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "e015ccbc03b61d16ef248b6ed08d1f79" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4e4b640ce7e45330217d3755af4aa845" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "51a37259b1c54339010b3c95057bf91a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.2.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "948be47846020e85ca33c9593319bede" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.2.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "a76b50e876202f242783924cdfb52a32" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4f9d07e306a0859697cb9c63b56a1daf" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "10025fdd0a6a6837b16d68aae9ba4f5a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.10.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "50e8210952348271a983620c60de1c6b" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.10.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "3775a1f7d93a06a0faed886f2542643b" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.11.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "836f186601e30704908d233ee05f53cf" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.11.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "393adbc33cff0628200872d47ff9a9b1" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "012b33d49e8e6dd0df59e76b1f0c7db0" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "245afeecf2ce287c3dae9a0ec4af9289" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "fdbe85cafbf00f2008eac202ddc47b26" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a85776cfc28ac312e3345075835decbf" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.9.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "309b5f47441b98aed1fc0692b5e37b94" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.9.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "fa7a3402682d863d93b6ee3ff2e033e5" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.12.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "5de7fb1a0cfead933d9604e505821bf3" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.12.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "47e3173ad4c13610b68489d622441e53" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ddfd74fc1a14ea9aed3e29c9c5cf60cf" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "e79c01926c1ef427e72ac43543873a2a" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.13.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "84d0ea0c654cb33600834185796e7dd6" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.13.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "dd3b37a302208dd63f0be64f72bbdf4b" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a68f1e7dbf7681e1af0fc1d1aa43dd0d" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "986d576e5dd81bd3fe20551633b930ba" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.14.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "f1594aa197c5b6a900661d41830860bb" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.14.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "f9f0523020ee3cd1e5357570f044b4bf" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2127b9c6981844d4dff171697f9154a2" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "fb31a1bf0d804fda3437e48c1ce239bc" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.15.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "f8d4f2cc3017ae1229356aa9414f9d5a" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.15.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "510744e474954007d39bb2b9d5d403bd" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7d0b5ab47133f26018aec040eb469150" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0cd2bb4e68298e839589e8a3a4342403" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.16.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "31669b271ea7c745bf08ab55021734df" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.16.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "2477fc4034e53b93fc2e47a1fe275d12" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9c12dbc321656502160f9b559da34bef" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "817e67219051cc7ebc5a5cf4d72af9bd" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.17.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "bd75c2a2f471b554c458b3d887d59869" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.17.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "f4828410347e1068a1202e52c820c418" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0ee5421bb3227ee3e860894662422574" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "99c910f4263c29e5a79497fcf555620c" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.18.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "63baa0b0e15a069651babed2c648e891" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.18.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "ddf28861bb6f7e2b19c12cfa67bf74b7" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7ed42066c11350a613fc720511950a79" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "3f05bd2ca7a9f1b100dc5e4759b87899" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.19.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "7f247401284cdca1071e5e2d44efab0d" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.19.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "2f7d6334d81f2d0741112ff7d74d99e7" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.20.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "5739bd59cdd88aee5708c531c79b6acc" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.20.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "67c26a149f9a26a90d414f93adaab296" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ec42f609fbb4cf8e2c7e787e082d6dee" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ed7cba6370bfec187de289507c79844c" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "119dac0d33d572b6053b1704864bb830" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "e71b8cbaa7dfe3c0d4a3438fec98dc22" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.3.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "c09afa0cfa1b925ca114e17bd8ab3e97" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.3.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "f53669d6247c3209b6366b42cc327b86" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4e3e95e445a94dc60178075ac1c1582d" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c415cc2b7fa566d5eead38dcf2dc9386" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.21.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "5d9d802f8904d83d16415c6e99547f1a" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.21.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "54e488af488a1722ea4b50e6519c0a8f" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a733c795cfb2b20b3923fea3036b071d" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9525d04a0346abde96b1ae6835051307" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.22.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "6e6da4bef8565f8119fb6b0bc8c7cf68" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.22.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "f0a5b4f6c9fac3e68cacd03a50cc4c33" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c444990a9f51a055925f5daac4e3708f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "66de1b98d2457806b87d8115a35f0545" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "41c403fbd267ca056df38d8a96a5d470" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.23.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "46824b5a19b4f9ca976a8e5730a061ae" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.23.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "950231a2f0100380b4f026087ff9b172" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.24.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "f108b4099a4c640d0d75c85236223958" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.24.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "29d302c832efcca0b01154a407b7e865" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a168ab14214d5f37ce56f694ba78c547" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "62b01635e5df6d6ea9b96937d60ccde6" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "124d533062bc5b0a93e7522cd1fd7a87" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.25.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "0905094fd722331f16ce4f2ac31833e1" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.25.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "6bbd67f7da4ac70a45549aa7b17f5d7e" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3e6fadcab20432db39a510b39e096dcf" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "72dccb3de69f362c94d9bd571c28ce90" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.26.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "d3704fd38fe8637ebf06a763e69d2e16" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.26.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "458d20df6faee3caa1816378ab77cb0a" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "74da278215ceea56fab7216e1e068017" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "e2ae35be050fa3f0ff7b806ddf701d04" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1143dd05032eccec172ef2538b4eedea" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "7569c77a979f634f36f5b97fc23ce968" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.27.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "931009f89bb9627cab12f36d6c62a161" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.27.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "5b3394b2907be444837328a512ca818e" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.28.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "00e9501b4f2b48bd77d67331f30c3dc0" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.28.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "de76f1681eb2d1655a982d6493cd7c53" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "477e78323cc7457f27f92a0ee34d0fa5" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "3ae8f9b2d75009918d18e8f307d6d9e5" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.29.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "4d00c6e686190204bf3d63fe39a6ec56" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.29.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "5c84ad702dffb71bb2d69c5166ae39f7" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "003db4222ea14e2559fdb47d981836f1" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "8a78d54c5ea835f8d700e77c306cd066" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.30.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "3ec4038429b17949363278a1db34de20" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.30.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "cf45328a62247ac87196085715f59043" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "36d5fe58486c234ce981c918a4c3f361" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9d9dc0ab9e93ff78cef166cc0c438ea5" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.4.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "14c57c8b803e58b9619a7c3655e41878" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.4.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "6c8086b6e580c7338eb0d407de63f82a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5faf9a28caffac307099ef8cfbca027d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0bc58cdf0e819f19ff1e2f5c5e64e645" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.31.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "883498fb89ad89634c2719b55b7165dc" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.31.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "0d4b2d4ff1142ded74675099ec96ad62" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "39126336b2ae8876c38fc82e09c8d6ef" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "fdfae28ac413e0127e7d961092ba121a" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.32.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "26d7e4764381ab95b60cabe54f13c3cb" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.32.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "031e1b788f309fd870551339e9453865" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9cfb9bd139430c466f9e9c4d8a6b6136" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "b0c38958c7338e7a13e457b538c0c1b9" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.33.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "ea18d4ace2fd115a520db57162bb6c06" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.33.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "82b4bcac9193273b24bba0a715c1c462" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "747d59f4b449c7ac8490e776b78b207f" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c78539b0e7fcffb0d1bd8c82d8623360" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.34.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "36306515779cd5f986d50d3bf1dc3ecc" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.34.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "a275582c9d1ac3c59de2c0ae36743bf1" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bc1778897e3aac1c7f2e0441d90a2f8d" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "3a6d95aead6ce3db2beaa1705c890f63" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.35.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "282ce0ca3f03074d3d4584d45029922a" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.35.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "e74393636df9fdd9d05891ec27573310" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "658ce8afcc5dcfd228dd1ebf33906186" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a9fea81f70086f0c80df868924dfafec" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.36.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "98e9ad835d06b9f99981f0837f8780cf" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.36.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "64a9c482889ea3d94a89da0d403bc2bd" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 33054208, + "records": [ + { + "name": "model.layers.93.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 0 + }, + { + "name": "model.layers.93.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16384 + }, + { + "name": "model.layers.0.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24576 + }, + { + "name": "model.layers.0.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1073152 + }, + { + "name": "model.layers.0.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1073408 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1073664 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1081856 + }, + { + "name": "model.layers.1.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 1090048 + }, + { + "name": "model.layers.1.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2138624 + }, + { + "name": "model.layers.1.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2138880 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2139136 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2147328 + }, + { + "name": "model.layers.2.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2155520 + }, + { + "name": "model.layers.2.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3204096 + }, + { + "name": "model.layers.2.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3204352 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3204608 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3212800 + }, + { + "name": "model.layers.11.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 3220992 + }, + { + "name": "model.layers.11.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 4269568 + }, + { + "name": "model.layers.11.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 4269824 + }, + { + "name": "model.layers.10.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 4270080 + }, + { + "name": "model.layers.10.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 5318656 + }, + { + "name": "model.layers.10.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 5318912 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5319168 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5327360 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5335552 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5343744 + }, + { + "name": "model.layers.12.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 5351936 + }, + { + "name": "model.layers.12.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 6400512 + }, + { + "name": "model.layers.12.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 6400768 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6401024 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6409216 + }, + { + "name": "model.layers.13.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 6417408 + }, + { + "name": "model.layers.13.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 7465984 + }, + { + "name": "model.layers.13.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 7466240 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7466496 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7474688 + }, + { + "name": "model.layers.14.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 7482880 + }, + { + "name": "model.layers.14.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 8531456 + }, + { + "name": "model.layers.14.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 8531712 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8531968 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8540160 + }, + { + "name": "model.layers.15.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8548352 + }, + { + "name": "model.layers.15.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 9596928 + }, + { + "name": "model.layers.15.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 9597184 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9597440 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9605632 + }, + { + "name": "model.layers.16.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9613824 + }, + { + "name": "model.layers.16.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 10662400 + }, + { + "name": "model.layers.16.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 10662656 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10662912 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10671104 + }, + { + "name": "model.layers.17.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 10679296 + }, + { + "name": "model.layers.17.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 11727872 + }, + { + "name": "model.layers.17.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 11728128 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11728384 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11736576 + }, + { + "name": "model.layers.18.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11744768 + }, + { + "name": "model.layers.18.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12793344 + }, + { + "name": "model.layers.18.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12793600 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12793856 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12802048 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12810240 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12818432 + }, + { + "name": "model.layers.20.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12826624 + }, + { + "name": "model.layers.20.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13875200 + }, + { + "name": "model.layers.20.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13875456 + }, + { + "name": "model.layers.19.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 13875712 + }, + { + "name": "model.layers.19.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14924288 + }, + { + "name": "model.layers.19.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14924544 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14924800 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14932992 + }, + { + "name": "model.layers.3.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 14941184 + }, + { + "name": "model.layers.3.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 15989760 + }, + { + "name": "model.layers.3.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 15990016 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15990272 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15998464 + }, + { + "name": "model.layers.21.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 16006656 + }, + { + "name": "model.layers.21.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 17055232 + }, + { + "name": "model.layers.21.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 17055488 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17055744 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17063936 + }, + { + "name": "model.layers.22.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17072128 + }, + { + "name": "model.layers.22.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18120704 + }, + { + "name": "model.layers.22.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18120960 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18121216 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18129408 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18137600 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18145792 + }, + { + "name": "model.layers.24.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 18153984 + }, + { + "name": "model.layers.24.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 19202560 + }, + { + "name": "model.layers.24.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 19202816 + }, + { + "name": "model.layers.23.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19203072 + }, + { + "name": "model.layers.23.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 20251648 + }, + { + "name": "model.layers.23.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 20251904 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20252160 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20260352 + }, + { + "name": "model.layers.25.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20268544 + }, + { + "name": "model.layers.25.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21317120 + }, + { + "name": "model.layers.25.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21317376 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21317632 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21325824 + }, + { + "name": "model.layers.26.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21334016 + }, + { + "name": "model.layers.26.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22382592 + }, + { + "name": "model.layers.26.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22382848 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22383104 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22391296 + }, + { + "name": "model.layers.27.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22399488 + }, + { + "name": "model.layers.27.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23448064 + }, + { + "name": "model.layers.27.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23448320 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23448576 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23456768 + }, + { + "name": "model.layers.28.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 23464960 + }, + { + "name": "model.layers.28.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24513536 + }, + { + "name": "model.layers.28.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24513792 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24514048 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24522240 + }, + { + "name": "model.layers.29.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24530432 + }, + { + "name": "model.layers.29.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25579008 + }, + { + "name": "model.layers.29.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25579264 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25579520 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25587712 + }, + { + "name": "model.layers.30.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25595904 + }, + { + "name": "model.layers.30.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26644480 + }, + { + "name": "model.layers.30.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26644736 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26644992 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26653184 + }, + { + "name": "model.layers.4.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26661376 + }, + { + "name": "model.layers.4.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27709952 + }, + { + "name": "model.layers.4.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27710208 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27710464 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27718656 + }, + { + "name": "model.layers.31.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27726848 + }, + { + "name": "model.layers.31.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28775424 + }, + { + "name": "model.layers.31.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28775680 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28775936 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28784128 + }, + { + "name": "model.layers.32.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 28792320 + }, + { + "name": "model.layers.32.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29840896 + }, + { + "name": "model.layers.32.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29841152 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29841408 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29849600 + }, + { + "name": "model.layers.33.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29857792 + }, + { + "name": "model.layers.33.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30906368 + }, + { + "name": "model.layers.33.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30906624 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30906880 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30915072 + }, + { + "name": "model.layers.34.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30923264 + }, + { + "name": "model.layers.34.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31971840 + }, + { + "name": "model.layers.34.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31972096 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31972352 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31980544 + }, + { + "name": "model.layers.35.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31988736 + }, + { + "name": "model.layers.35.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33037312 + }, + { + "name": "model.layers.35.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33037568 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33037824 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33046016 + } + ], + "md5sum": "3c342f2ce836c46e0ba859896d33668c" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1a7d5156d1b67d4dc72e6ec942f7b5d7" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "62da058a0747a23e368e87d88c6b3c0b" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.37.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "03e91a9871a2b5b377cc3edf43391bab" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.37.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "72d2d60f94227c69a9c9e0c6a0726747" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7c6d67000f7f52929ace98db23898a46" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "aa8c1bfebdf38632adc424462ef1169f" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.38.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "db63c89c18eae2601add489d604b3a81" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.38.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "aeffb514997523fac61a1c07c78a223c" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "982352ab5317bd858acc5062b561610b" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "abd4ae096c17c47fbd790a8ed3d9f2ce" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.39.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "176ff064f0ec821bfe90f9b2ea0bb7d6" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.39.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "01ce9a8af7f833930669d133bacdfed4" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d4d41b4e73b62b6bacc8977a8058744e" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0fcb1bc55a6e9a9b6e054890dd76397e" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.40.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "a019d6acfc02e67d4271aac9b7c82ce4" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.40.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "d67662e2cdb7e8b7cf6fc22f4202b8d1" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f3f4a073a14dd50f6a6ac7af2f57ad86" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a08a720dca2d4abf4c88250b78864725" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.5.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "afb449bbb969d35d887aef08d8d4e5fc" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.5.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "695b5ab1fbe1d093debd87545d0743b0" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "cab42d1a69922ea664938bdca70adfc4" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "b4683e0139da42688bfb7bfe0d9bad56" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.41.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "c2e30cdb46c5aacbfe7952e31e9a6cee" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.41.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "88331700ed1ddebb517fabab41538599" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c895bf6f8a166f7a099c6f3c8a45c42d" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "befdc056ebbb4bba1752d0a9f2b0bb2b" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.42.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "0a8bdb645d1a62c3e920037c11a681b4" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.42.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "f1bf6635e22f756b06a8ac576d21c2e0" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "772b96eabf704289da394c019dd4a9f1" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "d28958e28b46efa6203d5119286364d1" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.43.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "c24e966adc700125eb614da57c798b60" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.43.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "6329355124a4ed4938ccf46798a1505a" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "052e27daf5341eb97d3df22dde08fa45" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9f24f480d838b40e3045e3f4ab3237a5" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.44.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "4dcfada7bc48c66f22c47ec08df0986c" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.44.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "a5f8d33dac684d900a7d0a6935cd8cfb" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5fa840c330b3d6b0d209bf3e147eef1c" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "114548410e388af44d50b4e60cae8386" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.45.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "5e86a485fa0bd0eb485d92bc8b1dfad3" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.45.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "341fb37fb33079a741b13ade2d7f017c" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f6ce2db6db91c14b667eeba0ed80ffe6" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "f39749ad660aa921ac4fe59800f5ee46" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.46.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "5abbc5d4f1699ef5adbd2f834ede6a6c" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.46.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "d282dbe0818b0f70b176b44f34ae001c" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "827cda797b84d9c5c07cacc907ef0627" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "88560b651391583e14966bcfa24125b5" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.47.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "94ba9a26655433687b55f9b50039b28d" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.47.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "1908d628a7204613b461eb03d6b29d2b" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8447e8ae70f02e3a1bca3f242d82564f" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "81fefe316e8433110b0af18fa12b6662" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.48.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "02555239805148f0f6a1f8b1973cda76" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.48.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "571934588a664a47a4a63eb71e61ec8e" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b6a2f909a52b87d6709a9377a1cf527c" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "53f85ee7bf2471a57e7552344fe1eb94" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.49.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "2bbbfe135964e8fc6d07d28ba415fd41" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.49.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "e564d70e5b84202f6f8f1c7cbb279be7" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9d15a1278e624462c2a5629d77253f2a" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "daa16459df019018c822d7d30d14a796" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.50.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "3cbbaa4059e937166ffb6c1d23921bef" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.50.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "2afe58cf1ea28ed1d5b4109fac88a0b7" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c84b670e5e5d2056e1943df814ca7e70" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "f6c7262f6109edf511426c6eade75fe6" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.6.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "304e1fee476073f9d130076bad68a016" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.6.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "c3a90f18db021f3a37f731dcce8aea3f" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "60b6d11880721512263ee441ae5cec6c" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "74080497588c20b7d93076cf69679571" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.51.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "62769ce213973e13ca94faebb37349f4" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.51.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "71c838c31a3d0e9bbc3ae6c046444cbb" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a6cb51edfb73d22d3387a3dd8b031cb4" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "3487835abc9a89690b047a79563985df" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.52.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "07680392628975eafed4e07614e683b4" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.52.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "1cab3a2d0b8fa5a8e021b120c4e76f4b" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7507f5537df3a662b3c716cdb87336cd" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "925370910df7afe8dbce58fc7d3c8718" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.53.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "6f11cfd6393ef7321645712727fe1450" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.53.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "3eee88f09b1e6ba7a00c732e0ee1e37f" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "73260b85881304edc353a8233513dc50" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "26a99d6bdfe5833df8d0bf017ec80d28" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.54.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "18e8297c7830f872b7284b22f4ab4fff" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.54.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "73b59f7b0fa22e341edd85d63102e390" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0ca90780805d37d5cbc8715f86163c54" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "2dd76d70ad8e5b275c2c00e4745f629c" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.55.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "9e306dca24815e8c16d64af4dc48f912" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.55.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "152665596ba7fdcc44ef3df166a9353b" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "722823cce9d46f2b016cee4e220c9e57" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9bf1d535e8aa63c221e89fc870233601" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.56.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "a355767067e0e18c7b17a98e1f9e9415" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.56.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "72b72cb8d2326982f4a04e5cc5e76e92" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d4b7545e628740c304a207aa9636a882" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "6b20ee8107f0b44d7480edaf2db7c748" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.57.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "b47632fa78ffc0fa12e8e1f3e646bd32" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.57.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "2232af115dde28f8933a674db8c0a366" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "39c3ec16fd83c9e3059b8938dfc6d13f" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "dc265befffeb7fda60bb634a00a014ec" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.58.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "1dcb32b72086f76a70ca8e6bab44611b" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.58.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "8034a686b06105fdd29aeb58080a8125" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9b0d81d0013c878b53ff303dc2c65a47" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0ce57700be5185b148c721243e256465" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.59.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "df31af9b4c76a3559e439d53f6ae162f" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.59.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "674453064af2bb799fb63f90601d0f4e" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b1a5c72ff286dbff7d4e44fe2a198c00" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "2eda534281fc5f59947058a30522456c" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.60.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "60969504beab87c70e6f778e93c2ea85" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.60.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "04f6aa84d822320a8027c3b7c395592e" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e91f914a932884873a3e07ba452e3c87" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9114b9bd81c886884cf3bd8d19722dc1" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.7.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "8470d30345795f792c5232eff6902ee8" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.7.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "2352e6ae325d85893d390973509b1f8f" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f31ac71aefc96c80bc90222f4f354794" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "68ce33049e23d1c2ab250d7ea0e22fa1" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.61.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "924bd01d9c1cc424ef473d6081d2a30e" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.61.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "fe4f0b617084c9bba59cfc326ada7f52" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bbe7833604b7786bb4e55cdd5e9102a9" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "aae970e7ce80e1d22702f6973b777abe" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.62.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "b1392efd8c1fe3f725f43e2a77d7808d" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.62.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "38fc485ba54c2748fafdf785ee381bef" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bf0142a79df800608caf6c3e9e8b534e" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "cb66dbc0348db1c87eaacedfe9242ba9" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.63.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "88b39c807d600de3909889eb4e2187d9" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.63.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "d1bd151ff122815a3d2bced21f73d80b" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bb2e3381840785077d3b8c0dc0f86898" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "386a7366665ca0490346a23d574c955f" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.64.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "0ac1ac091488878f9f3964ec33e4a43c" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.64.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "8370ab3e5ed73dd5c722080f14d391a4" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 33029632, + "records": [ + { + "name": "model.layers.36.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1048576 + }, + { + "name": "model.layers.36.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1048832 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1049088 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1057280 + }, + { + "name": "model.layers.37.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 1065472 + }, + { + "name": "model.layers.37.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2114048 + }, + { + "name": "model.layers.37.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2114304 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2114560 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2122752 + }, + { + "name": "model.layers.38.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2130944 + }, + { + "name": "model.layers.38.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3179520 + }, + { + "name": "model.layers.38.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3179776 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3180032 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3188224 + }, + { + "name": "model.layers.39.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 3196416 + }, + { + "name": "model.layers.39.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 4244992 + }, + { + "name": "model.layers.39.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 4245248 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4245504 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4253696 + }, + { + "name": "model.layers.40.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 4261888 + }, + { + "name": "model.layers.40.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 5310464 + }, + { + "name": "model.layers.40.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 5310720 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5310976 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5319168 + }, + { + "name": "model.layers.5.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 5327360 + }, + { + "name": "model.layers.5.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 6375936 + }, + { + "name": "model.layers.5.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 6376192 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6376448 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6384640 + }, + { + "name": "model.layers.41.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 6392832 + }, + { + "name": "model.layers.41.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 7441408 + }, + { + "name": "model.layers.41.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 7441664 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7441920 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7450112 + }, + { + "name": "model.layers.42.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 7458304 + }, + { + "name": "model.layers.42.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 8506880 + }, + { + "name": "model.layers.42.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 8507136 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8507392 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8515584 + }, + { + "name": "model.layers.43.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8523776 + }, + { + "name": "model.layers.43.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 9572352 + }, + { + "name": "model.layers.43.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 9572608 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9572864 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9581056 + }, + { + "name": "model.layers.44.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9589248 + }, + { + "name": "model.layers.44.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 10637824 + }, + { + "name": "model.layers.44.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 10638080 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10638336 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10646528 + }, + { + "name": "model.layers.45.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 10654720 + }, + { + "name": "model.layers.45.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 11703296 + }, + { + "name": "model.layers.45.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 11703552 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11703808 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11712000 + }, + { + "name": "model.layers.46.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11720192 + }, + { + "name": "model.layers.46.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12768768 + }, + { + "name": "model.layers.46.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12769024 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12769280 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12777472 + }, + { + "name": "model.layers.47.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12785664 + }, + { + "name": "model.layers.47.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13834240 + }, + { + "name": "model.layers.47.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13834496 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13834752 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13842944 + }, + { + "name": "model.layers.48.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 13851136 + }, + { + "name": "model.layers.48.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14899712 + }, + { + "name": "model.layers.48.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14899968 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14900224 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14908416 + }, + { + "name": "model.layers.49.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 14916608 + }, + { + "name": "model.layers.49.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 15965184 + }, + { + "name": "model.layers.49.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 15965440 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15965696 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15973888 + }, + { + "name": "model.layers.50.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 15982080 + }, + { + "name": "model.layers.50.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 17030656 + }, + { + "name": "model.layers.50.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 17030912 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17031168 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17039360 + }, + { + "name": "model.layers.6.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17047552 + }, + { + "name": "model.layers.6.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18096128 + }, + { + "name": "model.layers.6.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18096384 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18096640 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18104832 + }, + { + "name": "model.layers.51.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 18113024 + }, + { + "name": "model.layers.51.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 19161600 + }, + { + "name": "model.layers.51.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 19161856 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19162112 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19170304 + }, + { + "name": "model.layers.52.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19178496 + }, + { + "name": "model.layers.52.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 20227072 + }, + { + "name": "model.layers.52.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 20227328 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20227584 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20235776 + }, + { + "name": "model.layers.53.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20243968 + }, + { + "name": "model.layers.53.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21292544 + }, + { + "name": "model.layers.53.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21292800 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21293056 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21301248 + }, + { + "name": "model.layers.54.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21309440 + }, + { + "name": "model.layers.54.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22358016 + }, + { + "name": "model.layers.54.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22358272 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22358528 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22366720 + }, + { + "name": "model.layers.55.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22374912 + }, + { + "name": "model.layers.55.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23423488 + }, + { + "name": "model.layers.55.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23423744 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23424000 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23432192 + }, + { + "name": "model.layers.56.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 23440384 + }, + { + "name": "model.layers.56.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24488960 + }, + { + "name": "model.layers.56.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24489216 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24489472 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24497664 + }, + { + "name": "model.layers.57.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24505856 + }, + { + "name": "model.layers.57.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25554432 + }, + { + "name": "model.layers.57.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25554688 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25554944 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25563136 + }, + { + "name": "model.layers.58.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25571328 + }, + { + "name": "model.layers.58.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26619904 + }, + { + "name": "model.layers.58.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26620160 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26620416 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26628608 + }, + { + "name": "model.layers.59.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26636800 + }, + { + "name": "model.layers.59.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27685376 + }, + { + "name": "model.layers.59.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27685632 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27685888 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27694080 + }, + { + "name": "model.layers.60.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27702272 + }, + { + "name": "model.layers.60.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28750848 + }, + { + "name": "model.layers.60.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28751104 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28751360 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28759552 + }, + { + "name": "model.layers.7.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 28767744 + }, + { + "name": "model.layers.7.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29816320 + }, + { + "name": "model.layers.7.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29816576 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29816832 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29825024 + }, + { + "name": "model.layers.61.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29833216 + }, + { + "name": "model.layers.61.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30881792 + }, + { + "name": "model.layers.61.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30882048 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30882304 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30890496 + }, + { + "name": "model.layers.62.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30898688 + }, + { + "name": "model.layers.62.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31947264 + }, + { + "name": "model.layers.62.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31947520 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31947776 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31955968 + }, + { + "name": "model.layers.63.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31964160 + }, + { + "name": "model.layers.63.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33012736 + }, + { + "name": "model.layers.63.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33012992 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33013248 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33021440 + } + ], + "md5sum": "caeb08c63ce0163b43f35570fc58e2a0" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9b99b1592f2babe84522652a5ed50942" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "b22c51fa3fcfe9762b033264125785d4" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.65.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "53a849e7894ccff97913cb41175981cc" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.65.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "bac1e0bcdf1dd9a954cc600317b040f8" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a5add3d83a5c75036a25e4a03385167b" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "958acdcfb0d1c59da71223e7d5f3422e" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.66.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "ab2cf58530da0c62d8ba4ce349a21201" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.66.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "bde2ebcd116eac37001e5f72704dc396" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9377f5b1fc7fcd3d320d6cab06360d82" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "480188977b46c2f5824a6e5cdcaa9b84" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.67.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "8a19b89982543d30ae2e95ac3099c122" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.67.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "47ad7f7aadd5b15c930eb734b8a07cec" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "33ada9f6326b951dd2d201077069c3c8" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "346893b094738e3f12eb2eb957104cdd" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.68.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "ae1c07e56a6240879168998804aa4206" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.68.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "a4e9be3aebd1003c604da09452b5bdae" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4b6bc5615d1e987ebadbd60c4ef8b351" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "09919cc92bca5861604e084d6496fa84" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.69.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "7e5c585f974352603ec157495427626a" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.69.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "ca39dad7487c6ba8d76c5d79b60cc16d" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "19f37415ce20ad76eb9945f2c36e7233" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "daed807f6106723030ba0e283247835f" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.70.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "6ee45aa37811376cc506dedd317000cd" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.70.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "373f55d146f07c40f69c2fa4ee4d2c3e" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "69a5a0f4221fdae83f0472f35b12fa25" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a84ba71bfd3ffe6f0b6282843ce9c5fd" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.8.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "1156f692f5bfae5ea50add52d155d173" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.8.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "b6b3091653ddd3f9dea8ff41f0ff28bc" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d97c799eb6a5cd3804ff68450037c1a7" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "07b2a88c176893f13155d460d0a4982e" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.71.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "4d0d28051b083fa9e472cd40d859566c" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.71.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "76fc7f1d39c6bbe2ba4f36ec2315c5ae" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ad1832a345be8431244ffca9f566d846" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9d4a69edb9630f25d032a8b5237c5f71" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.72.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "12acf9e4d67dacbf8e73b42a7b53b606" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.72.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "f599a89315d7ad8efe05ca8ec003ac39" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "136c9244cdc392620789abe175783706" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "4a3f73c4960d77401aaf0515bebf623c" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.73.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "08e58facc9735af69b1ff645ac051108" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.73.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "29d9c7360c6330c528dd42a0dbb36407" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d27e0ab65cba2b26a17f5a75dc91b498" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "3e1f9ba59d7025421fc05172ddb4c7b7" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.74.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "87701fe36b5bfe0cdb0d3bf7a5e4cdb8" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.74.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "2b7ea29d70c3829a505c46070631822b" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ddb4f22d61ae5933ef11493be995464c" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c7d6dafa5879b787373efb00c9939f77" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.75.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "db91d08ebfa5af30598e9a3572f1ac9b" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.75.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "767fa1e8974a18a38a993632ed3a9774" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3609d341c354f2f6f1acad266bf25148" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0f5091333d0bf22e1d064eb17faa9625" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.76.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "9a03f9fd1b1c4470cc9e611937780b39" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.76.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "d6bf717400a0ea9618a40273b46c50c3" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "aea9b40ed316191af179dc3d4faed65f" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "e08c38391028611decb66c281053ed53" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.77.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "9ce3b5fd1ab9591cde4e6e114acbce98" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.77.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "75c8ae67cdfcb80351b2b9c9074c14e7" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "778cef1f6e36cbbcca431c553c095f0a" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "e947c363e37e1b60aa8d0f26117ee624" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.78.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "eff713fb9169e23aca0bf13e15179e04" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.78.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "447b7f2107dbe86edd3eea8ade928ba4" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "91fc4defde24a2315b904dc22036abaf" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c93a355bb752107c0af7870e501e94e4" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.79.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "3ec22ef95332fa4ba61dcfcabd3f17c8" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.79.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "dbcf113321e08d36ab313b38176a7fbb" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0fc2dd2acb58a16ffd586e71cff444cf" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "4593b1ea44c7208fc5683749153e323f" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.80.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "f88edc53a25decbbc5444064ed9ba785" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.80.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "d00aff9b673f03271d9838e395d9c665" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.80.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "17d15478f54b934da2276cd2879dae47" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.80.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0e0ba5e3ba24a37664d3e56a1b41c299" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "dc270cdaf9eb8cb5c980dbeff4708e89" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ac2023147ef870b70802cd55c0e979c9" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.81.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "8a46c2883b2bf724d899782e44e89b0b" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.81.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "a33e786ee2edbb61a7d28ca4e52d50fd" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.81.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bf0214bc7e6a6e29955e138f5c72d834" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.81.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "2499264f070d65870de29be31cd6e529" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.82.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "597b29c76c6e33ac7f619868d7e75c41" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.82.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "86e078e50dce7dd57c24539168ab3aaa" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.82.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d6641681ce8cf075c4b57e6d02f70741" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.82.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "910d7ee50c7753cdb97d873e6aa9f993" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.83.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "66872790d912faca87cb7ac82eda28fa" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.83.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "416c79675b6354be5d311322f1c431e5" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.83.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7a5eee1332de39262e1f1118b2ee16b4" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.83.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "16f679f0047d0e367721ce7f7e4a7e86" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.84.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "8c69aff362378279a9b1cc68bffb5da7" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.84.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "43aa9100d63d818efc339a47af931e62" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.85.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "756bbb254564453e52a9c3360a1aa68c" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.85.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "0c31cbf876d687f38ed3d36e2c6fe8a4" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.85.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6ecb0cc30842f9ceb4305555ab6cb4ae" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.85.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0c9f00ccf479560f0c5a9a15def52462" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.84.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e98f73290415fff1b7cd5b6b3930d7db" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.84.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "21a5063393b7d8036d87607fb437e8ed" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.86.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "51f43b856eda8fd9a090af2a0ee22779" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.86.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "6ec722e13e328c73ccde36f1e69a9c48" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.86.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f8f34e196c25db6f9eda2f2dfe2e4bce" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.86.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "51dd1b4762e214cdb54af8be674755b8" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.87.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "d9e405d27802fd2cb9c9ce495e6cd276" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.87.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "067064d32d9b4a646c514e0b436fd525" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.87.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "326096fbfe151e9dddb3019ca7d3edc2" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.87.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c749ed8c64daf1e7610d2a76889ea716" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.88.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e1149b447edcb32f4d3a395df4597b31" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.88.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "76c7dba2f222e153c94cbe54910d37bf" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.88.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "584f51a530924f94fa84d848ef415cd1" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.89.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "18e78c026b0f6e4c81734895b93a6774" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.89.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "682e25187720b6343f2a92c9f49b32bd" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.89.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a6c65166eb6a07ceec22b8c05601a84f" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.89.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "8a82e028f45f1463d8c3544163718c88" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.88.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ae64ecd2b518869c91e0bdc62c5f2832" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.90.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "38c24e2ae0652249ae17a89d056f21c0" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.90.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "a661cf4f5621717c41940fb9b050b6dc" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.90.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7f086c728e0e73b4d08a7d63999f1a70" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.90.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "85a012f95802b3695a6295cb91336b6f" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.91.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "e3de83468b212153e61220947c48652d" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.91.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "91df9793228a8ac16681eb5097cbddec" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.91.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6e964b41a1c517750f466c03327f6ede" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.91.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "406f47c590fd930cc3825fbeb2f55fc6" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.92.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c003a20098eae83266f82c5bca010e8a" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.92.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "d8921241e9e009d7c5e95860eb0fed1b" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 1610612736, + "records": [ + { + "name": "model.layers.92.mlp.moe_down_proj.weight", + "shape": [ + 128, + 4096, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1610612736, + "byteOffset": 0 + } + ], + "md5sum": "1574bf7baae09e222a06a3c8b3e0a0d6" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 3221225472, + "records": [ + { + "name": "model.layers.92.mlp.moe_gate_up_proj.weight", + "shape": [ + 128, + 3072, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3221225472, + "byteOffset": 0 + } + ], + "md5sum": "3dea36638d0df909a6859d28b0595543" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 33029632, + "records": [ + { + "name": "model.layers.64.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.64.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1048576 + }, + { + "name": "model.layers.64.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1048832 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1049088 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1057280 + }, + { + "name": "model.layers.65.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 1065472 + }, + { + "name": "model.layers.65.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2114048 + }, + { + "name": "model.layers.65.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 2114304 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2114560 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2122752 + }, + { + "name": "model.layers.66.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 2130944 + }, + { + "name": "model.layers.66.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3179520 + }, + { + "name": "model.layers.66.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 3179776 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3180032 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3188224 + }, + { + "name": "model.layers.67.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 3196416 + }, + { + "name": "model.layers.67.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 4244992 + }, + { + "name": "model.layers.67.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 4245248 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4245504 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 4253696 + }, + { + "name": "model.layers.68.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 4261888 + }, + { + "name": "model.layers.68.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 5310464 + }, + { + "name": "model.layers.68.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 5310720 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5310976 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5319168 + }, + { + "name": "model.layers.69.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 5327360 + }, + { + "name": "model.layers.69.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 6375936 + }, + { + "name": "model.layers.69.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 6376192 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6376448 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 6384640 + }, + { + "name": "model.layers.70.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 6392832 + }, + { + "name": "model.layers.70.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 7441408 + }, + { + "name": "model.layers.70.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 7441664 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7441920 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7450112 + }, + { + "name": "model.layers.8.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 7458304 + }, + { + "name": "model.layers.8.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 8506880 + }, + { + "name": "model.layers.8.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 8507136 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8507392 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8515584 + }, + { + "name": "model.layers.71.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8523776 + }, + { + "name": "model.layers.71.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 9572352 + }, + { + "name": "model.layers.71.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 9572608 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9572864 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9581056 + }, + { + "name": "model.layers.72.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 9589248 + }, + { + "name": "model.layers.72.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 10637824 + }, + { + "name": "model.layers.72.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 10638080 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10638336 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10646528 + }, + { + "name": "model.layers.73.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 10654720 + }, + { + "name": "model.layers.73.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 11703296 + }, + { + "name": "model.layers.73.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 11703552 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11703808 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11712000 + }, + { + "name": "model.layers.74.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11720192 + }, + { + "name": "model.layers.74.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12768768 + }, + { + "name": "model.layers.74.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 12769024 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12769280 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12777472 + }, + { + "name": "model.layers.75.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12785664 + }, + { + "name": "model.layers.75.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13834240 + }, + { + "name": "model.layers.75.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 13834496 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13834752 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13842944 + }, + { + "name": "model.layers.76.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 13851136 + }, + { + "name": "model.layers.76.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14899712 + }, + { + "name": "model.layers.76.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 14899968 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14900224 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14908416 + }, + { + "name": "model.layers.77.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 14916608 + }, + { + "name": "model.layers.77.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 15965184 + }, + { + "name": "model.layers.77.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 15965440 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15965696 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15973888 + }, + { + "name": "model.layers.78.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 15982080 + }, + { + "name": "model.layers.78.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 17030656 + }, + { + "name": "model.layers.78.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 17030912 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17031168 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17039360 + }, + { + "name": "model.layers.79.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17047552 + }, + { + "name": "model.layers.79.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18096128 + }, + { + "name": "model.layers.79.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 18096384 + }, + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18096640 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18104832 + }, + { + "name": "model.layers.80.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 18113024 + }, + { + "name": "model.layers.80.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 19161600 + }, + { + "name": "model.layers.80.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 19161856 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19162112 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19170304 + }, + { + "name": "model.layers.9.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19178496 + }, + { + "name": "model.layers.9.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 20227072 + }, + { + "name": "model.layers.9.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 20227328 + }, + { + "name": "model.layers.80.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20227584 + }, + { + "name": "model.layers.80.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20235776 + }, + { + "name": "model.layers.81.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20243968 + }, + { + "name": "model.layers.81.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21292544 + }, + { + "name": "model.layers.81.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 21292800 + }, + { + "name": "model.layers.81.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21293056 + }, + { + "name": "model.layers.81.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21301248 + }, + { + "name": "model.layers.82.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 21309440 + }, + { + "name": "model.layers.82.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22358016 + }, + { + "name": "model.layers.82.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 22358272 + }, + { + "name": "model.layers.82.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22358528 + }, + { + "name": "model.layers.82.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22366720 + }, + { + "name": "model.layers.83.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22374912 + }, + { + "name": "model.layers.83.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23423488 + }, + { + "name": "model.layers.83.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 23423744 + }, + { + "name": "model.layers.83.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23424000 + }, + { + "name": "model.layers.83.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23432192 + }, + { + "name": "model.layers.84.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23440384 + }, + { + "name": "model.layers.84.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23448576 + }, + { + "name": "model.layers.85.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 23456768 + }, + { + "name": "model.layers.85.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24505344 + }, + { + "name": "model.layers.85.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 24505600 + }, + { + "name": "model.layers.84.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24505856 + }, + { + "name": "model.layers.84.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25554432 + }, + { + "name": "model.layers.84.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 25554688 + }, + { + "name": "model.layers.85.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25554944 + }, + { + "name": "model.layers.85.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25563136 + }, + { + "name": "model.layers.86.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25571328 + }, + { + "name": "model.layers.86.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26619904 + }, + { + "name": "model.layers.86.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 26620160 + }, + { + "name": "model.layers.86.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26620416 + }, + { + "name": "model.layers.86.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26628608 + }, + { + "name": "model.layers.87.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26636800 + }, + { + "name": "model.layers.87.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27685376 + }, + { + "name": "model.layers.87.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 27685632 + }, + { + "name": "model.layers.87.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27685888 + }, + { + "name": "model.layers.87.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27694080 + }, + { + "name": "model.layers.88.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27702272 + }, + { + "name": "model.layers.88.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27710464 + }, + { + "name": "model.layers.89.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 27718656 + }, + { + "name": "model.layers.89.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28767232 + }, + { + "name": "model.layers.89.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 28767488 + }, + { + "name": "model.layers.88.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 28767744 + }, + { + "name": "model.layers.88.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29816320 + }, + { + "name": "model.layers.88.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 29816576 + }, + { + "name": "model.layers.89.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29816832 + }, + { + "name": "model.layers.89.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29825024 + }, + { + "name": "model.layers.90.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29833216 + }, + { + "name": "model.layers.90.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30881792 + }, + { + "name": "model.layers.90.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 30882048 + }, + { + "name": "model.layers.90.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30882304 + }, + { + "name": "model.layers.90.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30890496 + }, + { + "name": "model.layers.91.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30898688 + }, + { + "name": "model.layers.91.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31947264 + }, + { + "name": "model.layers.91.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 31947520 + }, + { + "name": "model.layers.91.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31947776 + }, + { + "name": "model.layers.91.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31955968 + }, + { + "name": "model.layers.92.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31964160 + }, + { + "name": "model.layers.92.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33012736 + }, + { + "name": "model.layers.92.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 33012992 + }, + { + "name": "model.layers.92.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33013248 + }, + { + "name": "model.layers.92.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33021440 + } + ], + "md5sum": "cecd9883bfc0edf6603ca57695a2c95d" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.93.self_attn.c_attn.weight", + "shape": [ + 9216, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "61a66c6b2c1e2c496a654bbc67c720bb" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.93.self_attn.o_proj.weight", + "shape": [ + 4096, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "a99c42892cdb71745b6adba72bf21784" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 1049088, + "records": [ + { + "name": "model.layers.93.mlp.gate.weight", + "shape": [ + 128, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.93.self_attn.k_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1048576 + }, + { + "name": "model.layers.93.self_attn.q_norm.weight", + "shape": [ + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 256, + "byteOffset": 1048832 + } + ], + "md5sum": "b8f6b27afc0cec05e67354fb9fa58c33" + } + ] +} \ No newline at end of file