{ "metadata": { "ParamSize": 849, "ParamBytes": 470187269120.0, "BitsPerParam": 13.014593947301632 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1244659712, "records": [ { "name": "lm_head.weight", "shape": [ 151936, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1244659712, "byteOffset": 0 } ], "md5sum": "14d53a10bfd68bfddb4a50db3f35d399" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.93.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "c10338566cf6479eaa11c76cd2ce3848" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.93.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "590dc6c447aab2e9cd11a271f7ef732c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 1244659712, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 151936, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1244659712, "byteOffset": 0 } ], "md5sum": "7b25b4e6ee3efc5fcfc9884c1acfa6bd" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.0.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "a9e54bee39203214ec8aca464697649a" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.0.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "b0d3b61b49f9e50aa1ec360bc9b50bee" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d2bba38c33e9ceaaae28b3b60bb93b2b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0b86ac96a36947c9fc95d633decc8b7c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.1.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "6a79d4c49c69741d5b79d2caf163f6bc" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.1.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "e015ccbc03b61d16ef248b6ed08d1f79" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4e4b640ce7e45330217d3755af4aa845" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "51a37259b1c54339010b3c95057bf91a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.2.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "948be47846020e85ca33c9593319bede" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.2.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "a76b50e876202f242783924cdfb52a32" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4f9d07e306a0859697cb9c63b56a1daf" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "10025fdd0a6a6837b16d68aae9ba4f5a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.10.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "50e8210952348271a983620c60de1c6b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.10.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "3775a1f7d93a06a0faed886f2542643b" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.11.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "836f186601e30704908d233ee05f53cf" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.11.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "393adbc33cff0628200872d47ff9a9b1" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "012b33d49e8e6dd0df59e76b1f0c7db0" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "245afeecf2ce287c3dae9a0ec4af9289" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fdbe85cafbf00f2008eac202ddc47b26" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a85776cfc28ac312e3345075835decbf" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.9.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "309b5f47441b98aed1fc0692b5e37b94" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.9.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "fa7a3402682d863d93b6ee3ff2e033e5" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.12.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "5de7fb1a0cfead933d9604e505821bf3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.12.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "47e3173ad4c13610b68489d622441e53" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ddfd74fc1a14ea9aed3e29c9c5cf60cf" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e79c01926c1ef427e72ac43543873a2a" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.13.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "84d0ea0c654cb33600834185796e7dd6" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.13.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "dd3b37a302208dd63f0be64f72bbdf4b" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a68f1e7dbf7681e1af0fc1d1aa43dd0d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "986d576e5dd81bd3fe20551633b930ba" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.14.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "f1594aa197c5b6a900661d41830860bb" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.14.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "f9f0523020ee3cd1e5357570f044b4bf" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2127b9c6981844d4dff171697f9154a2" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fb31a1bf0d804fda3437e48c1ce239bc" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.15.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "f8d4f2cc3017ae1229356aa9414f9d5a" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.15.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "510744e474954007d39bb2b9d5d403bd" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7d0b5ab47133f26018aec040eb469150" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0cd2bb4e68298e839589e8a3a4342403" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.16.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "31669b271ea7c745bf08ab55021734df" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.16.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "2477fc4034e53b93fc2e47a1fe275d12" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9c12dbc321656502160f9b559da34bef" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "817e67219051cc7ebc5a5cf4d72af9bd" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.17.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "bd75c2a2f471b554c458b3d887d59869" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.17.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "f4828410347e1068a1202e52c820c418" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0ee5421bb3227ee3e860894662422574" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "99c910f4263c29e5a79497fcf555620c" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.18.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "63baa0b0e15a069651babed2c648e891" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.18.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "ddf28861bb6f7e2b19c12cfa67bf74b7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7ed42066c11350a613fc720511950a79" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3f05bd2ca7a9f1b100dc5e4759b87899" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.19.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "7f247401284cdca1071e5e2d44efab0d" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.19.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "2f7d6334d81f2d0741112ff7d74d99e7" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.20.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "5739bd59cdd88aee5708c531c79b6acc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.20.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "67c26a149f9a26a90d414f93adaab296" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ec42f609fbb4cf8e2c7e787e082d6dee" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ed7cba6370bfec187de289507c79844c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "119dac0d33d572b6053b1704864bb830" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e71b8cbaa7dfe3c0d4a3438fec98dc22" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.3.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "c09afa0cfa1b925ca114e17bd8ab3e97" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.3.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "f53669d6247c3209b6366b42cc327b86" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4e3e95e445a94dc60178075ac1c1582d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c415cc2b7fa566d5eead38dcf2dc9386" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.21.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "5d9d802f8904d83d16415c6e99547f1a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.21.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "54e488af488a1722ea4b50e6519c0a8f" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a733c795cfb2b20b3923fea3036b071d" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9525d04a0346abde96b1ae6835051307" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.22.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "6e6da4bef8565f8119fb6b0bc8c7cf68" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.22.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "f0a5b4f6c9fac3e68cacd03a50cc4c33" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c444990a9f51a055925f5daac4e3708f" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "66de1b98d2457806b87d8115a35f0545" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "41c403fbd267ca056df38d8a96a5d470" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.23.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "46824b5a19b4f9ca976a8e5730a061ae" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.23.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "950231a2f0100380b4f026087ff9b172" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.24.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "f108b4099a4c640d0d75c85236223958" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.24.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "29d302c832efcca0b01154a407b7e865" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a168ab14214d5f37ce56f694ba78c547" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "62b01635e5df6d6ea9b96937d60ccde6" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "124d533062bc5b0a93e7522cd1fd7a87" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.25.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "0905094fd722331f16ce4f2ac31833e1" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.25.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "6bbd67f7da4ac70a45549aa7b17f5d7e" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3e6fadcab20432db39a510b39e096dcf" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "72dccb3de69f362c94d9bd571c28ce90" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.26.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "d3704fd38fe8637ebf06a763e69d2e16" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.26.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "458d20df6faee3caa1816378ab77cb0a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "74da278215ceea56fab7216e1e068017" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e2ae35be050fa3f0ff7b806ddf701d04" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1143dd05032eccec172ef2538b4eedea" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7569c77a979f634f36f5b97fc23ce968" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.27.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "931009f89bb9627cab12f36d6c62a161" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.27.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "5b3394b2907be444837328a512ca818e" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.28.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "00e9501b4f2b48bd77d67331f30c3dc0" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.28.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "de76f1681eb2d1655a982d6493cd7c53" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "477e78323cc7457f27f92a0ee34d0fa5" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3ae8f9b2d75009918d18e8f307d6d9e5" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.29.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "4d00c6e686190204bf3d63fe39a6ec56" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.29.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "5c84ad702dffb71bb2d69c5166ae39f7" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "003db4222ea14e2559fdb47d981836f1" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8a78d54c5ea835f8d700e77c306cd066" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.30.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "3ec4038429b17949363278a1db34de20" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.30.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "cf45328a62247ac87196085715f59043" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "36d5fe58486c234ce981c918a4c3f361" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9d9dc0ab9e93ff78cef166cc0c438ea5" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.4.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "14c57c8b803e58b9619a7c3655e41878" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.4.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "6c8086b6e580c7338eb0d407de63f82a" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5faf9a28caffac307099ef8cfbca027d" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0bc58cdf0e819f19ff1e2f5c5e64e645" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.31.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "883498fb89ad89634c2719b55b7165dc" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.31.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "0d4b2d4ff1142ded74675099ec96ad62" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "39126336b2ae8876c38fc82e09c8d6ef" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "fdfae28ac413e0127e7d961092ba121a" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.32.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "26d7e4764381ab95b60cabe54f13c3cb" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.32.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "031e1b788f309fd870551339e9453865" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9cfb9bd139430c466f9e9c4d8a6b6136" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b0c38958c7338e7a13e457b538c0c1b9" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.33.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "ea18d4ace2fd115a520db57162bb6c06" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.33.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "82b4bcac9193273b24bba0a715c1c462" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "747d59f4b449c7ac8490e776b78b207f" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c78539b0e7fcffb0d1bd8c82d8623360" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.34.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "36306515779cd5f986d50d3bf1dc3ecc" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.34.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "a275582c9d1ac3c59de2c0ae36743bf1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bc1778897e3aac1c7f2e0441d90a2f8d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3a6d95aead6ce3db2beaa1705c890f63" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.35.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "282ce0ca3f03074d3d4584d45029922a" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.35.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "e74393636df9fdd9d05891ec27573310" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "658ce8afcc5dcfd228dd1ebf33906186" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a9fea81f70086f0c80df868924dfafec" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.36.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "98e9ad835d06b9f99981f0837f8780cf" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.36.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "64a9c482889ea3d94a89da0d403bc2bd" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33054208, "records": [ { "name": "model.layers.93.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.93.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.0.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24576 }, { "name": "model.layers.0.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1073152 }, { "name": "model.layers.0.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1073408 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1073664 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1081856 }, { "name": "model.layers.1.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1090048 }, { "name": "model.layers.1.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2138624 }, { "name": "model.layers.1.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2138880 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2139136 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2147328 }, { "name": "model.layers.2.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2155520 }, { "name": "model.layers.2.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3204096 }, { "name": "model.layers.2.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3204352 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3204608 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3212800 }, { "name": "model.layers.11.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 3220992 }, { "name": "model.layers.11.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4269568 }, { "name": "model.layers.11.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4269824 }, { "name": "model.layers.10.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 4270080 }, { "name": "model.layers.10.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 5318656 }, { "name": "model.layers.10.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 5318912 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5319168 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5327360 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5335552 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5343744 }, { "name": "model.layers.12.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 5351936 }, { "name": "model.layers.12.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 6400512 }, { "name": "model.layers.12.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 6400768 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6401024 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6409216 }, { "name": "model.layers.13.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 6417408 }, { "name": "model.layers.13.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 7465984 }, { "name": "model.layers.13.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 7466240 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7466496 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7474688 }, { "name": "model.layers.14.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7482880 }, { "name": "model.layers.14.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 8531456 }, { "name": "model.layers.14.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 8531712 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8531968 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8540160 }, { "name": "model.layers.15.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8548352 }, { "name": "model.layers.15.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 9596928 }, { "name": "model.layers.15.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 9597184 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9597440 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9605632 }, { "name": "model.layers.16.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9613824 }, { "name": "model.layers.16.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 10662400 }, { "name": "model.layers.16.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 10662656 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10662912 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10671104 }, { "name": "model.layers.17.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 10679296 }, { "name": "model.layers.17.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 11727872 }, { "name": "model.layers.17.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 11728128 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11728384 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11736576 }, { "name": "model.layers.18.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11744768 }, { "name": "model.layers.18.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12793344 }, { "name": "model.layers.18.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12793600 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12793856 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12802048 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12810240 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12818432 }, { "name": "model.layers.20.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12826624 }, { "name": "model.layers.20.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13875200 }, { "name": "model.layers.20.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13875456 }, { "name": "model.layers.19.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13875712 }, { "name": "model.layers.19.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14924288 }, { "name": "model.layers.19.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14924544 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14924800 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14932992 }, { "name": "model.layers.3.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14941184 }, { "name": "model.layers.3.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 15989760 }, { "name": "model.layers.3.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 15990016 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15990272 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15998464 }, { "name": "model.layers.21.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 16006656 }, { "name": "model.layers.21.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17055232 }, { "name": "model.layers.21.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17055488 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17055744 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17063936 }, { "name": "model.layers.22.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17072128 }, { "name": "model.layers.22.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18120704 }, { "name": "model.layers.22.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18120960 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18121216 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18129408 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18137600 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18145792 }, { "name": "model.layers.24.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18153984 }, { "name": "model.layers.24.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 19202560 }, { "name": "model.layers.24.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 19202816 }, { "name": "model.layers.23.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19203072 }, { "name": "model.layers.23.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 20251648 }, { "name": "model.layers.23.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 20251904 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20252160 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20260352 }, { "name": "model.layers.25.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20268544 }, { "name": "model.layers.25.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21317120 }, { "name": "model.layers.25.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21317376 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21317632 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21325824 }, { "name": "model.layers.26.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21334016 }, { "name": "model.layers.26.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22382592 }, { "name": "model.layers.26.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22382848 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22383104 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22391296 }, { "name": "model.layers.27.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22399488 }, { "name": "model.layers.27.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23448064 }, { "name": "model.layers.27.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23448320 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23448576 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23456768 }, { "name": "model.layers.28.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23464960 }, { "name": "model.layers.28.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24513536 }, { "name": "model.layers.28.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24513792 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24514048 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24522240 }, { "name": "model.layers.29.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24530432 }, { "name": "model.layers.29.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25579008 }, { "name": "model.layers.29.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25579264 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25579520 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25587712 }, { "name": "model.layers.30.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25595904 }, { "name": "model.layers.30.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26644480 }, { "name": "model.layers.30.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26644736 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26644992 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26653184 }, { "name": "model.layers.4.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26661376 }, { "name": "model.layers.4.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27709952 }, { "name": "model.layers.4.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27710208 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27710464 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27718656 }, { "name": "model.layers.31.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 27726848 }, { "name": "model.layers.31.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28775424 }, { "name": "model.layers.31.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28775680 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28775936 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28784128 }, { "name": "model.layers.32.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28792320 }, { "name": "model.layers.32.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29840896 }, { "name": "model.layers.32.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29841152 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29841408 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29849600 }, { "name": "model.layers.33.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29857792 }, { "name": "model.layers.33.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30906368 }, { "name": "model.layers.33.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30906624 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30906880 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30915072 }, { "name": "model.layers.34.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30923264 }, { "name": "model.layers.34.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31971840 }, { "name": "model.layers.34.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31972096 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31972352 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31980544 }, { "name": "model.layers.35.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31988736 }, { "name": "model.layers.35.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33037312 }, { "name": "model.layers.35.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33037568 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33037824 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33046016 } ], "md5sum": "3c342f2ce836c46e0ba859896d33668c" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.36.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1a7d5156d1b67d4dc72e6ec942f7b5d7" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "62da058a0747a23e368e87d88c6b3c0b" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.37.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "03e91a9871a2b5b377cc3edf43391bab" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.37.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "72d2d60f94227c69a9c9e0c6a0726747" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.37.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7c6d67000f7f52929ace98db23898a46" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "aa8c1bfebdf38632adc424462ef1169f" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.38.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "db63c89c18eae2601add489d604b3a81" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.38.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "aeffb514997523fac61a1c07c78a223c" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.38.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "982352ab5317bd858acc5062b561610b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "abd4ae096c17c47fbd790a8ed3d9f2ce" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.39.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "176ff064f0ec821bfe90f9b2ea0bb7d6" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.39.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "01ce9a8af7f833930669d133bacdfed4" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.39.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d4d41b4e73b62b6bacc8977a8058744e" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0fcb1bc55a6e9a9b6e054890dd76397e" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.40.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "a019d6acfc02e67d4271aac9b7c82ce4" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.40.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "d67662e2cdb7e8b7cf6fc22f4202b8d1" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.40.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f3f4a073a14dd50f6a6ac7af2f57ad86" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a08a720dca2d4abf4c88250b78864725" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.5.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "afb449bbb969d35d887aef08d8d4e5fc" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.5.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "695b5ab1fbe1d093debd87545d0743b0" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cab42d1a69922ea664938bdca70adfc4" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b4683e0139da42688bfb7bfe0d9bad56" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.41.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "c2e30cdb46c5aacbfe7952e31e9a6cee" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.41.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "88331700ed1ddebb517fabab41538599" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.41.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c895bf6f8a166f7a099c6f3c8a45c42d" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "befdc056ebbb4bba1752d0a9f2b0bb2b" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.42.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "0a8bdb645d1a62c3e920037c11a681b4" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.42.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "f1bf6635e22f756b06a8ac576d21c2e0" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.42.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "772b96eabf704289da394c019dd4a9f1" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d28958e28b46efa6203d5119286364d1" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.43.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "c24e966adc700125eb614da57c798b60" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.43.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "6329355124a4ed4938ccf46798a1505a" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.43.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "052e27daf5341eb97d3df22dde08fa45" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9f24f480d838b40e3045e3f4ab3237a5" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.44.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "4dcfada7bc48c66f22c47ec08df0986c" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.44.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "a5f8d33dac684d900a7d0a6935cd8cfb" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.44.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "5fa840c330b3d6b0d209bf3e147eef1c" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "114548410e388af44d50b4e60cae8386" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.45.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "5e86a485fa0bd0eb485d92bc8b1dfad3" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.45.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "341fb37fb33079a741b13ade2d7f017c" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.45.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f6ce2db6db91c14b667eeba0ed80ffe6" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f39749ad660aa921ac4fe59800f5ee46" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.46.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "5abbc5d4f1699ef5adbd2f834ede6a6c" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.46.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "d282dbe0818b0f70b176b44f34ae001c" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.46.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "827cda797b84d9c5c07cacc907ef0627" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "88560b651391583e14966bcfa24125b5" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.47.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "94ba9a26655433687b55f9b50039b28d" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.47.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "1908d628a7204613b461eb03d6b29d2b" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.47.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8447e8ae70f02e3a1bca3f242d82564f" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "81fefe316e8433110b0af18fa12b6662" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.48.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "02555239805148f0f6a1f8b1973cda76" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.48.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "571934588a664a47a4a63eb71e61ec8e" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.48.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b6a2f909a52b87d6709a9377a1cf527c" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "53f85ee7bf2471a57e7552344fe1eb94" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.49.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "2bbbfe135964e8fc6d07d28ba415fd41" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.49.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "e564d70e5b84202f6f8f1c7cbb279be7" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.49.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9d15a1278e624462c2a5629d77253f2a" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "daa16459df019018c822d7d30d14a796" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.50.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "3cbbaa4059e937166ffb6c1d23921bef" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.50.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "2afe58cf1ea28ed1d5b4109fac88a0b7" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.50.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c84b670e5e5d2056e1943df814ca7e70" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f6c7262f6109edf511426c6eade75fe6" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.6.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "304e1fee476073f9d130076bad68a016" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.6.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "c3a90f18db021f3a37f731dcce8aea3f" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "60b6d11880721512263ee441ae5cec6c" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "74080497588c20b7d93076cf69679571" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.51.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "62769ce213973e13ca94faebb37349f4" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.51.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "71c838c31a3d0e9bbc3ae6c046444cbb" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.51.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a6cb51edfb73d22d3387a3dd8b031cb4" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3487835abc9a89690b047a79563985df" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.52.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "07680392628975eafed4e07614e683b4" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.52.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "1cab3a2d0b8fa5a8e021b120c4e76f4b" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.52.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7507f5537df3a662b3c716cdb87336cd" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.52.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "925370910df7afe8dbce58fc7d3c8718" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.53.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "6f11cfd6393ef7321645712727fe1450" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.53.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "3eee88f09b1e6ba7a00c732e0ee1e37f" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.53.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "73260b85881304edc353a8233513dc50" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.53.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "26a99d6bdfe5833df8d0bf017ec80d28" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.54.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "18e8297c7830f872b7284b22f4ab4fff" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.54.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "73b59f7b0fa22e341edd85d63102e390" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.54.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0ca90780805d37d5cbc8715f86163c54" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.54.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2dd76d70ad8e5b275c2c00e4745f629c" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.55.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "9e306dca24815e8c16d64af4dc48f912" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.55.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "152665596ba7fdcc44ef3df166a9353b" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.55.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "722823cce9d46f2b016cee4e220c9e57" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.55.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9bf1d535e8aa63c221e89fc870233601" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.56.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "a355767067e0e18c7b17a98e1f9e9415" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.56.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "72b72cb8d2326982f4a04e5cc5e76e92" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.56.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d4b7545e628740c304a207aa9636a882" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.56.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6b20ee8107f0b44d7480edaf2db7c748" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.57.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "b47632fa78ffc0fa12e8e1f3e646bd32" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.57.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "2232af115dde28f8933a674db8c0a366" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.57.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "39c3ec16fd83c9e3059b8938dfc6d13f" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.57.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "dc265befffeb7fda60bb634a00a014ec" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.58.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "1dcb32b72086f76a70ca8e6bab44611b" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.58.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "8034a686b06105fdd29aeb58080a8125" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.58.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9b0d81d0013c878b53ff303dc2c65a47" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.58.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0ce57700be5185b148c721243e256465" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.59.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "df31af9b4c76a3559e439d53f6ae162f" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.59.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "674453064af2bb799fb63f90601d0f4e" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.59.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b1a5c72ff286dbff7d4e44fe2a198c00" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.59.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2eda534281fc5f59947058a30522456c" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.60.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "60969504beab87c70e6f778e93c2ea85" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.60.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "04f6aa84d822320a8027c3b7c395592e" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.60.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e91f914a932884873a3e07ba452e3c87" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.60.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9114b9bd81c886884cf3bd8d19722dc1" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.7.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "8470d30345795f792c5232eff6902ee8" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.7.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "2352e6ae325d85893d390973509b1f8f" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f31ac71aefc96c80bc90222f4f354794" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "68ce33049e23d1c2ab250d7ea0e22fa1" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.61.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "924bd01d9c1cc424ef473d6081d2a30e" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.61.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "fe4f0b617084c9bba59cfc326ada7f52" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.61.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bbe7833604b7786bb4e55cdd5e9102a9" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.61.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "aae970e7ce80e1d22702f6973b777abe" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.62.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "b1392efd8c1fe3f725f43e2a77d7808d" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.62.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "38fc485ba54c2748fafdf785ee381bef" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.62.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bf0142a79df800608caf6c3e9e8b534e" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.62.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "cb66dbc0348db1c87eaacedfe9242ba9" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.63.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "88b39c807d600de3909889eb4e2187d9" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.63.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "d1bd151ff122815a3d2bced21f73d80b" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.63.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bb2e3381840785077d3b8c0dc0f86898" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.63.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "386a7366665ca0490346a23d574c955f" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.64.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "0ac1ac091488878f9f3964ec33e4a43c" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.64.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "8370ab3e5ed73dd5c722080f14d391a4" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 33029632, "records": [ { "name": "model.layers.36.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1048576 }, { "name": "model.layers.36.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1048832 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1049088 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1057280 }, { "name": "model.layers.37.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1065472 }, { "name": "model.layers.37.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2114048 }, { "name": "model.layers.37.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2114304 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2114560 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2122752 }, { "name": "model.layers.38.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2130944 }, { "name": "model.layers.38.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3179520 }, { "name": "model.layers.38.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3179776 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3180032 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3188224 }, { "name": "model.layers.39.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 3196416 }, { "name": "model.layers.39.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4244992 }, { "name": "model.layers.39.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4245248 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4245504 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4253696 }, { "name": "model.layers.40.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 4261888 }, { "name": "model.layers.40.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 5310464 }, { "name": "model.layers.40.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 5310720 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5310976 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5319168 }, { "name": "model.layers.5.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 5327360 }, { "name": "model.layers.5.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 6375936 }, { "name": "model.layers.5.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 6376192 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6376448 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6384640 }, { "name": "model.layers.41.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 6392832 }, { "name": "model.layers.41.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 7441408 }, { "name": "model.layers.41.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 7441664 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7441920 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7450112 }, { "name": "model.layers.42.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7458304 }, { "name": "model.layers.42.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 8506880 }, { "name": "model.layers.42.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 8507136 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8507392 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8515584 }, { "name": "model.layers.43.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8523776 }, { "name": "model.layers.43.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 9572352 }, { "name": "model.layers.43.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 9572608 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9572864 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9581056 }, { "name": "model.layers.44.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9589248 }, { "name": "model.layers.44.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 10637824 }, { "name": "model.layers.44.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 10638080 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10638336 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10646528 }, { "name": "model.layers.45.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 10654720 }, { "name": "model.layers.45.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 11703296 }, { "name": "model.layers.45.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 11703552 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11703808 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11712000 }, { "name": "model.layers.46.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11720192 }, { "name": "model.layers.46.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12768768 }, { "name": "model.layers.46.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12769024 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12769280 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12777472 }, { "name": "model.layers.47.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12785664 }, { "name": "model.layers.47.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13834240 }, { "name": "model.layers.47.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13834496 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13834752 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13842944 }, { "name": "model.layers.48.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13851136 }, { "name": "model.layers.48.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14899712 }, { "name": "model.layers.48.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14899968 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14900224 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14908416 }, { "name": "model.layers.49.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14916608 }, { "name": "model.layers.49.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 15965184 }, { "name": "model.layers.49.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 15965440 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15965696 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15973888 }, { "name": "model.layers.50.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15982080 }, { "name": "model.layers.50.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17030656 }, { "name": "model.layers.50.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17030912 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17031168 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17039360 }, { "name": "model.layers.6.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17047552 }, { "name": "model.layers.6.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18096128 }, { "name": "model.layers.6.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18096384 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18096640 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18104832 }, { "name": "model.layers.51.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18113024 }, { "name": "model.layers.51.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 19161600 }, { "name": "model.layers.51.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 19161856 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19162112 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19170304 }, { "name": "model.layers.52.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19178496 }, { "name": "model.layers.52.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 20227072 }, { "name": "model.layers.52.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 20227328 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20227584 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20235776 }, { "name": "model.layers.53.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20243968 }, { "name": "model.layers.53.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21292544 }, { "name": "model.layers.53.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21292800 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21293056 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21301248 }, { "name": "model.layers.54.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21309440 }, { "name": "model.layers.54.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22358016 }, { "name": "model.layers.54.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22358272 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22358528 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22366720 }, { "name": "model.layers.55.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22374912 }, { "name": "model.layers.55.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23423488 }, { "name": "model.layers.55.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23423744 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23424000 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23432192 }, { "name": "model.layers.56.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23440384 }, { "name": "model.layers.56.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24488960 }, { "name": "model.layers.56.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24489216 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24489472 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24497664 }, { "name": "model.layers.57.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24505856 }, { "name": "model.layers.57.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25554432 }, { "name": "model.layers.57.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25554688 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25554944 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25563136 }, { "name": "model.layers.58.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25571328 }, { "name": "model.layers.58.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26619904 }, { "name": "model.layers.58.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26620160 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26620416 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26628608 }, { "name": "model.layers.59.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26636800 }, { "name": "model.layers.59.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27685376 }, { "name": "model.layers.59.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27685632 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27685888 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27694080 }, { "name": "model.layers.60.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 27702272 }, { "name": "model.layers.60.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28750848 }, { "name": "model.layers.60.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28751104 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28751360 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28759552 }, { "name": "model.layers.7.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28767744 }, { "name": "model.layers.7.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29816320 }, { "name": "model.layers.7.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29816576 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29816832 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29825024 }, { "name": "model.layers.61.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29833216 }, { "name": "model.layers.61.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30881792 }, { "name": "model.layers.61.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30882048 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30882304 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30890496 }, { "name": "model.layers.62.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30898688 }, { "name": "model.layers.62.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31947264 }, { "name": "model.layers.62.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31947520 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31947776 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31955968 }, { "name": "model.layers.63.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31964160 }, { "name": "model.layers.63.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33012736 }, { "name": "model.layers.63.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33012992 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33013248 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33021440 } ], "md5sum": "caeb08c63ce0163b43f35570fc58e2a0" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.64.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9b99b1592f2babe84522652a5ed50942" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.64.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b22c51fa3fcfe9762b033264125785d4" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.65.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "53a849e7894ccff97913cb41175981cc" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.65.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "bac1e0bcdf1dd9a954cc600317b040f8" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.65.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a5add3d83a5c75036a25e4a03385167b" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.65.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "958acdcfb0d1c59da71223e7d5f3422e" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.66.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "ab2cf58530da0c62d8ba4ce349a21201" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.66.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "bde2ebcd116eac37001e5f72704dc396" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.66.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "9377f5b1fc7fcd3d320d6cab06360d82" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.66.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "480188977b46c2f5824a6e5cdcaa9b84" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.67.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "8a19b89982543d30ae2e95ac3099c122" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.67.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "47ad7f7aadd5b15c930eb734b8a07cec" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.67.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "33ada9f6326b951dd2d201077069c3c8" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.67.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "346893b094738e3f12eb2eb957104cdd" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.68.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "ae1c07e56a6240879168998804aa4206" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.68.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "a4e9be3aebd1003c604da09452b5bdae" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.68.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4b6bc5615d1e987ebadbd60c4ef8b351" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.68.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "09919cc92bca5861604e084d6496fa84" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.69.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "7e5c585f974352603ec157495427626a" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.69.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "ca39dad7487c6ba8d76c5d79b60cc16d" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.69.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "19f37415ce20ad76eb9945f2c36e7233" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.69.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "daed807f6106723030ba0e283247835f" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.70.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "6ee45aa37811376cc506dedd317000cd" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.70.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "373f55d146f07c40f69c2fa4ee4d2c3e" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.70.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "69a5a0f4221fdae83f0472f35b12fa25" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.70.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a84ba71bfd3ffe6f0b6282843ce9c5fd" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.8.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "1156f692f5bfae5ea50add52d155d173" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.8.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "b6b3091653ddd3f9dea8ff41f0ff28bc" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d97c799eb6a5cd3804ff68450037c1a7" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "07b2a88c176893f13155d460d0a4982e" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.71.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "4d0d28051b083fa9e472cd40d859566c" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.71.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "76fc7f1d39c6bbe2ba4f36ec2315c5ae" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.71.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ad1832a345be8431244ffca9f566d846" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.71.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9d4a69edb9630f25d032a8b5237c5f71" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.72.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "12acf9e4d67dacbf8e73b42a7b53b606" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.72.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "f599a89315d7ad8efe05ca8ec003ac39" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.72.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "136c9244cdc392620789abe175783706" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.72.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4a3f73c4960d77401aaf0515bebf623c" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.73.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "08e58facc9735af69b1ff645ac051108" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.73.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "29d9c7360c6330c528dd42a0dbb36407" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.73.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d27e0ab65cba2b26a17f5a75dc91b498" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.73.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3e1f9ba59d7025421fc05172ddb4c7b7" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.74.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "87701fe36b5bfe0cdb0d3bf7a5e4cdb8" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.74.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "2b7ea29d70c3829a505c46070631822b" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.74.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ddb4f22d61ae5933ef11493be995464c" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.74.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c7d6dafa5879b787373efb00c9939f77" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.75.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "db91d08ebfa5af30598e9a3572f1ac9b" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.75.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "767fa1e8974a18a38a993632ed3a9774" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.75.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3609d341c354f2f6f1acad266bf25148" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.75.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0f5091333d0bf22e1d064eb17faa9625" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.76.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "9a03f9fd1b1c4470cc9e611937780b39" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.76.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "d6bf717400a0ea9618a40273b46c50c3" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.76.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "aea9b40ed316191af179dc3d4faed65f" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.76.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e08c38391028611decb66c281053ed53" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.77.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "9ce3b5fd1ab9591cde4e6e114acbce98" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.77.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "75c8ae67cdfcb80351b2b9c9074c14e7" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.77.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "778cef1f6e36cbbcca431c553c095f0a" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.77.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e947c363e37e1b60aa8d0f26117ee624" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.78.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "eff713fb9169e23aca0bf13e15179e04" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.78.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "447b7f2107dbe86edd3eea8ade928ba4" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.78.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "91fc4defde24a2315b904dc22036abaf" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.78.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c93a355bb752107c0af7870e501e94e4" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.79.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "3ec22ef95332fa4ba61dcfcabd3f17c8" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.79.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "dbcf113321e08d36ab313b38176a7fbb" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.79.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0fc2dd2acb58a16ffd586e71cff444cf" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.79.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4593b1ea44c7208fc5683749153e323f" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.80.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "f88edc53a25decbbc5444064ed9ba785" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.80.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "d00aff9b673f03271d9838e395d9c665" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.80.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "17d15478f54b934da2276cd2879dae47" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.80.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0e0ba5e3ba24a37664d3e56a1b41c299" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dc270cdaf9eb8cb5c980dbeff4708e89" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ac2023147ef870b70802cd55c0e979c9" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.81.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "8a46c2883b2bf724d899782e44e89b0b" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.81.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "a33e786ee2edbb61a7d28ca4e52d50fd" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.81.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bf0214bc7e6a6e29955e138f5c72d834" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.81.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2499264f070d65870de29be31cd6e529" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.82.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "597b29c76c6e33ac7f619868d7e75c41" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.82.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "86e078e50dce7dd57c24539168ab3aaa" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.82.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d6641681ce8cf075c4b57e6d02f70741" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.82.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "910d7ee50c7753cdb97d873e6aa9f993" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.83.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "66872790d912faca87cb7ac82eda28fa" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.83.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "416c79675b6354be5d311322f1c431e5" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.83.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7a5eee1332de39262e1f1118b2ee16b4" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.83.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "16f679f0047d0e367721ce7f7e4a7e86" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.84.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "8c69aff362378279a9b1cc68bffb5da7" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.84.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "43aa9100d63d818efc339a47af931e62" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.85.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "756bbb254564453e52a9c3360a1aa68c" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.85.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "0c31cbf876d687f38ed3d36e2c6fe8a4" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.85.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6ecb0cc30842f9ceb4305555ab6cb4ae" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.85.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0c9f00ccf479560f0c5a9a15def52462" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.84.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e98f73290415fff1b7cd5b6b3930d7db" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.84.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "21a5063393b7d8036d87607fb437e8ed" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.86.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "51f43b856eda8fd9a090af2a0ee22779" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.86.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "6ec722e13e328c73ccde36f1e69a9c48" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.86.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f8f34e196c25db6f9eda2f2dfe2e4bce" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.86.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "51dd1b4762e214cdb54af8be674755b8" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.87.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "d9e405d27802fd2cb9c9ce495e6cd276" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.87.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "067064d32d9b4a646c514e0b436fd525" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.87.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "326096fbfe151e9dddb3019ca7d3edc2" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.87.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c749ed8c64daf1e7610d2a76889ea716" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.88.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e1149b447edcb32f4d3a395df4597b31" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.88.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "76c7dba2f222e153c94cbe54910d37bf" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.88.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "584f51a530924f94fa84d848ef415cd1" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.89.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "18e78c026b0f6e4c81734895b93a6774" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.89.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "682e25187720b6343f2a92c9f49b32bd" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.89.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a6c65166eb6a07ceec22b8c05601a84f" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.89.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8a82e028f45f1463d8c3544163718c88" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.88.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ae64ecd2b518869c91e0bdc62c5f2832" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.90.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "38c24e2ae0652249ae17a89d056f21c0" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.90.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "a661cf4f5621717c41940fb9b050b6dc" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.90.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7f086c728e0e73b4d08a7d63999f1a70" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.90.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "85a012f95802b3695a6295cb91336b6f" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.91.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "e3de83468b212153e61220947c48652d" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.91.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "91df9793228a8ac16681eb5097cbddec" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.91.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6e964b41a1c517750f466c03327f6ede" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.91.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "406f47c590fd930cc3825fbeb2f55fc6" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.92.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c003a20098eae83266f82c5bca010e8a" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.92.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d8921241e9e009d7c5e95860eb0fed1b" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 1610612736, "records": [ { "name": "model.layers.92.mlp.moe_down_proj.weight", "shape": [ 128, 4096, 1536 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1610612736, "byteOffset": 0 } ], "md5sum": "1574bf7baae09e222a06a3c8b3e0a0d6" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 3221225472, "records": [ { "name": "model.layers.92.mlp.moe_gate_up_proj.weight", "shape": [ 128, 3072, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3221225472, "byteOffset": 0 } ], "md5sum": "3dea36638d0df909a6859d28b0595543" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 33029632, "records": [ { "name": "model.layers.64.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.64.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1048576 }, { "name": "model.layers.64.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1048832 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1049088 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1057280 }, { "name": "model.layers.65.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1065472 }, { "name": "model.layers.65.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2114048 }, { "name": "model.layers.65.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 2114304 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2114560 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2122752 }, { "name": "model.layers.66.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 2130944 }, { "name": "model.layers.66.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3179520 }, { "name": "model.layers.66.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 3179776 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3180032 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3188224 }, { "name": "model.layers.67.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 3196416 }, { "name": "model.layers.67.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4244992 }, { "name": "model.layers.67.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 4245248 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4245504 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4253696 }, { "name": "model.layers.68.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 4261888 }, { "name": "model.layers.68.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 5310464 }, { "name": "model.layers.68.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 5310720 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5310976 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5319168 }, { "name": "model.layers.69.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 5327360 }, { "name": "model.layers.69.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 6375936 }, { "name": "model.layers.69.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 6376192 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6376448 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6384640 }, { "name": "model.layers.70.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 6392832 }, { "name": "model.layers.70.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 7441408 }, { "name": "model.layers.70.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 7441664 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7441920 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7450112 }, { "name": "model.layers.8.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7458304 }, { "name": "model.layers.8.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 8506880 }, { "name": "model.layers.8.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 8507136 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8507392 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8515584 }, { "name": "model.layers.71.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8523776 }, { "name": "model.layers.71.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 9572352 }, { "name": "model.layers.71.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 9572608 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9572864 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9581056 }, { "name": "model.layers.72.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 9589248 }, { "name": "model.layers.72.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 10637824 }, { "name": "model.layers.72.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 10638080 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10638336 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10646528 }, { "name": "model.layers.73.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 10654720 }, { "name": "model.layers.73.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 11703296 }, { "name": "model.layers.73.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 11703552 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11703808 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11712000 }, { "name": "model.layers.74.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11720192 }, { "name": "model.layers.74.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12768768 }, { "name": "model.layers.74.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 12769024 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12769280 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12777472 }, { "name": "model.layers.75.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 12785664 }, { "name": "model.layers.75.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13834240 }, { "name": "model.layers.75.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 13834496 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13834752 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13842944 }, { "name": "model.layers.76.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13851136 }, { "name": "model.layers.76.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14899712 }, { "name": "model.layers.76.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 14899968 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14900224 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14908416 }, { "name": "model.layers.77.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14916608 }, { "name": "model.layers.77.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 15965184 }, { "name": "model.layers.77.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 15965440 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15965696 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15973888 }, { "name": "model.layers.78.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 15982080 }, { "name": "model.layers.78.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17030656 }, { "name": "model.layers.78.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 17030912 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17031168 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17039360 }, { "name": "model.layers.79.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 17047552 }, { "name": "model.layers.79.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18096128 }, { "name": "model.layers.79.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 18096384 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18096640 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18104832 }, { "name": "model.layers.80.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18113024 }, { "name": "model.layers.80.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 19161600 }, { "name": "model.layers.80.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 19161856 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19162112 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19170304 }, { "name": "model.layers.9.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19178496 }, { "name": "model.layers.9.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 20227072 }, { "name": "model.layers.9.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 20227328 }, { "name": "model.layers.80.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20227584 }, { "name": "model.layers.80.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20235776 }, { "name": "model.layers.81.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20243968 }, { "name": "model.layers.81.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21292544 }, { "name": "model.layers.81.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 21292800 }, { "name": "model.layers.81.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21293056 }, { "name": "model.layers.81.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21301248 }, { "name": "model.layers.82.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 21309440 }, { "name": "model.layers.82.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22358016 }, { "name": "model.layers.82.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 22358272 }, { "name": "model.layers.82.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22358528 }, { "name": "model.layers.82.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 22366720 }, { "name": "model.layers.83.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22374912 }, { "name": "model.layers.83.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23423488 }, { "name": "model.layers.83.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 23423744 }, { "name": "model.layers.83.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23424000 }, { "name": "model.layers.83.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23432192 }, { "name": "model.layers.84.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23440384 }, { "name": "model.layers.84.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23448576 }, { "name": "model.layers.85.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23456768 }, { "name": "model.layers.85.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24505344 }, { "name": "model.layers.85.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 24505600 }, { "name": "model.layers.84.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24505856 }, { "name": "model.layers.84.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25554432 }, { "name": "model.layers.84.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 25554688 }, { "name": "model.layers.85.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25554944 }, { "name": "model.layers.85.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25563136 }, { "name": "model.layers.86.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25571328 }, { "name": "model.layers.86.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26619904 }, { "name": "model.layers.86.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 26620160 }, { "name": "model.layers.86.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26620416 }, { "name": "model.layers.86.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26628608 }, { "name": "model.layers.87.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26636800 }, { "name": "model.layers.87.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27685376 }, { "name": "model.layers.87.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 27685632 }, { "name": "model.layers.87.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27685888 }, { "name": "model.layers.87.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27694080 }, { "name": "model.layers.88.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27702272 }, { "name": "model.layers.88.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 27710464 }, { "name": "model.layers.89.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 27718656 }, { "name": "model.layers.89.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28767232 }, { "name": "model.layers.89.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 28767488 }, { "name": "model.layers.88.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28767744 }, { "name": "model.layers.88.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29816320 }, { "name": "model.layers.88.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 29816576 }, { "name": "model.layers.89.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29816832 }, { "name": "model.layers.89.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29825024 }, { "name": "model.layers.90.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 29833216 }, { "name": "model.layers.90.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30881792 }, { "name": "model.layers.90.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 30882048 }, { "name": "model.layers.90.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30882304 }, { "name": "model.layers.90.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30890496 }, { "name": "model.layers.91.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 30898688 }, { "name": "model.layers.91.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31947264 }, { "name": "model.layers.91.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 31947520 }, { "name": "model.layers.91.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31947776 }, { "name": "model.layers.91.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31955968 }, { "name": "model.layers.92.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31964160 }, { "name": "model.layers.92.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33012736 }, { "name": "model.layers.92.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 33012992 }, { "name": "model.layers.92.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33013248 }, { "name": "model.layers.92.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 33021440 } ], "md5sum": "cecd9883bfc0edf6603ca57695a2c95d" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.93.self_attn.c_attn.weight", "shape": [ 9216, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "61a66c6b2c1e2c496a654bbc67c720bb" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.93.self_attn.o_proj.weight", "shape": [ 4096, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a99c42892cdb71745b6adba72bf21784" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 1049088, "records": [ { "name": "model.layers.93.mlp.gate.weight", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.93.self_attn.k_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1048576 }, { "name": "model.layers.93.self_attn.q_norm.weight", "shape": [ 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 256, "byteOffset": 1048832 } ], "md5sum": "b8f6b27afc0cec05e67354fb9fa58c33" } ] }