diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5441 @@ +{ + "metadata": { + "ParamSize": 339, + "ParamBytes": 44494565376.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c1207cfb87904f17085a49e2ca0fb581" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "6624be85b1f1d872320e0e9f3046183d" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c97cae87c2e3c496a30ee4df2c208d9d" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "8a168e916dcfe3ae172fa6e1444166fa" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c775a088e7bdaec7d309bfd16b4eddc9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5248f659005ed8ab72bd02e857c67de8" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "f927788506c79c1dd626bdf2cc7931e0" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d6c701fd2af33f558e626c36b31fe4af" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0ebbd48eeeaf48f9c80a9aab4d61becf" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c0bbe634cf2859dfc38e8228bd762f43" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "951db7ce1ff986f6a55c862fc69bbf07" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "28b62a707535f6e8d26859c003460ed7" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0c82c8d7bb4285caecd74fc1575f2121" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "899318fd155d3f256c65c280925f351a" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "202f081980cdcb66a67923461d86952f" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e3106c00a8dd630ff1917ee610ae18c5" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4a6b4f75ecf743a6d59d5cc98a34b2ca" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "8bdf19bf8d374560ef051257e08064ce" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "4faea89131b9228f6280722c35139e47" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "064bed1f5a414d48075fea08460948ec" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "017f9cf43e6b1f1014bc931acac1535b" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4ba74eed20c0e0079fed8eabcdb26923" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "7244113f58c5fdc4a08b5c899064c753" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e590f2d342b60ec8b90ab2949204fb3a" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "719c598cf7ee1df8ae7eea14404dd50c" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "cc581a93217b1290cf4c9338c5a153e4" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "1a764d1ddc7702c3e0835160909d25d4" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "4c119208ed7580175d208873729dd74a" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a6efb1742a53bdf612be70b6edc437b4" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "43e4432d9dae72a6c80cbb7ace2da404" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "384f914f63fb8da8bfdc6ea7708ce672" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ab2b3d411f0645cdd533f675d9ea30db" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "13bcbe08188fb66bae1cc969542a6b2d" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3c87685270b69f56fa7372dc3de7bbe6" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c69d6d839cfd1502c24bee0003389aed" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c5fdb74312396a134f91cc254b7c6113" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c2455f2ff5e8303407e38fd5044cec32" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "873ceb87f9a57edd8203b05ee3cf1359" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "1f0d86ad83ed2b2be152478f0fd4d786" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9929ce41ce7cfc13f67b37d49995b06d" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2c46257cefa6bc10b630cdefdfc0d74e" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "478cc56b1f9a566cb5ffa6a394d352c3" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "89ca1d325c5aa3bfbb1e972be6ccf298" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "bef529cc51dcfee2d098b045bd66ebc0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "13fa40b837f60e9f1ecb53168a1c4252" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "cb028f24402e66971c28fc2a68cd3f2f" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "4ffc9bd5c0d4f1616085cbf139a46a46" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "a777cc27d2e9064adcd8fba3de019287" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "862ee1c331712cad27f20a51afc57445" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ab766d48c4f582c63687bb81c0c0c32a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "bcc10c76f7305a81b5025158bae0a89c" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "5648c0d50588655750b30926f27f6464" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a21a764beba6d18f2f323deaadfea67a" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "574a98c82e0fdf48c16a60564eb81caf" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "f96c83ecc8fe1fd50d79fcaa07f6b7d5" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c69bfacf08cd55d42c62d5f0f2d8c01f" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "71807b8dc2b38301f58f8ee576911292" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bd69edc395b4d6f32a2d56bf83aaa7e8" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "9e590d3859c210c884e7d30396475384" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "206a1bf22990b93d01a0cb511016f388" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "08be3334ac79ed0bd991b1b275a04258" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "522be1a28ae769c604f5f437887d8630" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c8b24cd42e3bcd5b12ca0c1c97ac7c0e" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0925ce242ccdf71b6437f9b101515677" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "249ca08b58104900560a3d81ad3794be" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "94526102a76a9028d78acc4e6fbec396" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c3f9065fab8dde836cc3e5892ec144cc" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c527cfe7d411e459fc315168275379d0" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5146b706a981bb75829ecdf9bb83aedc" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0765e45a78d5a1d0c04b42418568be13" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "ba90d0b003114798870ad92488a1a1a9" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "fbc7080b8896582a5bd1d46e84e36738" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "acb5d71f22c2267b54a17781f3f3ca5c" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "9a48371406753def7bd27bb3a6adc4d7" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "64b6a4e432f09a166a88e5401cb8a5ab" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "002edde149ce79b7b894b5eec1deca8c" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "58aa8b7de95f3a90f7fc70a31c3b812c" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c3377fbff2d8cd1b3e6db043ace6064c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "07ec7b2e79a595731e2c6d3c9713d5e7" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e0169a9c9dc6705c16dfabd416cc264d" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "76c2b47efa44a83cf4a79f2e24a875fd" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d591f3b33270fbb49faec602167a3015" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "e4d14c2297e820471512d9f88785dca6" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "58aba06a76f749cf1ec52165c3e09deb" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3cd4adca97e01a9fce5e2541533c8a72" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5ee8df917402a73ca72cfc65e85ff451" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c8ea7f2ede95e32d5d80c04a0a56c6f4" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "53d366741aabcb2604dd9e1730884734" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "63f35d8f8a2eb90cb9b6e7860e5e66fd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "5b2ba15cc881e37deeb82fdb6d98dcf6" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "dcba35a054ca97fa9c95a44c31eea503" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "c59ba36817891221b3a7577410e9d62a" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a70c2f959d9804e68f3a985487966cfd" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "58fba5b7657311a08ba99b2266d2b8e2" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "102094bf120f3666215bb41f9fced43a" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e31d7cb387e9b925f57c2d519b8ca5eb" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "96c0763312d8743aded51b75bea71921" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2aa14e6e86390124a130ce965ab5b3dd" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "a35d98ad96be8352ebbab3c065552a45" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "92600c8521c5f200376a35bb077861e9" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "a4c0deb3d90da0a6a1810ce2cea95426" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "45be7f10624e4254b7f92530e539f8b2" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "69545b612606fccc22f12c3c31f3754b" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "881d543a785f9593898241dfa0c260bd" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2adca63b686586e5ed6c7f4058cc1b2e" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f88cbf9fd4e39f46d703f0aec03fe92f" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "508df900efef6bf4a7a0e239fe04d7b4" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9e13f4aec69fb8ca9044916ae0e81e1a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f2b232d463af1373d73d4de9a92314d9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "0d88f3b8a8146d17147f85f9de51d496" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "cfc3b7a6e7d0255a90bc9ddea6ad809f" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b0e2dd398cdc738c1160fdc5d7dfdfe9" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2721239614a4c90e342b9b2d408d2faf" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "35b71fd579d5c793ff1b03e371bf5889" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "5c5d38d29b0acd291e50ad52a6d1c0b0" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "bbfb2d82a2e6f2343a6b4f882dc7817e" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "36c1f19a42e241a7582f300c03698856" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1e7dbd32fd7a72740bd53c44f780cb94" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "9ca72a59e0bdc97ebe6e76ac90a07fcc" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "be34d45fc64b5cd15bd9e9a7af5ae751" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "26b35af87284125eac4370935f1cae53" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "64b43d98a6e70ddeeaf85ecbe664137f" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "4ad19b7e87f2fe20a578df05ebdc5d08" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "50c85d89a59f0e662a18d769a32d701b" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "1555a9cf02a0647027a234489d4c3a3b" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "b505922f55c7549a5cd9bc7418b3eff0" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "78d596487a87429a355af01d707f063a" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "41f5018bd8d92abe059ea195cd86e293" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3ede93ae43f9120b9cfb42fc8a236e6c" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "21169c33013de14b517f5b0d207bdff9" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "db51dab7a55cebf59ee05b27a3144753" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "0f6f7b66e6dae6a3cf7a5b962af8fdf4" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "7be5146cfcd0c3ddc1729c8282608348" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2ebb4875abcdc885bdc07ec0d3f153b4" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "324e96107ac110479f38e4417026ad61" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "745f1ed053764ea3c9b2c6c329634fd2" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3557e15f6a6c2c14f4940bd88cfea555" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c67bbef621b6f64f7a3bd88c350c393c" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c2824727a8188dae8fa2c1ffa66be0fe" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "e5ac202c8b3b68671a90eba39e66ea59" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0e78bcd62487e483633def8989352265" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "519ab58a54e46ccd9df5297b6ab68530" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "80d76b55cb9afc68d0c782268f9c4853" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "b8411a21a31b8243904a0096b7ea085d" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f687efe47d2c9c94521bb8da186a3a7e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e063668c8f20b4df04dfd076cf005616" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "caf62f716817d2d65fc970b616c59d50" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "d79f37d47d4a8c18c8a3a874cad49c4c" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d4feb59d3da5518e2fbb77c36e6a0f2d" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d5c9a3920bbc7c1807970f9261616c7b" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "823ac3cdf896c771b176bfcfb6b426a9" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "bc47fd53042f6a88e2bf4819f8519dae" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e95cb94d22e8d34d3179007777033f8f" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "dbe5f811a114ca4b145bbcf3a95f6782" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "d5a73e2bb0de0440f301c089d644e37f" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "44b4dbdacccd0429a10858185ae03ccd" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "69e0fc6510e17aa5aff0587ccc875d9e" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d27ec2f7d0b741be32253616321db189" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "ff4ac4ae711b7f2eaed3ba87e1a28712" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "2a51b589cfe0420482d915bf35a045c1" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ad1f42e5549dde839cb2fbc49354b9ef" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a94f878615126957fdab6aa1e706f018" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "c2c5d1a17a051eaf282d64b7e77e1e1d" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "82d4fe1b4d220eca8fdc7fb84409dafc" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "0cdb673e8e0855e7dc524eb1394ba5a6" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "07f1e0b89b695581015f5054f62112c6" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "8ab4e894c5d77d9394c8cc7e18364920" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "f9d800f33559a1e29f6cc2c9c5b60d65" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.46.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e841b26221b888eded39cc53c3a28865" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "22c2fa81c7b29182a3d2b4ffccf9aec4" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "275abe0de446cbd516e3e263575e1dcc" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1e4ecce432003ef2bebcbb977b0f277a" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.47.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "12c24a4d92a51dcc9d9e78ac1611348f" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4fe297e23fda3862f72b4f45087be2a9" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "5cd66a79405ced4a89048363fcedd6bc" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1800434d5a26bce8b01963d99261565a" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.48.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3641d809f4fd47b7ae3bfa0f25e713a6" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "69f1b9662e69187bdc39044c2bc203fe" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "1e23bda65762e9cfe6492ea01e813cee" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "1d2bee332580329fae9e300910dd9708" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.49.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "00128846f650b744f22e7744288733e7" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "dfd86ae200f9fc446aaadc28c7911a3e" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "fcfeeb1548d83d7e2d17697b8b659b1a" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "05fab68d29f0fdee6a56699521b3617e" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e5cb71bdba6d6093971e97b52f8ba0d2" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3574045c9aa1a00a34708c0830e7de01" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "475a08d6e33f52a053bf3a1b42189536" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "fa0e4379489c9fe223523e895a0f79fe" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.50.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e1ca6074c8cf3177f55b8a4de304f4bc" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "87e58dba5fca1f5e58f0378ffcd161cd" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "dcd8c1bcfda7c593465d64f44edc6a7f" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "ade96607de7d4bd52999755f561230b9" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.51.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d2ac24be766e1ece815e9bb238ad8cfc" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3baae47732e4e9563250087b5f7cbb5e" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "e8181b891ff94e435107c3916055c2a0" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "fec7cf02b5f6477cc160fc3cc9a7de0d" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.52.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8986f1e88891868226a333c6b744b2c1" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d957b32ce150381cbfb56fd55f70c59b" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "b96692e4347a567b11c14eac8ed19fb3" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "df77a4cee48601742ede70136221453a" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.53.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "769dc296ff676fc2c56254801ad24c20" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7f09042a3dbf12404521dc92f48f906c" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "2aa5d3acffe5895ee1efd44ecd7babda" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "caaa423a2fc3ac4401af96bd6dafdfe8" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.54.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "5fdfdbc4c5e06274eb63c67eb2329736" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "753787616c5133e4ea2045434da4e3b2" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "806ed2d8dca63922240eaf6b6ada1a29" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "95168a86cb267f7d964e0505725f8f50" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.55.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "893b9912c15e503e33a39985c42ad21c" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d0a8fe9970b1b6216645654a358fb763" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "759949f8a6467da870e39b8ca7c5aefe" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "71ed954e7a8e30ee6bae4d887a191850" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f3ac14aae8a52ee9b52ed3c784063f7f" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "74db162c0eb2508db7a2a28cd085f7d2" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "1330dfaa0257dfa9e037ac5dd5979d19" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "9d94a36807ca7ed8876f3e169764da6d" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "77a4634e44d3b0fec10f60e260075ea0" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a2658bd47fedf3606e9d94a2ee8a98c0" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "b34e75085b3490427c7fd281377c16df" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "8c4c52c3a9f00918ef7d7f5bcadf9eaa" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "fdcde06d68bc89a077de959c7b1a15e2" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4a6695dfc72d7bdc145621d1bbff277c" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 6144, + 16384 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "81f6b78a84e87edfc24b6377c3ef2668" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 402653184, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 32768, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 402653184, + "byteOffset": 0 + } + ], + "md5sum": "775037907742a6d4a92186e931cb36dd" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "53c705e5ebc14909e4088e3707e946bf" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 6144, + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "43b66f4c83236b000bdf2e9d332e9270" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 1388544, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 12288 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 24576 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 36864 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 49152 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 61440 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 73728 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 86016 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 98304 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 110592 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 122880 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 135168 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 147456 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 159744 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 172032 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 184320 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 196608 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 208896 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 221184 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 233472 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 245760 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 258048 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 270336 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 282624 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 294912 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 307200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 319488 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 331776 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 344064 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 356352 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 368640 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 380928 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 393216 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 405504 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 417792 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 430080 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 442368 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 454656 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 466944 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 479232 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 491520 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 503808 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 516096 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 528384 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 540672 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 552960 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 565248 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 577536 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 589824 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 602112 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 614400 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 626688 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 638976 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 651264 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 663552 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 675840 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 688128 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 700416 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 712704 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 724992 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 737280 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 749568 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 761856 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 774144 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 786432 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 798720 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 811008 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 823296 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 835584 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 847872 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 860160 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 872448 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 884736 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 897024 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 909312 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 921600 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 933888 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 946176 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 958464 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 970752 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 983040 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 995328 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1007616 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1019904 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1032192 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1044480 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1056768 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1069056 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1081344 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1093632 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1105920 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1118208 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1130496 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1142784 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1155072 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1167360 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1179648 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1191936 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1204224 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1216512 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1228800 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1241088 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1253376 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1265664 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1277952 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1290240 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1302528 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1314816 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1327104 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1339392 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1351680 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1363968 + }, + { + "name": "model.norm.weight", + "shape": [ + 6144 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12288, + "byteOffset": 1376256 + } + ], + "md5sum": "88c8c3cb1c7c261614ce184141204a42" + } + ] +} \ No newline at end of file