|
{ |
|
"metadata": { |
|
"ParamSize": 339, |
|
"ParamBytes": 44494565376.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1207cfb87904f17085a49e2ca0fb581" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6624be85b1f1d872320e0e9f3046183d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c97cae87c2e3c496a30ee4df2c208d9d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a168e916dcfe3ae172fa6e1444166fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c775a088e7bdaec7d309bfd16b4eddc9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5248f659005ed8ab72bd02e857c67de8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f927788506c79c1dd626bdf2cc7931e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6c701fd2af33f558e626c36b31fe4af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ebbd48eeeaf48f9c80a9aab4d61becf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0bbe634cf2859dfc38e8228bd762f43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "951db7ce1ff986f6a55c862fc69bbf07" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28b62a707535f6e8d26859c003460ed7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0c82c8d7bb4285caecd74fc1575f2121" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "899318fd155d3f256c65c280925f351a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "202f081980cdcb66a67923461d86952f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e3106c00a8dd630ff1917ee610ae18c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a6b4f75ecf743a6d59d5cc98a34b2ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8bdf19bf8d374560ef051257e08064ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4faea89131b9228f6280722c35139e47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "064bed1f5a414d48075fea08460948ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "017f9cf43e6b1f1014bc931acac1535b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ba74eed20c0e0079fed8eabcdb26923" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7244113f58c5fdc4a08b5c899064c753" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e590f2d342b60ec8b90ab2949204fb3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "719c598cf7ee1df8ae7eea14404dd50c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc581a93217b1290cf4c9338c5a153e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a764d1ddc7702c3e0835160909d25d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c119208ed7580175d208873729dd74a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6efb1742a53bdf612be70b6edc437b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43e4432d9dae72a6c80cbb7ace2da404" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "384f914f63fb8da8bfdc6ea7708ce672" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab2b3d411f0645cdd533f675d9ea30db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13bcbe08188fb66bae1cc969542a6b2d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c87685270b69f56fa7372dc3de7bbe6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c69d6d839cfd1502c24bee0003389aed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5fdb74312396a134f91cc254b7c6113" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2455f2ff5e8303407e38fd5044cec32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "873ceb87f9a57edd8203b05ee3cf1359" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f0d86ad83ed2b2be152478f0fd4d786" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9929ce41ce7cfc13f67b37d49995b06d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2c46257cefa6bc10b630cdefdfc0d74e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "478cc56b1f9a566cb5ffa6a394d352c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89ca1d325c5aa3bfbb1e972be6ccf298" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bef529cc51dcfee2d098b045bd66ebc0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13fa40b837f60e9f1ecb53168a1c4252" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb028f24402e66971c28fc2a68cd3f2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ffc9bd5c0d4f1616085cbf139a46a46" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a777cc27d2e9064adcd8fba3de019287" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "862ee1c331712cad27f20a51afc57445" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab766d48c4f582c63687bb81c0c0c32a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bcc10c76f7305a81b5025158bae0a89c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5648c0d50588655750b30926f27f6464" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a21a764beba6d18f2f323deaadfea67a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "574a98c82e0fdf48c16a60564eb81caf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f96c83ecc8fe1fd50d79fcaa07f6b7d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c69bfacf08cd55d42c62d5f0f2d8c01f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71807b8dc2b38301f58f8ee576911292" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd69edc395b4d6f32a2d56bf83aaa7e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e590d3859c210c884e7d30396475384" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "206a1bf22990b93d01a0cb511016f388" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08be3334ac79ed0bd991b1b275a04258" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "522be1a28ae769c604f5f437887d8630" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8b24cd42e3bcd5b12ca0c1c97ac7c0e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0925ce242ccdf71b6437f9b101515677" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "249ca08b58104900560a3d81ad3794be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94526102a76a9028d78acc4e6fbec396" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3f9065fab8dde836cc3e5892ec144cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c527cfe7d411e459fc315168275379d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5146b706a981bb75829ecdf9bb83aedc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0765e45a78d5a1d0c04b42418568be13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba90d0b003114798870ad92488a1a1a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbc7080b8896582a5bd1d46e84e36738" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "acb5d71f22c2267b54a17781f3f3ca5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9a48371406753def7bd27bb3a6adc4d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64b6a4e432f09a166a88e5401cb8a5ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "002edde149ce79b7b894b5eec1deca8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58aa8b7de95f3a90f7fc70a31c3b812c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3377fbff2d8cd1b3e6db043ace6064c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07ec7b2e79a595731e2c6d3c9713d5e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0169a9c9dc6705c16dfabd416cc264d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76c2b47efa44a83cf4a79f2e24a875fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d591f3b33270fbb49faec602167a3015" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4d14c2297e820471512d9f88785dca6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58aba06a76f749cf1ec52165c3e09deb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3cd4adca97e01a9fce5e2541533c8a72" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ee8df917402a73ca72cfc65e85ff451" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8ea7f2ede95e32d5d80c04a0a56c6f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53d366741aabcb2604dd9e1730884734" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63f35d8f8a2eb90cb9b6e7860e5e66fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b2ba15cc881e37deeb82fdb6d98dcf6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcba35a054ca97fa9c95a44c31eea503" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c59ba36817891221b3a7577410e9d62a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a70c2f959d9804e68f3a985487966cfd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58fba5b7657311a08ba99b2266d2b8e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "102094bf120f3666215bb41f9fced43a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e31d7cb387e9b925f57c2d519b8ca5eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96c0763312d8743aded51b75bea71921" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2aa14e6e86390124a130ce965ab5b3dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a35d98ad96be8352ebbab3c065552a45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92600c8521c5f200376a35bb077861e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a4c0deb3d90da0a6a1810ce2cea95426" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45be7f10624e4254b7f92530e539f8b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69545b612606fccc22f12c3c31f3754b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "881d543a785f9593898241dfa0c260bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2adca63b686586e5ed6c7f4058cc1b2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f88cbf9fd4e39f46d703f0aec03fe92f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "508df900efef6bf4a7a0e239fe04d7b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e13f4aec69fb8ca9044916ae0e81e1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2b232d463af1373d73d4de9a92314d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d88f3b8a8146d17147f85f9de51d496" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cfc3b7a6e7d0255a90bc9ddea6ad809f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b0e2dd398cdc738c1160fdc5d7dfdfe9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2721239614a4c90e342b9b2d408d2faf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35b71fd579d5c793ff1b03e371bf5889" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c5d38d29b0acd291e50ad52a6d1c0b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bbfb2d82a2e6f2343a6b4f882dc7817e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36c1f19a42e241a7582f300c03698856" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e7dbd32fd7a72740bd53c44f780cb94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ca72a59e0bdc97ebe6e76ac90a07fcc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be34d45fc64b5cd15bd9e9a7af5ae751" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26b35af87284125eac4370935f1cae53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64b43d98a6e70ddeeaf85ecbe664137f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ad19b7e87f2fe20a578df05ebdc5d08" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50c85d89a59f0e662a18d769a32d701b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1555a9cf02a0647027a234489d4c3a3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b505922f55c7549a5cd9bc7418b3eff0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78d596487a87429a355af01d707f063a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "41f5018bd8d92abe059ea195cd86e293" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ede93ae43f9120b9cfb42fc8a236e6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21169c33013de14b517f5b0d207bdff9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db51dab7a55cebf59ee05b27a3144753" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f6f7b66e6dae6a3cf7a5b962af8fdf4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7be5146cfcd0c3ddc1729c8282608348" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2ebb4875abcdc885bdc07ec0d3f153b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "324e96107ac110479f38e4417026ad61" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "745f1ed053764ea3c9b2c6c329634fd2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3557e15f6a6c2c14f4940bd88cfea555" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c67bbef621b6f64f7a3bd88c350c393c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2824727a8188dae8fa2c1ffa66be0fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5ac202c8b3b68671a90eba39e66ea59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e78bcd62487e483633def8989352265" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "519ab58a54e46ccd9df5297b6ab68530" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80d76b55cb9afc68d0c782268f9c4853" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8411a21a31b8243904a0096b7ea085d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f687efe47d2c9c94521bb8da186a3a7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e063668c8f20b4df04dfd076cf005616" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "caf62f716817d2d65fc970b616c59d50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d79f37d47d4a8c18c8a3a874cad49c4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4feb59d3da5518e2fbb77c36e6a0f2d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5c9a3920bbc7c1807970f9261616c7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "823ac3cdf896c771b176bfcfb6b426a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc47fd53042f6a88e2bf4819f8519dae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e95cb94d22e8d34d3179007777033f8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbe5f811a114ca4b145bbcf3a95f6782" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5a73e2bb0de0440f301c089d644e37f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44b4dbdacccd0429a10858185ae03ccd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69e0fc6510e17aa5aff0587ccc875d9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d27ec2f7d0b741be32253616321db189" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff4ac4ae711b7f2eaed3ba87e1a28712" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2a51b589cfe0420482d915bf35a045c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad1f42e5549dde839cb2fbc49354b9ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a94f878615126957fdab6aa1e706f018" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2c5d1a17a051eaf282d64b7e77e1e1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82d4fe1b4d220eca8fdc7fb84409dafc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0cdb673e8e0855e7dc524eb1394ba5a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07f1e0b89b695581015f5054f62112c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ab4e894c5d77d9394c8cc7e18364920" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9d800f33559a1e29f6cc2c9c5b60d65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e841b26221b888eded39cc53c3a28865" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22c2fa81c7b29182a3d2b4ffccf9aec4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "275abe0de446cbd516e3e263575e1dcc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e4ecce432003ef2bebcbb977b0f277a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12c24a4d92a51dcc9d9e78ac1611348f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4fe297e23fda3862f72b4f45087be2a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5cd66a79405ced4a89048363fcedd6bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1800434d5a26bce8b01963d99261565a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3641d809f4fd47b7ae3bfa0f25e713a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69f1b9662e69187bdc39044c2bc203fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e23bda65762e9cfe6492ea01e813cee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d2bee332580329fae9e300910dd9708" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00128846f650b744f22e7744288733e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfd86ae200f9fc446aaadc28c7911a3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fcfeeb1548d83d7e2d17697b8b659b1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05fab68d29f0fdee6a56699521b3617e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5cb71bdba6d6093971e97b52f8ba0d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3574045c9aa1a00a34708c0830e7de01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "475a08d6e33f52a053bf3a1b42189536" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa0e4379489c9fe223523e895a0f79fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1ca6074c8cf3177f55b8a4de304f4bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87e58dba5fca1f5e58f0378ffcd161cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcd8c1bcfda7c593465d64f44edc6a7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ade96607de7d4bd52999755f561230b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2ac24be766e1ece815e9bb238ad8cfc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3baae47732e4e9563250087b5f7cbb5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8181b891ff94e435107c3916055c2a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fec7cf02b5f6477cc160fc3cc9a7de0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8986f1e88891868226a333c6b744b2c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d957b32ce150381cbfb56fd55f70c59b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b96692e4347a567b11c14eac8ed19fb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df77a4cee48601742ede70136221453a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "769dc296ff676fc2c56254801ad24c20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f09042a3dbf12404521dc92f48f906c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2aa5d3acffe5895ee1efd44ecd7babda" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "caaa423a2fc3ac4401af96bd6dafdfe8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5fdfdbc4c5e06274eb63c67eb2329736" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "753787616c5133e4ea2045434da4e3b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "806ed2d8dca63922240eaf6b6ada1a29" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "95168a86cb267f7d964e0505725f8f50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "893b9912c15e503e33a39985c42ad21c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0a8fe9970b1b6216645654a358fb763" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "759949f8a6467da870e39b8ca7c5aefe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71ed954e7a8e30ee6bae4d887a191850" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f3ac14aae8a52ee9b52ed3c784063f7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74db162c0eb2508db7a2a28cd085f7d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1330dfaa0257dfa9e037ac5dd5979d19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d94a36807ca7ed8876f3e169764da6d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77a4634e44d3b0fec10f60e260075ea0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2658bd47fedf3606e9d94a2ee8a98c0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b34e75085b3490427c7fd281377c16df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c4c52c3a9f00918ef7d7f5bcadf9eaa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fdcde06d68bc89a077de959c7b1a15e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a6695dfc72d7bdc145621d1bbff277c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 201326592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
6144, |
|
16384 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 201326592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81f6b78a84e87edfc24b6377c3ef2668" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 402653184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
32768, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 402653184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "775037907742a6d4a92186e931cb36dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53c705e5ebc14909e4088e3707e946bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
6144, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43b66f4c83236b000bdf2e9d332e9270" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1388544, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 12288 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 24576 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 36864 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 49152 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 61440 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 73728 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 86016 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 98304 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 110592 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 122880 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 135168 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 147456 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 159744 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 172032 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 184320 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 196608 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 208896 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 221184 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 233472 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 245760 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 258048 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 270336 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 282624 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 294912 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 307200 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 319488 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 331776 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 344064 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 356352 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 368640 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 380928 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 393216 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 405504 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 417792 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 430080 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 442368 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 454656 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 466944 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 479232 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 491520 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 503808 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 516096 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 528384 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 540672 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 552960 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 565248 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 577536 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 589824 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 602112 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 614400 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 626688 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 638976 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 651264 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 663552 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 675840 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 688128 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 700416 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 712704 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 724992 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 737280 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 749568 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 761856 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 774144 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 786432 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 798720 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 811008 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 823296 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 835584 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 847872 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 860160 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 872448 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 884736 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 897024 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 909312 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 921600 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 933888 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 946176 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 958464 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 970752 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 983040 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 995328 |
|
}, |
|
{ |
|
"name": "model.layers.46.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1007616 |
|
}, |
|
{ |
|
"name": "model.layers.46.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1019904 |
|
}, |
|
{ |
|
"name": "model.layers.47.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1032192 |
|
}, |
|
{ |
|
"name": "model.layers.47.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1044480 |
|
}, |
|
{ |
|
"name": "model.layers.48.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1056768 |
|
}, |
|
{ |
|
"name": "model.layers.48.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1069056 |
|
}, |
|
{ |
|
"name": "model.layers.49.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1081344 |
|
}, |
|
{ |
|
"name": "model.layers.49.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1093632 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1105920 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1118208 |
|
}, |
|
{ |
|
"name": "model.layers.50.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1130496 |
|
}, |
|
{ |
|
"name": "model.layers.50.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1142784 |
|
}, |
|
{ |
|
"name": "model.layers.51.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1155072 |
|
}, |
|
{ |
|
"name": "model.layers.51.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1167360 |
|
}, |
|
{ |
|
"name": "model.layers.52.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "model.layers.52.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1191936 |
|
}, |
|
{ |
|
"name": "model.layers.53.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1204224 |
|
}, |
|
{ |
|
"name": "model.layers.53.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1216512 |
|
}, |
|
{ |
|
"name": "model.layers.54.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1228800 |
|
}, |
|
{ |
|
"name": "model.layers.54.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1241088 |
|
}, |
|
{ |
|
"name": "model.layers.55.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1253376 |
|
}, |
|
{ |
|
"name": "model.layers.55.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1265664 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1277952 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1290240 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1302528 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1314816 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1327104 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1339392 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1351680 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1363968 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 12288, |
|
"byteOffset": 1376256 |
|
} |
|
], |
|
"md5sum": "88c8c3cb1c7c261614ce184141204a42" |
|
} |
|
] |
|
} |