| { | |
| "metadata": { | |
| "ParamSize": 305, | |
| "ParamBytes": 4319821824.0, | |
| "BitsPerParam": 5.000976230824355 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 209715200, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_weight", | |
| "shape": [ | |
| 102400, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 209715200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "606880b7a8dc848647d0f4f96a4b89ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b6da9737170d1c0b645180a4de7eddcd" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8492c07d2f5760fbc9219bdd3b678b50" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29040640, | |
| "records": [ | |
| { | |
| "name": "lm_head.q_scale", | |
| "shape": [ | |
| 102400, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 26214400, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 26214400 | |
| }, | |
| { | |
| "name": "model.layers.22.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 26222592 | |
| } | |
| ], | |
| "md5sum": "a1e27e3b38f9d5dbefa1a60a33844a7b" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "882786af5e221402ac6a93cfbe558d18" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31014912, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 5644288 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 5652480 | |
| }, | |
| { | |
| "name": "model.layers.23.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 28196864 | |
| } | |
| ], | |
| "md5sum": "4b2506de5078dd4f8676e493bd57dde5" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 30810112, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 5636096 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 5644288 | |
| } | |
| ], | |
| "md5sum": "2016cd57aa151424f83b8a11866639b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1676066fb9d039225b9db1a7025a435b" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ed1f5c885371df0971b2d2eb1290b5a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "45266bce7406124aa81a10a348fde366" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "a088038b26d0b22649806cc1d028fdf9" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d67505ab892df80fa16122767fd50761" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "da71da4dbe03b54fe30fd3cd21d3e8f9" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "f49e54b4bb128fc91b502800f3fe2e5c" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52bb43a590afc5a108144c97e1545541" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "895d44bf500994768e69cc4acb0fe917" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e2ccbcac2ce04eac264d6a85fa38ba6" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "bad478a89ded29503b7ad5247596bab1" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c78425185d2ed4231feb8fa7d0ac144" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ccbde98336037080f6518ea492561a6f" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "845793104fed7cbf95ee1534a2d21659" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20d73c91a3d72233d81982ce621a72c3" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "97661861b9330740351d75485aabdce9" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b4f341001044c445a5366ba03e97dbd1" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "1982a151e5efe66f797dffdcb665e8bd" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fa27b88003a19faf081a3c9565bd1b77" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52865cb1714cf5630b995a5583ea2df0" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "20b2f49b7fa06db00725aa97c436b952" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 209715200, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_weight", | |
| "shape": [ | |
| 102400, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 209715200, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb7f2f3c841f0ee8657397755d0dbdd3" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 26214400, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.q_scale", | |
| "shape": [ | |
| 102400, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 26214400, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "247d530128a3acc07316362bf878ef5b" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "538bfee51b0cd51ad8c57cef968c1b81" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8de562a563fd2c319e18751f54a9a258" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1fa5bff53cc592bb798d1075ead4c32b" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32595968, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12599296 | |
| }, | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15417344 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21061632 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24207360 | |
| } | |
| ], | |
| "md5sum": "e5647ccc8edffb18918b73b175d14772" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "61802bfcda32eb42d51fae14f9d8d6fb" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f2fc8896ab3e23fc577de0d2a5daf98f" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "4c543de50f9e1ad35153c1ba5a66e058" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b9818960ffc4bd9a71205e649bf97e89" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9b8747e93fbccd762a88ad4e768a8f40" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df63e726070f154a4fd009e4a252587b" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "18052ca573905c87811a83b199e457e2" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f005300edf1ab989f6e9e36de6bdda70" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "840c3be3ac1b54b4af55ca53b2a8a002" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "0388837eef43c12f07f59b200704206d" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6ab677658eb4afa9234655d6261cfdba" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3033c3ef797fc34862a4f5d1113748ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a75291043a554d9f9c32475111cfbe14" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "57fda20ee30ec1e11afa4858a8f9bc0d" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5754cc13d26ce5bd7cd6bbc429dcc37d" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ee3e18ecc01bbd548beb64d76bc2fd1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "8ef26c4416be17c6da3b37f59eb43a4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "325ab20541b280b55ff2037492d905ed" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2074661de6b0194cd2f24cef5a3edc7f" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1873dd4562e9ed7c493e77effac3ad80" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "a522d3059f4f9305f7f3b82a0f8474c2" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bfc566d2e359a21811584069fdb6dfd6" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4b56610368dd9a1efa9179a2e6b8fea1" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "46d6389203559e24c81a891ce53f13d2" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4e30fe4386b207489f8af6b9e44c1381" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df0953c545bebe4c332c05b9e87918ee" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a49caeb6e52d4f009a05a386d275b189" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "7c22a6609bd775f754302364776bac3b" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "96ed8151fb856a871150dfb828ce66ca" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8f3cbfc6d5255e7ae1c468b71149e870" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "c7bc07ccbcef367393c389e32b0f237d" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0806c61004ab81f16f2a0bba1264d6ba" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bcf10dd970bf297fc8442125fdc6c12b" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "81e693dffdc97f72728d8cc0c0bfb90b" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "cc6681044aeb3c1339b59890e1b39206" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ef9dce04f7a42cdb15b7f3332f3cca46" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e2deca87c61d92102516851c4fe9bf68" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "a1a4c25aaca07739b86d44a661b0f199" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "87b45b61bc6024979fced00e910e4ace" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "632bf70a29517427aae921f5f6f46fbc" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "02d5b26cb847690b7d0b3d775d276279" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "bc8ddd14b1ff9303d6393d7c979d2e53" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4d8c8a3584c50927613034b8f8cea42d" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6bc8e84046d80beab429a1cf7c6edfc4" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32063488, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 1056768 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 23601152 | |
| }, | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 26419200 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 32055296 | |
| } | |
| ], | |
| "md5sum": "4b23d8cfa463acb0553955d5347eb0ea" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e25872cd1502cc268068612bf3e810a6" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb772f1c323ca7f4ada0327f7b8f0151" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "51ee6a441d70c5438780aa43057a3423" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32587776, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 3145728 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 11534336 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 12582912 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 12591104 | |
| }, | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 15409152 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 24199168 | |
| } | |
| ], | |
| "md5sum": "cc223878af98d797a73cb50991a85d65" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 29360128, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 1048576 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 26214400 | |
| } | |
| ], | |
| "md5sum": "027462be1b2294837b184dd763415a34" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31989760, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 9445376 | |
| } | |
| ], | |
| "md5sum": "5a8dad096acda11755fd1043218f08b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "206813d374e06f6bb22562b704ee8827" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e5865516f88c52b7e7e5c3f90cb1eef6" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "24e60c84726af450b6c80164d70858da" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48a911cd98a3967ffba6773f7f2f2ed8" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d7cfb15ba54a4117678cca83caedd157" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32661504, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 2818048 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8454144 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 8462336 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 11608064 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 19996672 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 23871488 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 29507584 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 29515776 | |
| } | |
| ], | |
| "md5sum": "e68f7cb78dec00e6ca96693eed60d900" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31989760, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 9445376 | |
| } | |
| ], | |
| "md5sum": "133da966c46812b369e7c3881543c5fc" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "73a90e1bc10515347f4cbdd8085ed61e" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fc4a8187566cafcda9190d00f1bf5946" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "790fe54ef8c522f5eaaba8b6ba8073eb" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a57f0e2b9da6daebd9899f604beaae85" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b6279ae29c6ade77a8621f8caffa756c" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32661504, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 2818048 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8454144 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 8462336 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 11608064 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 19996672 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 23871488 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 29507584 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 29515776 | |
| } | |
| ], | |
| "md5sum": "1ff9166596f291d0a0d455f320801ccb" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31989760, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 9445376 | |
| } | |
| ], | |
| "md5sum": "9e572a685c864ab28a96e6b093f3b280" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5798be8e470dae8d8b6d2913fd518fa0" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "33149eac79b8edc244cf49321a5ad60d" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 22544384, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e3ed348453f0e7739f1a0ee0a723ab1" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1e5657f2e0aa96e009efef532227ae0e" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f617da8d732289a198f2b8d2e46f2d88" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 32661504, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 2818048 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8454144 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 8462336 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 11608064 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 19996672 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 21045248 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 21053440 | |
| }, | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 23871488 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 29507584 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 29515776 | |
| } | |
| ], | |
| "md5sum": "0e813d19b44087e77c8f7994edf91632" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31989760, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 8388608 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 9437184 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 1376 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 22544384, | |
| "byteOffset": 9445376 | |
| } | |
| ], | |
| "md5sum": "03da8c7e5307360c72ec479b33fd170c" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 45088768, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_weight", | |
| "shape": [ | |
| 22016, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 45088768, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2baa643cd4b5af6ce825f412ceb5e171" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 25165824, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_weight", | |
| "shape": [ | |
| 12288, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 25165824, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d279f693f9548f33dfdb6896de219857" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 21045248, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 344 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 2818048, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.q_scale", | |
| "shape": [ | |
| 22016, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5636096, | |
| "byteOffset": 2818048 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 4096 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8192, | |
| "byteOffset": 8454144 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.qkv_proj.q_scale", | |
| "shape": [ | |
| 12288, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 3145728, | |
| "byteOffset": 8462336 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_weight", | |
| "shape": [ | |
| 4096, | |
| 512 | |
| ], | |
| "dtype": "uint32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 8388608, | |
| "byteOffset": 11608064 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.q_scale", | |
| "shape": [ | |
| 4096, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "format": "f32-to-bf16", | |
| "nbytes": 1048576, | |
| "byteOffset": 19996672 | |
| } | |
| ], | |
| "md5sum": "15ce253834790194fc711813c6711d45" | |
| } | |
| ] | |
| } |