|
{ |
|
"metadata": { |
|
"ParamSize": 226, |
|
"ParamBytes": 3441149952.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 622329856, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
151936, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 622329856, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edecbcfbf260d28509ad58884d2905b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "337bdef2a834f49f9d803f2ed1f728f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 16777216, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c936e8f95ec91899a6e1c9ebed09b80c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174272, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 4096 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "e95ac34ed942865f40f59325c504c6fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07149f1ec1558130e6358f469770d763" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d640bcf8d13e069f072fd455f61c4522" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "62d216a3e9728f58aa178bd74dc01c3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16806ad0a2a334136d4b036811dc15eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e19614d2ecb2f1758ceb765c1caa1707" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "1157de38fca31b3d46c232776f8ab3ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19739fbb872c88ef9a4fd521b3dd69a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a30965425be3f71e8a49f0c91fe705ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "bf9f1de554fcceb59330989c6e834edb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78155df1542c80dc1908969e265c4b28" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3beec9d4a5d0aed3b8f40617daa280f0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "556ab1f7974347fbb7f6b9572ff9c35e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c68764067e0a5ca97c71d8d02f1583bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18f98642162ae3212795fec8beb8edb0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "2db3054cdc12939da45b1725daaaa57a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8066abf6f19c7b7c8bc02fb644d8c982" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c26cd7f0ccf64ec4c3ff88244b7e033" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "566944ca5a07c48a05b55ddeae35cff6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "703f8c4ae3508299b04a35114fcafe7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "811b3e73f01960f310d7b62af12fabbe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "5cd675b821cb65641c88962e121f9f71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "24a4815ad6a6c93cd9ffdff8aacd545f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "84f0289e6d9b892393c1ca5435bb30e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "2c0fbac2b979d4a249346cfc8494d603" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8053ce303b421e30ff8228554b3a1ab9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "327b91c0b483ea818d366e7599460bea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "b1d47223b72214d3c89d2877d3b32caa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b73208055c98f5ee7ba500085bfed45e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f731adbc9a9c496403feaab010aa7e44" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "832b7c7eabc23d5ffb2926aaf6843971" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ac84e27d82e2c279d4a3c516f095b8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "27d6cea7d0dbd3bbf8dcd46f1fb75ff4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "ea95789a8ab23ca8777402b7f0d30b86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ccd25b8a2e3e3d598530fe519235969" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57aed186f559a47c5c620091426261f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "a35be3e2caa06e82b201932518ec5309" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "07561983222c58ac2043ef376e1a3a7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61090cb6fce40a5497668731e4390c69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "abc040a957cf603f5d89b8dfd077d3b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e1ff18ff036ca5e4d23d4702d7a6e63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a600eb1e7e13cfd6297a325ee75d399" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "b5bfe1177983a068cd4bd4ddb54657cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1cedac8ce90dc03f72ce0992a48621c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4beef29c9b8cee57009f2317a1c32d94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "ad282a989b74d3556f053dbafc9735c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7bb4632e80de69b09e5ae5e86f50ee7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "089b1cfd55c7858ea8a78ff331fd1bbd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "66e13c3dbc18342d7ffc6e3e5efbde6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7ee0849f9d70f6c2916d85b3f4c2d44e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d47947c0610a4503709fed6eb9f0085" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "2a6b7bff7b776714270b12181a2f21ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edb61ae808391d3be9c013eb345c3b8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd81f39baa964eb524b4ba50de6cebda" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "7b66df8156bba7fb9c4cf1b69b561291" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7ea2da53c2c4c23ce9a07d1bcce6934c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "017feefcdb137e9ec4f2a35b3391d8cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "f82239b9d1f71f25d29c15d28575732c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5228891c328e6f10580c33eeb0fd3a09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b621556096685251003535288a70af6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "ba990f3459ed519cf6234bb7d1c2c56c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f8c4373f9b0e86c95e72caaeb9261f6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7584dac798048519015dc007fbe46e0c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "bf2ed67f5791b3da84b4f200ec5d7539" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "44f5c40690cd3b9e3e58e59af2c0b42c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57eb68f17a3e67c75075012c44d5d03e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "cbe4c9c10e0d44317d07cd1e770d8d8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61b2ce8d10305ad3e323580347db1fb5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5fb9893479fb03e4397cc1fb83362ea6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "a1e79894645a17b26b7e84305511764b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58293d194624479f12a430b6dfb01a70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be5e1544692a25e5a23a6b3432af0f8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "af2516b2ce12b3c41e2184d14ddb82cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23fc7be894b838e2e7dcec596bcf9366" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90922fbbb426f8f410087bf32409e9bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "0e6b59bfdc8d83853b8c6f5cb75880cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87e101f533f008fdcf814028bb2ee8f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05a4aedb76fad783ef93f23d336a3a12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "8b1e2e9580975f5931e128cd4a597a8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
6144 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16b3ef90ac276e8668484ab3bd8ef5d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
12288, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be4438c4ba062b7bf55163dbfede1524" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174528, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8392960 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.k_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8397056 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8397312 |
|
} |
|
], |
|
"md5sum": "5745a51b3d70ae50620d7d74619e0b32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 8392960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.q_norm.weight", |
|
"shape": [ |
|
128 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 256, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8388864 |
|
} |
|
], |
|
"md5sum": "e98e2cce7e63600d37e2927a15af6541" |
|
} |
|
] |
|
} |