|
{ |
|
"metadata": { |
|
"ParamSize": 392, |
|
"ParamBytes": 690311168.0, |
|
"BitsPerParam": 4.505168401162159 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50462720, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.q_weight", |
|
"shape": [ |
|
49280, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50462720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37752cf9591772a22574b2b1fcb29a93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32608768, |
|
"records": [ |
|
{ |
|
"name": "transformer.wte.q_scale", |
|
"shape": [ |
|
49280, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6307840, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.wpe.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 6307840 |
|
}, |
|
{ |
|
"name": "transformer.wpe.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 8404992 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8667136 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 8671232 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 8675328 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 11034624 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 11329536 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 11334144 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 13431296 |
|
}, |
|
{ |
|
"name": "transformer.h.0.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 13693440 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 13697536 |
|
}, |
|
{ |
|
"name": "transformer.h.0.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 13701632 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 13705728 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22094336 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23142912 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23159296 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 31547904 |
|
}, |
|
{ |
|
"name": "transformer.h.0.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 32596480 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 32600576 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 32604672 |
|
} |
|
], |
|
"md5sum": "e15870fe91891b563bec51bcddbefd35" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28963840, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 2654208 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 2658816 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 4755968 |
|
}, |
|
{ |
|
"name": "transformer.h.1.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5018112 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5022208 |
|
}, |
|
{ |
|
"name": "transformer.h.1.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5026304 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 5030400 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 13419008 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14483968 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22872576 |
|
}, |
|
{ |
|
"name": "transformer.h.1.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 26292736 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 26587648 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26592256 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "transformer.h.2.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28951552 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28955648 |
|
}, |
|
{ |
|
"name": "transformer.h.2.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28959744 |
|
} |
|
], |
|
"md5sum": "142d0a395f60dfe2c7a26f32f144f05f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33387008, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 17842176 |
|
}, |
|
{ |
|
"name": "transformer.h.2.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18903040 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 21262336 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 21557248 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 21561856 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 23659008 |
|
}, |
|
{ |
|
"name": "transformer.h.3.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.3.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 33370624 |
|
} |
|
], |
|
"md5sum": "cf13d7f71dd1bc461d13faca89277ef3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33382912, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.3.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9441280 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9445376 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 11808768 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 12103680 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 12108288 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 14205440 |
|
}, |
|
{ |
|
"name": "transformer.h.4.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14471680 |
|
}, |
|
{ |
|
"name": "transformer.h.4.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14475776 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14479872 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22868480 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23917056 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.4.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33370624 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33374720 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33378816 |
|
} |
|
], |
|
"md5sum": "c5ba04382fe711770f84107be85a31c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28963840, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 2654208 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 2658816 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 4755968 |
|
}, |
|
{ |
|
"name": "transformer.h.5.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5018112 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5022208 |
|
}, |
|
{ |
|
"name": "transformer.h.5.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5026304 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 5030400 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 13419008 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14483968 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22872576 |
|
}, |
|
{ |
|
"name": "transformer.h.5.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 26292736 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 26587648 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26592256 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "transformer.h.6.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28951552 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28955648 |
|
}, |
|
{ |
|
"name": "transformer.h.6.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28959744 |
|
} |
|
], |
|
"md5sum": "3252c3f5c0fd31bbeb3cab51b7b712e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33387008, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 17842176 |
|
}, |
|
{ |
|
"name": "transformer.h.6.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18903040 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 21262336 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 21557248 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 21561856 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 23659008 |
|
}, |
|
{ |
|
"name": "transformer.h.7.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.7.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 33370624 |
|
} |
|
], |
|
"md5sum": "25177ac9ee7018d30a389c8f71591565" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33382912, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.7.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9441280 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9445376 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 11808768 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 12103680 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 12108288 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 14205440 |
|
}, |
|
{ |
|
"name": "transformer.h.8.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14471680 |
|
}, |
|
{ |
|
"name": "transformer.h.8.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14475776 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14479872 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22868480 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23917056 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.8.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33370624 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33374720 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33378816 |
|
} |
|
], |
|
"md5sum": "c47f82d20e857ad7aff61bf2dab8e61c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28963840, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 2654208 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 2658816 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 4755968 |
|
}, |
|
{ |
|
"name": "transformer.h.9.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5018112 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5022208 |
|
}, |
|
{ |
|
"name": "transformer.h.9.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5026304 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 5030400 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 13419008 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14483968 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22872576 |
|
}, |
|
{ |
|
"name": "transformer.h.9.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 26292736 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 26587648 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26592256 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "transformer.h.10.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28951552 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28955648 |
|
}, |
|
{ |
|
"name": "transformer.h.10.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28959744 |
|
} |
|
], |
|
"md5sum": "00806b1f373302a9d68949200bdd1c42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33387008, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 17842176 |
|
}, |
|
{ |
|
"name": "transformer.h.10.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18903040 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 21262336 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 21557248 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 21561856 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 23659008 |
|
}, |
|
{ |
|
"name": "transformer.h.11.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.11.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 33370624 |
|
} |
|
], |
|
"md5sum": "968bad8365fd4a556283fdf34875974d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33382912, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.11.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9441280 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9445376 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 11808768 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 12103680 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 12108288 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 14205440 |
|
}, |
|
{ |
|
"name": "transformer.h.12.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14471680 |
|
}, |
|
{ |
|
"name": "transformer.h.12.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14475776 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14479872 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22868480 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23917056 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.12.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33370624 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33374720 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33378816 |
|
} |
|
], |
|
"md5sum": "be7dcf200906df6f86a9bfc3cb55f0b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28963840, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 2654208 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 2658816 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 4755968 |
|
}, |
|
{ |
|
"name": "transformer.h.13.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5018112 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5022208 |
|
}, |
|
{ |
|
"name": "transformer.h.13.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5026304 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 5030400 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 13419008 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14483968 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22872576 |
|
}, |
|
{ |
|
"name": "transformer.h.13.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 26292736 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 26587648 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26592256 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "transformer.h.14.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28951552 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28955648 |
|
}, |
|
{ |
|
"name": "transformer.h.14.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28959744 |
|
} |
|
], |
|
"md5sum": "e82e9c4fda7cfa51ff997e46661bacdb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33387008, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 17842176 |
|
}, |
|
{ |
|
"name": "transformer.h.14.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18903040 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 21262336 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 21557248 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 21561856 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 23659008 |
|
}, |
|
{ |
|
"name": "transformer.h.15.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.15.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 33370624 |
|
} |
|
], |
|
"md5sum": "f2fe56417ba5feb375bfbff3bb330777" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33382912, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.15.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9441280 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9445376 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 11808768 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 12103680 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 12108288 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 14205440 |
|
}, |
|
{ |
|
"name": "transformer.h.16.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14471680 |
|
}, |
|
{ |
|
"name": "transformer.h.16.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14475776 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14479872 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22868480 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23917056 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.16.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33370624 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33374720 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33378816 |
|
} |
|
], |
|
"md5sum": "a52393c9d9025bf27ea12a597ca0ff04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28963840, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 2654208 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 2658816 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 4755968 |
|
}, |
|
{ |
|
"name": "transformer.h.17.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5018112 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5022208 |
|
}, |
|
{ |
|
"name": "transformer.h.17.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5026304 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 5030400 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 13419008 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14483968 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22872576 |
|
}, |
|
{ |
|
"name": "transformer.h.17.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 26292736 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 26587648 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26592256 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "transformer.h.18.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28951552 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28955648 |
|
}, |
|
{ |
|
"name": "transformer.h.18.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28959744 |
|
} |
|
], |
|
"md5sum": "fd15a3c071525656f07424171cbb46e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33387008, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 17842176 |
|
}, |
|
{ |
|
"name": "transformer.h.18.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18903040 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 21262336 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 21557248 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 21561856 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 23659008 |
|
}, |
|
{ |
|
"name": "transformer.h.19.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.19.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 33370624 |
|
} |
|
], |
|
"md5sum": "e333e89d3d0e04646d09a103c11d9725" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33382912, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.19.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9441280 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9445376 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 9449472 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 11808768 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 12103680 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 12108288 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 14205440 |
|
}, |
|
{ |
|
"name": "transformer.h.20.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14471680 |
|
}, |
|
{ |
|
"name": "transformer.h.20.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 14475776 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14479872 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22868480 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 23917056 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.20.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33370624 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33374720 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 33378816 |
|
} |
|
], |
|
"md5sum": "cf0962fb2fd7bc2c5a89a79692b56d06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 28963840, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 2654208 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 2658816 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 4755968 |
|
}, |
|
{ |
|
"name": "transformer.h.21.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5018112 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5022208 |
|
}, |
|
{ |
|
"name": "transformer.h.21.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 5026304 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 5030400 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 13419008 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 14467584 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 14483968 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 22872576 |
|
}, |
|
{ |
|
"name": "transformer.h.21.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 26292736 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 26587648 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 26592256 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 28689408 |
|
}, |
|
{ |
|
"name": "transformer.h.22.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28951552 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28955648 |
|
}, |
|
{ |
|
"name": "transformer.h.22.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 28959744 |
|
} |
|
], |
|
"md5sum": "cecccd0a0634f308c850742bbf1bc8d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33387008, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 17842176 |
|
}, |
|
{ |
|
"name": "transformer.h.22.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18890752 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18894848 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_1.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.q_weight", |
|
"shape": [ |
|
2304, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296, |
|
"byteOffset": 18903040 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.q_scale", |
|
"shape": [ |
|
2304, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 294912, |
|
"byteOffset": 21262336 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_attn.bias", |
|
"shape": [ |
|
2304 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4608, |
|
"byteOffset": 21557248 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2097152, |
|
"byteOffset": 21561856 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 262144, |
|
"byteOffset": 23659008 |
|
}, |
|
{ |
|
"name": "transformer.h.23.attn.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23921152 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23925248 |
|
}, |
|
{ |
|
"name": "transformer.h.23.ln_2.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 23929344 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.q_weight", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 23933440 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.q_scale", |
|
"shape": [ |
|
8192, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 32322048 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_fc.bias", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 33370624 |
|
} |
|
], |
|
"md5sum": "ce80d822f52c4a96a4529a5f45671bc9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50462720, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
49280, |
|
256 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50462720, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37752cf9591772a22574b2b1fcb29a93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 15757312, |
|
"records": [ |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.q_weight", |
|
"shape": [ |
|
2048, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.q_scale", |
|
"shape": [ |
|
2048, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1048576, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "transformer.h.23.mlp.c_proj.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9441280 |
|
}, |
|
{ |
|
"name": "transformer.ln_f.bias", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 9445376 |
|
}, |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
49280, |
|
64 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6307840, |
|
"byteOffset": 9449472 |
|
} |
|
], |
|
"md5sum": "c7d5887dd10810c0f22e810413f4c29a" |
|
} |
|
] |
|
} |