Qwen3-0.6B-q4f32_1-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
569279d verified
raw
history blame contribute delete
129 kB
{
"metadata": {
"ParamSize": 339,
"ParamBytes": 372752384.0,
"BitsPerParam": 3.9673903566134796
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 77791232,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 77791232,
"byteOffset": 0
}
],
"md5sum": "ca24fb347517c781378a3dc5b3b507e9"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 32740608,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9723904,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 9723904
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 9725952
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 11298816
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11495424
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14641152
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15034368
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 15036416
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 15036672
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 17133824
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 17395968
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 18444544
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 18575616
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 18575872
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 18577920
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 20150784
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 20347392
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 23493120
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 23886336
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23888384
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23888640
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 25985792
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 26247936
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 27296512
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 27427584
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27427840
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 27429888
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 29002752
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29199360
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32345088
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 32738304
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 32740352
}
],
"md5sum": "1439ec38884c91811f57578b4aab38fc"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 31866624,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 2097152
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 2359296
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 3407872
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3538944
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3539200
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 3541248
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 5114112
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 5310720
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 8456448
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8849664
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8851712
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 8851968
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 10949120
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11211264
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 12259840
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 12390912
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12391168
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12393216
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 13966080
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14162688
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 17308416
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17701632
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17703680
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17703936
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19801088
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20063232
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 21111808
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21242880
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21243136
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 21245184
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 22818048
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23014656
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 26160384
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26553600
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 26555648
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26555904
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28653056
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28915200
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 29963776
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 30094848
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30095104
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 30097152
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 31670016
}
],
"md5sum": "24de7d3fdd183edf1b3798257ea58c6a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33505024,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 3145728
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3538944
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3540992
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 3541248
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5638400
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 5900544
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 6949120
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 7080192
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7080448
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 7082496
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 8655360
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8851968
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 11997696
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12390912
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 12392960
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 12393216
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 14490368
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 14752512
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 15801088
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 15932160
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15932416
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15934464
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 17507328
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 17703936
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 20849664
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21242880
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21244928
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 21245184
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 23342336
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 23604480
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 24653056
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24784128
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24784384
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 24786432
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 26359296
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26555904
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 29701632
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30094848
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 30096896
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 30097152
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 32194304
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 32456448
}
],
"md5sum": "5172c9bc03927201f9985de4a9c1046b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32000000,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 131072
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 131328
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 133376
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 1706240
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 1902848
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 5048576
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5441792
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 5443840
},
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5444096
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7541248
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 7803392
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 8851968
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8983040
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8983296
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 8985344
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 10558208
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 10754816
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 13900544
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 14293760
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 14295808
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14296064
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 16393216
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 16655360
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 17703936
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17835008
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17835264
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 17837312
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 19410176
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 19606784
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22752512
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 23145728
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23147776
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23148032
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 25245184
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 25507328
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 26555904
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 26686976
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26687232
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 26689280
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 28262144
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 28458752
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 31604480
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31997696
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31999744
}
],
"md5sum": "dec8fa27fe3f44a1ca9358902917242a"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31866624,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 2097152
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 2359296
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 3407872
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3538944
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3539200
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 3541248
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 5114112
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 5310720
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 8456448
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8849664
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8851712
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 8851968
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 10949120
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11211264
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 12259840
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 12390912
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12391168
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12393216
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 13966080
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14162688
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 17308416
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17701632
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17703680
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17703936
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19801088
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20063232
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 21111808
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21242880
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21243136
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 21245184
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 22818048
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23014656
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 26160384
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26553600
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 26555648
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26555904
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28653056
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28915200
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 29963776
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 30094848
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30095104
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 30097152
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 31670016
}
],
"md5sum": "eccd2877aed39c7d129e2fc521d0a9b8"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33505024,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 3145728
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3538944
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3540992
},
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 3541248
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 5638400
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 5900544
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 6949120
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 7080192
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7080448
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 7082496
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 8655360
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8851968
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 11997696
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12390912
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 12392960
},
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 12393216
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 14490368
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 14752512
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 15801088
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 15932160
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15932416
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 15934464
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 17507328
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 17703936
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 20849664
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21242880
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21244928
},
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 21245184
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 23342336
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 23604480
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 24653056
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 24784128
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24784384
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 24786432
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 26359296
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26555904
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 29701632
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30094848
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 30096896
},
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 30097152
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 32194304
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 32456448
}
],
"md5sum": "6e30df4f8088a7b96938af9f74b14467"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32000000,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 131072
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 131328
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 133376
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 1706240
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 1902848
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 5048576
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5441792
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 5443840
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 5444096
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 7541248
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 7803392
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 8851968
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8983040
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8983296
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 8985344
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 10558208
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 10754816
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 13900544
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 14293760
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 14295808
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 14296064
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 16393216
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 16655360
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 17703936
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17835008
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17835264
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 17837312
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 19410176
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 19606784
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 22752512
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 23145728
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 23147776
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 23148032
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 25245184
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 25507328
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 26555904
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 26686976
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26687232
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 26689280
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 28262144
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 28458752
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 31604480
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31997696
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31999744
}
],
"md5sum": "3f025dcb4b80165a68f416ea94320279"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 30097152,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 2097152
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 2359296
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 3407872
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 3538944
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3539200
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 3541248
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 5114112
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 5310720
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 8456448
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 8849664
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8851712
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 8851968
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 10949120
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11211264
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 12259840
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 12390912
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12391168
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12393216
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 13966080
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14162688
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 17308416
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17701632
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 17703680
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 17703936
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 19801088
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 20063232
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 21111808
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 21242880
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21243136
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1024,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 21245184
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1024,
96
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 22818048
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
6144,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 23014656
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 26160384
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26553600
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 26555648
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
4096,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2097152,
"byteOffset": 26555904
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
4096,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 28653056
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
1024,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 28915200
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1024,
64
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 131072,
"byteOffset": 29963776
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 30094848
},
{
"name": "model.norm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 30095104
}
],
"md5sum": "0b0723e7e52856fcfee24c0088f79bf7"
}
]
}