numen-tech's picture
Add weights
57cc003
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3631664128.0,
"BitsPerParam": 2.6739310072364444
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 211365888,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
128256,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 211365888,
"byteOffset": 0
}
],
"md5sum": "74c693f00b8f2ba35f34247ccef52613"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "9e29f8b9b20c9f07483abb746d594030"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "881806b9f10c44c7afb0b67d3891694f"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29369856,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128256,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 26420736,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26420736
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 26428928
}
],
"md5sum": "c0713d8a7d8611b57dc353e4d43bc45b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "7dac7bc148bf1a4b5d78ab826a874a43"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "7c437c0095157449edf3d53bdb5fc09f"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 5914624
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 12664832
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 13508608
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 23633920
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "d8a2d4243e437d4e9de05b08f0d72fe4"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 211365888,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128256,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 211365888,
"byteOffset": 0
}
],
"md5sum": "d4ae5780baecc9445e48e502f4866e59"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 26420736,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
128256,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 26420736,
"byteOffset": 0
}
],
"md5sum": "ca49e6f3357151c10d2c1141d3513a32"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "4c2c5aae19fced25ddf642a97b9ce137"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "96e94f66828327db30cc35bb3218c315"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 27856896,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24907776
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24915968
}
],
"md5sum": "ce5294f0e1e403dd82d542666ce3264e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "b9138567609334a749320908c3698a8f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "46ac599ed73039141a8b995585846b26"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "bf880c2d5a3b3419a44df37707948854"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "d4c47ef37b73a0feae4f674b628a3dc9"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "e0e8dfb252f81ff1e601c2e111771944"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "c47a3457217c3239264280a2796af6d7"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "8821de14848eb7d398f578cb56af4eaf"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "08eca2331dc7cc4a77a6ef9fea5ffb3b"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "090c0d142fe58b663526bc45584e7b39"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "a4c572c8d0f9b21a1bf228fb73cede0c"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "39941071c72909733e067ab8824eae60"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "f415d56f93a4232f9829e0f218ac9706"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "bdae284530214727554767de19f2e11c"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "7f09fe75ec8846e0e6a70f8e2cf8cdd5"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "cdcd409de33deb42a101d26f37be788c"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "7dff571b70f7790329fefd5d6d97c4d7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "47cf53b77720a634f93c1f12ceb54f51"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "606e75b1c355099ed953f029d3932e93"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "ba469c33dac3453515f4643d5e000e13"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "e397d9373ba1103094402a20eb2377e9"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "5045ba8734af9590de4df9d11136a621"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "4d8e1d1ad4b7f0f3064e02d6aaa1dbbd"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30806016,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 24899584
}
],
"md5sum": "f71b107643d6fa289921be823a0a5232"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "57977ed3aff8dfd879db32229eb3f3da"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "91b3849df5511b37a7176579a90445a7"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "a28689207a72932a90b2ce4197647d01"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "5bc4a9f6b85d7da8246ecdbd48f6a49d"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "eca886ed37e879ecf06e3a907f0b119c"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "dd142687b34920a58784140978d957dc"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "7dc3c3e67d95156d0d867da5f8ce43d2"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "f161a16b7ab6656374a1e03091f31f01"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "f3299495c581a84986b3a4964cd51069"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "fc5cb5c4ed7a4fc455152e78927fb438"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "5327084916b361ed309a5d25b388e78f"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "28740240f4657743e7726ab0c76ffc49"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "ff39295e4119ee63bd02b0b68563694f"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "e9152605ef87d34ea13ae3f8b3a26011"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "a41cf6329b1f814cc8e9f8c7801aec0d"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "f4d59599390dd599e590a0a4eade8a3e"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "dd4c47ef739dc57fb432c2aead9a3626"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "6e64e5951fa6d7c5206357302e5be61e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "aaa6bb1e9ae719587438c429ffbce784"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "72f1ad86d26669cdaf160a4a383083a8"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "37f44f491a96d629a75f0955db0a67c1"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "e8eb1a1b7966474fd4d2715839fb3397"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "41a5b423a85d93d41c79efdfb99df191"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "3a63b974356eecf8214afe31eb0b15c1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "e2fac52dfdd18169c7e57ac62686ae9f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "12adc9e42cc91a8389b5f9db26292f88"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 18984960
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 18993152
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 21934080
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27840512
}
],
"md5sum": "a6b8a929b2243f1b9f183c1069ff622f"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30375936,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 10125312
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 11390976
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 18141184
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 18984960
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 29110272
}
],
"md5sum": "dde45d7c917c68446f725aa86102aa5d"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 31129600,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 7602176
}
],
"md5sum": "3a786a8b38e1ea43bfedf4bd45328af1"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "08ff017411d7934f2db37f09682ec928"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29425664,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2940928
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2949120
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 2957312
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 26484736
}
],
"md5sum": "a9fa4477fbefacad0ced3431fbc1f8a5"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "d94c7c21b56fb3c15381b6b515bc811b"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "1633cd176a49763f7399b91a12b3ba21"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "e298b5ed2d2ed68882035b3b62a7cb94"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "3f8106fc02afcda677e9dc9098b39176"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 32391168,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5914624
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 5922816
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 29450240
}
],
"md5sum": "b2133b7461f057761f5fe637183b531b"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "a71983edabc5c0dd830b7df7147c6831"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "dc98a14fd0629c44aa455e00880db306"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "b5c307131b9a6c90ef422392dd4b0694"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "31b410ab5c7b2a0af0d9f64d08979095"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "39a63620221823514390b4b6fc8d77dc"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "0ff9253fd17bd3eff9482f542c7dd976"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "99fd896efed7299043a7ae37b01605f8"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "0b4badab19412a6d9f9dafedf34b493c"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "de937dbeafe1cd32f51c61e58ecc664d"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "6cf17f449e196261c03cc6e8e7d661b8"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "d2b74221435345ade17ba884b8f176cf"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "372f18d368b6c34d940618a12de64bcf"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "994e6d137917c31a41a2bf1b33ee8987"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "ded82d26bcac1fca9cad0848c612e77b"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "7765ec26c24b2fc584cb5e77f0d1315d"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "723a3d53c5eda451a0d9e01eea33e614"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "7343bf43372d68951ebd2390dc327500"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "169bcf5488214d6384e73d968586bb22"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "4943cada5e2af6d7be0427b85b6ae64e"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "0da5039a9f4aad7e43d3f36ceba1fef2"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "2b8ec535dea52a8705d6fe6084a2ee74"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "d57acf76c316b0164705e807bcf0e840"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "a602461d6440a1357d5bc82e71b5e72c"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "9bdf3221e36afd5be72197d4664f9c9c"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 23527424,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1436
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 23527424,
"byteOffset": 0
}
],
"md5sum": "6a0a2df8b68d3d69ff461cc32f1645e1"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 47251456,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 47251456,
"byteOffset": 0
}
],
"md5sum": "5fa117291153a2ed57fbf4f0c0d21528"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 27848704,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24899584
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
359
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2940928,
"byteOffset": 24907776
}
],
"md5sum": "c4521db6603a9dd4fe1aa2be6e8fd720"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 24899584,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5906432,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5906432
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10125312,
"byteOffset": 5914624
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1265664,
"byteOffset": 16039936
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 17305600
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 24055808
}
],
"md5sum": "4fd5c98a121caa707605b09876c566c2"
}
]
}