Llama-2-7b-chat-hf-q3f16_1 / ndarray-cache.json
hongyij's picture
Initial commit
175cd7c
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3048549376.0,
"BitsPerParam": 3.619307029695688
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "c0676f03fc3b05f74cb6101f885dd778"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "51ca0626954f5d604646656cd7e01edb"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "3cd35eebd1bc345f5e97952df522af60"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31492608,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 6592000,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 6592000
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 6600192
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 24688128
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 26949120
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 31484416
}
],
"md5sum": "a7196c7e313876e2597d3fb0c2b2e08a"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c85477d1a834a038edcad6752c6eb49b"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "1eb8a5f5db8296800580f2a5fb9e1c91"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "0cbb2658052e3b7f7ce241e4d70c7a57"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "d22080dccf24c8f72eb91b129dd83c74"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "c5b08a532d49d67ba7b55789a4ac4ad9"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "14159b83f40fcb272b6c2fe98d142fe1"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "e088aff2f56359dd61f2faecd491c4e8"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "113ec20c355c73f46faa8b12e9e29c56"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "0ad2ddbe34ca55bfc0f9a29890069f58"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "bfa19fac173678e4e55429f516e276b7"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "8ee5228d6df69ef98f7f495346b2e1ed"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "f3a57e10d57128786ef4767676603b47"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "8ff90d5d2504ba5433c97a92732829f5"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "8b648d681763760dcb5919debe8c4180"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "102114710078e0ea4b3058cc0e973c43"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "6018c3630a6471a1a6e26527d0d87f0c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "e3e2757c78fbffebf9d258f46802ff4c"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "45248b3b1968925a03d0cec5756722e2"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c4f9d39af22a83eac84e9aa02893dce1"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "42e87662ab20cc46f4bd19138022f586"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "c61d01b07e91d6ba4c29499a9480ef95"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "d1ddcaea2b2921ddbe501527c197dd71"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32290304,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 6592000,
"byteOffset": 7602176
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 14194176
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 14202368
}
],
"md5sum": "4c815bd09b7cbf5ad5a3b8e64b813331"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "47159d99b09415620c688a8afab0b9b5"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29586432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 2260992
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 6796288
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 6804480
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 27055104
}
],
"md5sum": "4133fa70267b578ef22d6974d399f932"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "3587a175fb21ba218df60d30b0521e99"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "68636dd54cda43083b6c43794af5c5a6"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "02c09cd8ef48288cd1a5fa79d982f590"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9145bc06ad7837f80db22d2b524cb261"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "84c2e735f2e2d5bc28110dd002c5f868"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "a7f6a603afd7b833e2ccd6943f326287"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f28db7ec829a8f5d2198a69352e0ee7d"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "2961fb6500a094b457f18277c3a64613"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "690b990b3f4f65507c5e1f5e1fd43a66"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a92ce7113b4ef9c317f852448e09fe01"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "e62be00aa6e44f201de088c99da703be"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "817e3312b327bb1c7cf73d0b5197afff"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9150adccae6b0ab653517fa5e01773c8"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "72438e9da7577c85dc881bc48e26f499"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "f785c0a4a4b60c7a018ad21ebf2acaeb"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a991a2c8dbc06684d2938e6f7039bb8e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "e2b002ff0ba23bac72bd7b0b23b946a4"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "d0b941ec0699a1e4cc2b8946491f0e7a"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a4b533db749cf6a93c3aca91e8d6846b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "08d41cdee466430c03b0901cf2b48bec"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "a6c0af8e80d771996dedf758f925d83e"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7ef3a0cbb4fc7ee9ae3763ac74ade50b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "eeb7e923ab33565a48d1b6aea87b5842"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ef56d363b1d029dec84502342a3a8d12"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "1cc2e832ff9291ea4a9cf032582536d5"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "a30df28d0c16102dc945e0b9022933a4"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "3a615cc3ff3f6e0acb288740386251ad"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "765349a31c950853ae00588b666492d2"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "101c19c88ddf22767c1cb11267e377c9"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "222ec0e2359ed8762b7483d72d5a81c2"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "12c6dadc38417751d9c5fc2833b5d84e"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "fd0f3836da9b5edc47c0548a57e5fe52"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "79ccd136a01e4b568fd7017bf76da2b2"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "3a790e2feed025e07b0301a96cc38a28"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "6ab6bfca056c0f21b8d71f9ae944be6c"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "7f17612a178481039cea857468288575"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "0fbe394c539fc9800b49243eaa8246ba"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "53d405f605c1639216a36feb702b9f96"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "04bd3ba6a1ce56ab97ca34c9a627f987"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "8b73eb6ea2f450c66d89ed04a8534f1f"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "34d62832e255d53e84b5a4a980e4e8ba"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "d77f36ee367a1a902cc9527d0b3828a2"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7f276bec236f75156221a98b9db11f5a"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "9ff8d2ceb6d8389955feb05cd449b0ef"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "603b667ec66f2a9644d184207619c92a"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "ac61eca6d8a6cc76025f8e1458c806a0"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "4c9703083994ac7c1bbd488d59bb631b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "b8d5f8e101afac557c316992e7e37152"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "5147b6ff0c931662ca86dd4307c4522b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "4281f8686590e82eb39dfc454bcf3d3c"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "1bdcbe2b4ca9da89b2c162830c402859"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "36486f1ca3e3a3a37fc30fa2e0ff1ec1"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "21702c3c45db82dd049a3e2247705ac4"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "2bf568c0c933d9b80669a733c66ebd0c"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "b899e41018535955a644e531ed8f4a66"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "6b5d29d696224cfaf69d95a4c6ccdb25"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "8177e1e8b413acbb780fd9fdae3f5d9e"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "cdf1d6abef4dd30a88f9f547347008c9"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "0466466c5b67b0c09737fdd06b9da93f"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "287f56d0cb3fd56fa4fbfb3c0c21c9b6"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "3d3cc2f8e97b8edffad08ff664f3c4f1"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "405a5a57eeb28fdef19319e862ca2ff7"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "5ee628ffbc393565bbf9f1fc08df27cd"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "61eb49109e1dde0d595670b2d843631c"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "334e4e90004b4210dbabd2a91d173598"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "2ccdc1b2abb58910ed1dc419ade169a4"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "8ea5f4fd6fc810fadaa0e13a40218d5f"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "d3faff1a5333014dd0d3e1aa710fbaa8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "raw",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "raw",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "raw",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "889992dde940292ce409220ad5a17535"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 10125312,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "raw",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "raw",
"nbytes": 843776,
"byteOffset": 9281536
}
],
"md5sum": "515dcd71094db14415613f4c78250be8"
}
]
}