Qwen3-1.7B-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
9f62174 verified
raw
history blame contribute delete
103 kB
{
"metadata": {
"ParamSize": 226,
"ParamBytes": 3441149952.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 622329856,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 622329856,
"byteOffset": 0
}
],
"md5sum": "edecbcfbf260d28509ad58884d2905b1"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "337bdef2a834f49f9d803f2ed1f728f4"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "c936e8f95ec91899a6e1c9ebed09b80c"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25174272,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 4096
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 25174016
}
],
"md5sum": "e95ac34ed942865f40f59325c504c6fa"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "07149f1ec1558130e6358f469770d763"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d640bcf8d13e069f072fd455f61c4522"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "62d216a3e9728f58aa178bd74dc01c3b"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "16806ad0a2a334136d4b036811dc15eb"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "e19614d2ecb2f1758ceb765c1caa1707"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "1157de38fca31b3d46c232776f8ab3ee"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "19739fbb872c88ef9a4fd521b3dd69a8"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a30965425be3f71e8a49f0c91fe705ab"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "bf9f1de554fcceb59330989c6e834edb"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "78155df1542c80dc1908969e265c4b28"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3beec9d4a5d0aed3b8f40617daa280f0"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "556ab1f7974347fbb7f6b9572ff9c35e"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c68764067e0a5ca97c71d8d02f1583bb"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "18f98642162ae3212795fec8beb8edb0"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "2db3054cdc12939da45b1725daaaa57a"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8066abf6f19c7b7c8bc02fb644d8c982"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "8c26cd7f0ccf64ec4c3ff88244b7e033"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "566944ca5a07c48a05b55ddeae35cff6"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "703f8c4ae3508299b04a35114fcafe7e"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "811b3e73f01960f310d7b62af12fabbe"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "5cd675b821cb65641c88962e121f9f71"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "24a4815ad6a6c93cd9ffdff8aacd545f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "84f0289e6d9b892393c1ca5435bb30e5"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "2c0fbac2b979d4a249346cfc8494d603"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8053ce303b421e30ff8228554b3a1ab9"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "327b91c0b483ea818d366e7599460bea"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "b1d47223b72214d3c89d2877d3b32caa"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b73208055c98f5ee7ba500085bfed45e"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f731adbc9a9c496403feaab010aa7e44"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "832b7c7eabc23d5ffb2926aaf6843971"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8ac84e27d82e2c279d4a3c516f095b8d"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "27d6cea7d0dbd3bbf8dcd46f1fb75ff4"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "ea95789a8ab23ca8777402b7f0d30b86"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4ccd25b8a2e3e3d598530fe519235969"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "57aed186f559a47c5c620091426261f8"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "a35be3e2caa06e82b201932518ec5309"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "07561983222c58ac2043ef376e1a3a7b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "61090cb6fce40a5497668731e4390c69"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "abc040a957cf603f5d89b8dfd077d3b9"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7e1ff18ff036ca5e4d23d4702d7a6e63"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5a600eb1e7e13cfd6297a325ee75d399"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "b5bfe1177983a068cd4bd4ddb54657cf"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1cedac8ce90dc03f72ce0992a48621c7"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4beef29c9b8cee57009f2317a1c32d94"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "ad282a989b74d3556f053dbafc9735c2"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7bb4632e80de69b09e5ae5e86f50ee7b"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "089b1cfd55c7858ea8a78ff331fd1bbd"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "66e13c3dbc18342d7ffc6e3e5efbde6f"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7ee0849f9d70f6c2916d85b3f4c2d44e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6d47947c0610a4503709fed6eb9f0085"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "2a6b7bff7b776714270b12181a2f21ce"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "edb61ae808391d3be9c013eb345c3b8a"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "dd81f39baa964eb524b4ba50de6cebda"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "7b66df8156bba7fb9c4cf1b69b561291"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7ea2da53c2c4c23ce9a07d1bcce6934c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "017feefcdb137e9ec4f2a35b3391d8cd"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "f82239b9d1f71f25d29c15d28575732c"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5228891c328e6f10580c33eeb0fd3a09"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b621556096685251003535288a70af6f"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "ba990f3459ed519cf6234bb7d1c2c56c"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f8c4373f9b0e86c95e72caaeb9261f6f"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7584dac798048519015dc007fbe46e0c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "bf2ed67f5791b3da84b4f200ec5d7539"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "44f5c40690cd3b9e3e58e59af2c0b42c"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "57eb68f17a3e67c75075012c44d5d03e"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "cbe4c9c10e0d44317d07cd1e770d8d8e"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "61b2ce8d10305ad3e323580347db1fb5"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5fb9893479fb03e4397cc1fb83362ea6"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "a1e79894645a17b26b7e84305511764b"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "58293d194624479f12a430b6dfb01a70"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "be5e1544692a25e5a23a6b3432af0f8d"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "af2516b2ce12b3c41e2184d14ddb82cb"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "23fc7be894b838e2e7dcec596bcf9366"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "90922fbbb426f8f410087bf32409e9bd"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "0e6b59bfdc8d83853b8c6f5cb75880cd"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "87e101f533f008fdcf814028bb2ee8f5"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "05a4aedb76fad783ef93f23d336a3a12"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "8b1e2e9580975f5931e128cd4a597a8c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "16b3ef90ac276e8668484ab3bd8ef5d3"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
12288,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "be4438c4ba062b7bf55163dbfede1524"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 25174528,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8392960
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8397056
},
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8397312
}
],
"md5sum": "5745a51b3d70ae50620d7d74619e0b32"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 8392960,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 8388608
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 8388864
}
],
"md5sum": "e98e2cce7e63600d37e2927a15af6541"
}
]
}