{ "metadata": { "ParamSize": 648, "ParamBytes": 7137087488.0, "BitsPerParam": 3.6092729746843064 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "lm_head.q_weight", "shape": [ 49152, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "08e67fc89dfe9920284571c1ecce3580" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.38.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "65210ff0006702f826d9e3a8a95901d8" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32256000, "records": [ { "name": "lm_head.q_scale", "shape": [ 49152, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.38.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 15138816 }, { "name": "transformer.h.38.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 15151104 }, { "name": "transformer.h.38.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 30289920 }, { "name": "transformer.h.38.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32182272 }, { "name": "transformer.h.38.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32194560 }, { "name": "transformer.h.38.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 32206848 } ], "md5sum": "3f4381292568d434b3b0cbc576d40177" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.38.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "9302396d5f345ae77ff7e1486608eea6" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 32904704, "records": [ { "name": "transformer.h.38.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 0 }, { "name": "transformer.h.38.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 7569408 }, { "name": "transformer.h.38.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 7581696 }, { "name": "transformer.h.39.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 15138816 }, { "name": "transformer.h.39.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 15151616 }, { "name": "transformer.h.39.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 30921216 }, { "name": "transformer.h.39.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32892416 } ], "md5sum": "9b123944450d2d1314842dfc3c50d2c7" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.39.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "eee96aaa6d229a7d7df61d9e307ef9ef" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.39.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "481ea66d938604bba0df984c78881b08" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32305664, "records": [ { "name": "transformer.h.39.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.39.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.39.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.39.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.39.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.39.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.39.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.39.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.39.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.39.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.ln_f.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32268288 }, { "name": "transformer.ln_f.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32280576 }, { "name": "transformer.h.0.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32292864 } ], "md5sum": "3baf4c628a4e826ee73fa82bfb34ca22" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.0.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.0.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.0.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.0.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "cf00311b0b7b987d43d8454c0db8fc59" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.0.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "0a62f5e7efe95721345de7d7be3078d8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.0.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "7fd33d8b9952e1edfb9f7987020859c7" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.0.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.0.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.0.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.0.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.0.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.0.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.0.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.0.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.0.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.1.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.1.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "cf000b76899082167cbe5713f59251f4" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.1.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "d251ca3f170567e1ff0e2daa17bffe8b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.1.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "c413af1958ce2db5be0c5711613a5c40" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.1.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.1.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.1.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.1.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.1.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.1.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.1.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.1.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.1.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.1.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.1.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "b2d947dbb88b4bb327f12e7ee76f317c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.1.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.2.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.2.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.2.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.2.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "c30db6ae55ef72ffa0d413448c12ebf3" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.2.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "0130f7b4535a8c0a303fc8ed55402229" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.2.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "13fafa9488673685351b9d2a9c26bea1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.2.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.2.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.2.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.2.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.2.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.2.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.2.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.2.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.2.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.2.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.3.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "57764d4b11296567d374eb4d97a2dfe8" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.3.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.3.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.3.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.3.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "fbdeb2a069b832ad1b4a3708a4bbdf34" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.3.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "3bca164a7abb780821ac51624aedda37" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.3.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "eb82e855ec657c3817ceb78542bb306d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.3.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.3.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.3.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.3.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.3.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.3.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.3.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.3.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.3.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.4.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.4.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "06cef7c8c33ac0d9ca1497995f1dc0c4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.4.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "63adac2abdad63f0c0a5066ab041b8a3" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.4.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "15d8bd8e8c596a410051a79f2aa4bb41" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.4.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.4.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.4.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.4.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.4.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.4.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.4.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.4.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.4.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.4.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.4.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "a495852193ac51223f11901e399a3138" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.4.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.5.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.5.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.5.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.5.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "a00967be9d02bdadee85b1ab9fedc481" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.5.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "5f5374cf73e851b64801111dc7ee3453" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 20185088, "records": [ { "name": "transformer.wpe.q_weight", "shape": [ 8192, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20185088, "byteOffset": 0 } ], "md5sum": "36446746451e93f6c5773e29204a954a" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 121110528, "records": [ { "name": "transformer.wte.q_weight", "shape": [ 49152, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 121110528, "byteOffset": 0 } ], "md5sum": "08e67fc89dfe9920284571c1ecce3580" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 27222016, "records": [ { "name": "transformer.h.5.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.5.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.5.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.5.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.5.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.5.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.5.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.5.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.wpe.q_scale", "shape": [ 8192, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2523136, "byteOffset": 24698880 } ], "md5sum": "605d30a700b38a7664228eafe33bb7df" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 32904704, "records": [ { "name": "transformer.wte.q_scale", "shape": [ 49152, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.10.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 15138816 }, { "name": "transformer.h.10.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 15151616 }, { "name": "transformer.h.10.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 30921216 }, { "name": "transformer.h.10.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32892416 } ], "md5sum": "ef927424e92db0adc9e6fa944e3bc219" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.10.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "303034166faa159a2e6d8812a4e88496" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.10.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "21cec455be38bc891506201c7ca7725c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.10.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.10.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.10.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.10.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.10.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.10.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.10.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.10.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.10.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.10.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.11.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "5f8f0895b5319c72387395fe13c97f37" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.11.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.11.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.11.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.11.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "0c474bddc4f9bca0bb4a2ae5ca6ed643" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.11.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "f9e22562f5d62fbc4a30e07602716236" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.11.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "a58ee2cec1798905866ae80c70f108bf" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.11.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.11.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.11.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.11.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.11.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.11.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.11.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.11.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.11.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.12.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.12.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "9f496ede88c1e9de42132bbd58fc275c" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.5.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "629329e4869611f87cbc7bc746c2a9ae" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 27331072, "records": [ { "name": "transformer.h.12.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.12.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.12.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1983488 }, { "name": "transformer.h.5.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1995776 }, { "name": "transformer.h.5.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 2008064 }, { "name": "transformer.h.6.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9565184 }, { "name": "transformer.h.6.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 9577984 }, { "name": "transformer.h.6.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 25347584 }, { "name": "transformer.h.6.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 27318784 } ], "md5sum": "7a54768e494bace27b4690e9e63bdf13" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.6.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "8eace8bea59705d4a3687ab1e962acf6" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.6.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "a68536821675630143e7d01931058199" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.6.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.6.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.6.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.6.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.6.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.6.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.6.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.6.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.6.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.6.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.7.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "a5e278137ce001794ad695b4217c15d0" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.7.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.7.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.7.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.7.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "7032e4aa8259748ce91f23e234b37544" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.7.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "53eec555e63f4446a8d75283681d8469" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.7.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "27c9e6096454a2f1331468838fc32b63" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.7.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.7.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.7.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.7.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.7.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.7.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.7.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.7.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.7.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.8.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.8.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "9c302990370d7748b31d476299a06490" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.8.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "e6483e4d2a68ba4ee0bc5bf3c24b79f1" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.8.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "73fdccc86f2e69974d122617a8d3da87" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.8.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.8.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.8.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.8.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.8.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.8.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.8.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.8.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.8.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.8.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.8.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "2b51f644ea7c0b74fe058358e056f5dc" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.8.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.9.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.9.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.9.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.9.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "980e7f1c97fee44bc6024da75ebae364" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.9.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "884f3fe130e8b581c80b16ffe14ea550" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.9.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "ef450e5893992cf8783c8624bc016da3" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32280576, "records": [ { "name": "transformer.h.9.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.9.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.9.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.9.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.9.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.9.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.9.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.9.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.9.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.9.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.12.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 32268288 } ], "md5sum": "7d08a7dbb621cab02d492942e1388567" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.12.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "7d2e06586ff0028ead5c57c727a04bdc" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.12.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "0d1e73a36e1ee974ce755dc6d3634303" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32256512, "records": [ { "name": "transformer.h.12.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.12.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.12.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.12.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.12.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17055744 }, { "name": "transformer.h.12.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17104896 }, { "name": "transformer.h.12.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24674304 }, { "name": "transformer.h.12.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24686592 }, { "name": "transformer.h.13.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32243712 } ], "md5sum": "844458ea1005739ad3f7688b557320c7" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.13.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.13.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.13.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.13.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "afe302b41fc888362ea47caeb9301bb2" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.13.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "3c3ec1f601d8e6b5cced7156061afc38" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.13.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "bbe84ba60475fcb2d28e3ead98035a1d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.13.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.13.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.13.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.13.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.13.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.13.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.13.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.13.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.13.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.14.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.14.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "22dd9cda896be91e766d8db20801b62c" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.14.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "8c56a1fa2ff425534918da2c9f496698" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.14.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "cf0888c26c4b9edbae3be0b7ae798be6" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.14.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.14.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.14.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.14.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.14.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.14.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.14.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.14.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.14.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.14.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.14.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "093996306cfcf00b51266383eee7a5d0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.14.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.15.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.15.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.15.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.15.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "162af605314e5c5b9dcd30a346da1635" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.15.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "eea02481e677bdbfd966e7e992f35ed4" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.15.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "30f62a3ff8fcc4f2d4f2528b3b2db2b8" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.15.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.15.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.15.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.15.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.15.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.15.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.15.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.15.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.15.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.15.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.16.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "70470eb01f85fbce0f00710c6c4dd94c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.16.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.16.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.16.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.16.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "8c3e84b736ac45896f1ed842ee59e7e3" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.16.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "09cb3b305ead6d11150d1794da85f176" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.16.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "2a2344b40f06cc913c9158cbcb060997" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.16.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.16.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.16.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.16.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.16.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.16.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.16.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.16.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.16.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.17.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.17.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "53a8b06db906ddfdc6f3f152681f9dd9" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.17.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "7b97a6b50b128209e136d9a5aeb47edc" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.17.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "90604df483dbea68eae1ed1d3aa8385c" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.17.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.17.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.17.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.17.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.17.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.17.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.17.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.17.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.17.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.17.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.17.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "9db31eb5fd0ffda0af86b542587405f1" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.17.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.18.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.18.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.18.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.18.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "1f26544ad632da0520f8602276e2a6f4" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.18.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "d1127ad6d4160342e99ef76e4636a2c3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.18.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "f5cbc249ea190d5692cfbabde17f9e2e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.18.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.18.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.18.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.18.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.18.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.18.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.18.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.18.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.18.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.18.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.19.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "734951c7c827772850f043964a530e9b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.19.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.19.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.19.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.19.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "717c60bf5c3c84d904f186c33aeddc09" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.19.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "e54ee7c7fdfed475df3dc3805790bac1" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.19.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "7cca97167da75bba9fc5020f3651cfc8" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.19.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.19.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.19.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.19.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.19.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.19.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.19.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.19.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.19.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.20.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.20.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "4c45b1aa408289c8920d7948558d0c6b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.20.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "1be0a9543acc8abfe2f9b39b47ed7056" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.20.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "1b0abbcb246a8f1dbf9c5b639c2ac6f2" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.20.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.20.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.20.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.20.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.20.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.20.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.20.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.20.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.20.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.20.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.20.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "0b12e8a4f04278245d357c398916b8ff" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.20.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.21.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.21.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.21.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.21.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "f0202b527d86b76cf5a992e8eda2768a" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.21.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "975991e7582dbe6662fc606f3b3b4e9e" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.21.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "e0e584476ebf86808361ab3e374c49ac" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.21.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.21.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.21.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.21.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.21.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.21.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.21.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.21.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.21.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.21.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.22.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "44a45f9988b94be391540bb4b96bd5b2" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.22.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.22.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.22.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.22.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "d92d7d7ea6d7fc313aebabf96b04d500" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.22.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "ef7c507568b87c81150fbdb29eaa73e1" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.22.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "03a53d75fb54a7fd9e20ab93c394104a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.22.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.22.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.22.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.22.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.22.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.22.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.22.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.22.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.22.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.23.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.23.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "2f0e70a42c64fc1fc3526c5eece0120f" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.23.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "5c39dcd4e7ab09273d39bf9ae8a599c3" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.23.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "6e177dd7df6594c2923eccc281a248f8" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.23.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.23.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.23.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.23.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.23.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.23.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.23.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.23.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.23.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.23.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.23.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "35c1245d7d20bc1e5423e6d3a0599930" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.23.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.24.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.24.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.24.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.24.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "e678030b44101053b70e14137c1491a0" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.24.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "ed599cc953621da88498f0cc77ee4d77" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.24.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "11f598e16cacd4c96101495b68b10636" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.24.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.24.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.24.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.24.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.24.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.24.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.24.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.24.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.24.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.24.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.25.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "b3743f74651e99f65f26d1d0f8e65473" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32916480, "records": [ { "name": "transformer.h.25.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.25.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.25.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.25.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17753088 }, { "name": "transformer.h.25.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17765376 }, { "name": "transformer.h.25.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17777664 } ], "md5sum": "2b450ff69b63b72b1d0938db61a78095" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.25.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "d02d5ce97dfe03b681dacb6f7cf6a800" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.25.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "07d757a9e3118fcc9391c89a7c55bc8d" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32887296, "records": [ { "name": "transformer.h.25.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.25.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.25.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.25.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1916928 }, { "name": "transformer.h.25.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1966080 }, { "name": "transformer.h.25.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9535488 }, { "name": "transformer.h.25.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9547776 }, { "name": "transformer.h.26.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17104896 }, { "name": "transformer.h.26.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17117696 } ], "md5sum": "9c35299bd5988b438dff6e822b798f09" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.26.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "7bf9bf6e31d6247bdc48b92a2d4c184f" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.26.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "4a9bf8d8b2365fe59a97f7232a36cc41" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.26.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.26.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.26.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.26.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.26.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.26.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.26.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.26.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.26.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.26.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.26.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "3de3170f3f7d2473839d0667c3ed0fe9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.26.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.27.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.27.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.27.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.27.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "4354c55f0f4ee485c620402c11356e2c" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.27.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "529adcb23bcb0e714418dcdd0ce98aba" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.27.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "5694556b93f8e41f2e6ec124a95c9d0f" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.27.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.27.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.27.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.27.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.27.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.27.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.27.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.27.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.27.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.27.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.28.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "db708cecc9c0e1db114def2fa2f3e63b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.28.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.28.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.28.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.28.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "4fed18b3c01992c312825cbd58331da4" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.28.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "c7585fbf9699a6dc292be09aa16ec914" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.28.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "8107e9c496cecbc16f1ce02ef8b168a8" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.28.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.28.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.28.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.28.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.28.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.28.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.28.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.28.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.28.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.29.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.29.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "89d03de07f154a5ae8fb9a26a7293ba8" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.29.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "f9a6ad1f4848b0755fc9490be87926ef" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.29.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "eb6f5abbddf0980c3abc133183e5043c" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.29.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.29.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.29.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.29.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.29.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.29.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.29.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.29.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.29.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.29.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.29.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "d3e1a73ce080b3656da7ac25562e21c1" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.29.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.30.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.30.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.30.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.30.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "556c4c42389d22c87b85c42c18d81321" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.30.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "8c0c1728483a425b8e9d88281ee0b170" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.30.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "a32ee825340cfc1dea3e8cd0441d507d" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.30.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.30.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.30.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.30.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.30.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.30.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.30.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.30.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.30.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.30.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.31.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "cff47423323d27f1406d82b6ed185242" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.31.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.31.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.31.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.31.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "fcd503ff0522985f9370854f15288e54" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.31.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "a4cbba40e9848307769ee1c30edeeaa5" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.31.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "02c24208dff904d0859a37619efd3c0a" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.31.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.31.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.31.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.31.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.31.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.31.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.31.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.31.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.31.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.32.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.32.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "bb6a0cc8ea6ad5b86ae9d21952e07dba" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.32.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "07733e78088d2d064da351022baf56c4" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.32.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "8a7a63b48b3bd9c85bccee1837f0740c" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.32.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.32.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.32.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.32.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.32.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.32.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.32.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.32.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.32.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.32.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.32.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "9956213f27b7076626b89c585d9f205f" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.32.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.33.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.33.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.33.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.33.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "f5d7b00bafa56de1197f425e88e0c7ca" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.33.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "297484f2f5207e3dcc428a00abc3174e" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.33.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "9aaa170c2f5d1b8e75ce711f3ae419d7" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.33.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.33.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.33.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.33.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.33.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.33.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.33.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.33.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.33.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.33.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.34.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "4cc3a470680bd13091bb94d683944678" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.34.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.34.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.34.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.34.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "74d083d4aa6ad7e320bac0e250402aca" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.34.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "9bf5517c14a5a938891635a13f9856a8" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.34.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "2fe793949f4f21401e80d75466431eb8" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.34.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.34.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.34.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.34.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.34.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.34.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.34.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.34.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.34.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.35.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.35.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "36a08d65e4bdcb1d4cf1fc5da60ab21d" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.35.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "d8a7c85d042d82ed8f75d89cdd1064fc" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.35.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "47fd3f1dcdca8aa6b22bef4ed01e4e66" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 26694656, "records": [ { "name": "transformer.h.35.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.35.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.35.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 1983488 }, { "name": "transformer.h.35.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 17122304 }, { "name": "transformer.h.35.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19014656 }, { "name": "transformer.h.35.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19026944 }, { "name": "transformer.h.35.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19039232 }, { "name": "transformer.h.35.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19051520 }, { "name": "transformer.h.35.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19063808 }, { "name": "transformer.h.35.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 19112960 }, { "name": "transformer.h.35.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26682368 } ], "md5sum": "15a23ab95790dc7d615830cf51c59e9b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 25323008, "records": [ { "name": "transformer.h.35.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 0 }, { "name": "transformer.h.36.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 7557120 }, { "name": "transformer.h.36.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 7569920 }, { "name": "transformer.h.36.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 23339520 }, { "name": "transformer.h.36.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 25310720 } ], "md5sum": "23044beb339019409079afd0e39fadcd" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.36.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "c373ddfa17d167debaa43d3db2bc1acb" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.36.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "0ef0aa2797f4f921a322a421570b8fe1" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32281088, "records": [ { "name": "transformer.h.36.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 0 }, { "name": "transformer.h.36.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 15138816 }, { "name": "transformer.h.36.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17031168 }, { "name": "transformer.h.36.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17043456 }, { "name": "transformer.h.36.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17055744 }, { "name": "transformer.h.36.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17068032 }, { "name": "transformer.h.36.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 17080320 }, { "name": "transformer.h.36.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 17129472 }, { "name": "transformer.h.36.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 24698880 }, { "name": "transformer.h.36.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 24711168 }, { "name": "transformer.h.37.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 32268288 } ], "md5sum": "e5fcfb342b8bee0222630d5b8056b418" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 32891904, "records": [ { "name": "transformer.h.37.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 0 }, { "name": "transformer.h.37.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 15769600 }, { "name": "transformer.h.37.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17740800 }, { "name": "transformer.h.37.attn.c_proj.q_weight", "shape": [ 6144, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15138816, "byteOffset": 17753088 } ], "md5sum": "91a93ee815e15e6686c627189fdd9802" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 60555264, "records": [ { "name": "transformer.h.37.mlp.c_fc.q_weight", "shape": [ 24576, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60555264, "byteOffset": 0 } ], "md5sum": "235091dc32bd2c5940a56bf38a93b74b" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 60456960, "records": [ { "name": "transformer.h.37.mlp.c_proj.q_weight", "shape": [ 6144, 2460 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 60456960, "byteOffset": 0 } ], "md5sum": "5a24ff37616bd91f062464c1d58c5457" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 32911872, "records": [ { "name": "transformer.h.37.attn.c_proj.q_scale", "shape": [ 6144, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1892352, "byteOffset": 0 }, { "name": "transformer.h.37.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1892352 }, { "name": "transformer.h.37.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1904640 }, { "name": "transformer.h.37.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1916928 }, { "name": "transformer.h.37.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1929216 }, { "name": "transformer.h.37.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 1941504 }, { "name": "transformer.h.37.mlp.c_fc.q_scale", "shape": [ 24576, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7569408, "byteOffset": 1990656 }, { "name": "transformer.h.37.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9560064 }, { "name": "transformer.h.37.mlp.c_proj.q_scale", "shape": [ 6144, 615 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7557120, "byteOffset": 9572352 }, { "name": "transformer.h.38.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 17129472 }, { "name": "transformer.h.38.attn.c_attn.q_weight", "shape": [ 6400, 616 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15769600, "byteOffset": 17142272 } ], "md5sum": "f1153b9d8f2bd93ddb396f8bba73702c" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 1995776, "records": [ { "name": "transformer.h.38.attn.c_attn.q_scale", "shape": [ 6400, 154 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1971200, "byteOffset": 0 }, { "name": "transformer.h.38.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1971200 }, { "name": "transformer.h.38.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 1983488 } ], "md5sum": "74b011f32b1fdd177ce7594018282b16" } ] }