{ "metadata": { "ParamSize": 648, "ParamBytes": 8902356992.0, "BitsPerParam": 4.5019815935059295 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "lm_head.q_weight", "shape": [ 49152, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "babcefb413e4f053531159532b1603c5" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.38.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5ae40837c56fc53a1e6867cc93ee441f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.38.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2397855968fbee540027008677ddf020" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.38.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dea2f1f44cd50fc82df9cbeb7ca29dd1" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 30769152, "records": [ { "name": "lm_head.q_scale", "shape": [ 49152, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 }, { "name": "transformer.h.38.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "transformer.h.38.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "transformer.h.38.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21245952 }, { "name": "transformer.h.38.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21258240 }, { "name": "transformer.h.38.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 21270528 }, { "name": "transformer.h.38.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 21319680 }, { "name": "transformer.h.38.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30756864 } ], "md5sum": "bc501ee66f72f3f28eea2a0dda3e9872" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.39.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0886e966a07a80c3e1a4170221d333ab" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.38.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.39.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.39.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.39.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.39.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "e49fa3e84a78e3a2e53b084165a1c521" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.39.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0c8ee868d1e77ca9833a15d68538d538" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.39.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "562d7e70fd8ebe1e00887ffe19e1972c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.0.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "b3a6a1ec978b9ed8f6262c9a12da107a" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.0.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "20ffc69b60fb065cac8601db108435f4" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.0.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a8ff096b5c3ea5e18273979807e9e68e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26309120, "records": [ { "name": "transformer.h.39.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.39.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.39.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.39.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.39.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.39.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.39.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.39.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.39.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.ln_f.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21344256 }, { "name": "transformer.ln_f.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21356544 }, { "name": "transformer.h.0.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21368832 }, { "name": "transformer.h.0.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21381632 }, { "name": "transformer.h.0.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23839232 }, { "name": "transformer.h.0.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23851520 }, { "name": "transformer.h.0.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.0.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.0.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26235392 }, { "name": "transformer.h.0.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26247680 }, { "name": "transformer.h.0.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26259968 } ], "md5sum": "856d05c256b46f8c834824faeb3ff628" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.0.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a6bbaca2578b4639dbd2d54510beddbf" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.1.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "80cf30393e14efab29306e340d0ce595" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.1.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "cffaac7091ff103b4fa07c4d4561caa3" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.1.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3d845f06b6924fb5da9d9e39a7da3894" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.1.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0752298d33b77e2a5abfff59e0002246" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.0.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.0.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.0.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.1.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.1.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.1.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.1.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.1.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.1.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.1.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.1.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.1.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.1.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.1.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "00388c94dedf014b0205e210d3d24a82" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.2.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "7563282d813947f7c4011a0e6765ed46" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.1.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.2.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.2.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.2.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.2.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "383a9bfbf85a08c9822f4479134e08c8" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.2.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a6093d6d592c71b32c331d92015de46a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.2.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2be994479a3e9299ebd19ad19b468a5d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.3.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "4d532ef18e39e73a53712d836f5572b7" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.3.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2e4b432961cd77e0dbf1c3966adada95" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.3.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8a5b7422608712fc1c85e7052939b372" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.2.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.2.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.2.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.2.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.2.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.2.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.2.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.2.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.2.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.3.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.3.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.3.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.3.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.3.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.3.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.3.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.3.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.3.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "dd48cc2df11d4cc8e5a0744d58e53434" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.3.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4e758a47111ac878b0d1004a320f77d6" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.4.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "6b2e75ee6fbadd41d4c05c769408f166" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.4.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "107cf68ffb72b7d2038f48ac7d70fc60" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.4.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8e143ab7f80422fc54459c4a1138c79b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.4.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cf409852ac66d6c0f1a9f68ea9bce9e9" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.3.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.3.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.3.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.4.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.4.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.4.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.4.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.4.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.4.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.4.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.4.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.4.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.4.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.4.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "9b3380925f6348de1275f416981565e4" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.5.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "507aa7fbea7f5ccc11d7f47bd7dccded" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.4.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.5.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.5.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.5.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.5.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "49bfbd5c820118a6bb95ba5981d7b28d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.5.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "94304452ad1b1ddba89430224cf5e2bb" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.wpe.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d222cfc4fbec8e71e417524cacb7bcfa" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 150994944, "records": [ { "name": "transformer.wte.q_weight", "shape": [ 49152, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 150994944, "byteOffset": 0 } ], "md5sum": "babcefb413e4f053531159532b1603c5" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.wte.q_scale", "shape": [ 49152, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2264d9cdd42deb032041fc1f527c7768" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.10.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "997171f727125282a44a75bd8e72af1e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.10.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "411ad9d5c020f21d73b2c9859d15ccd8" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.10.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "69eaef092310789038670bc4e1d9d164" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.10.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "445a4ee984f81d497db02108e3422c2c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29430272, "records": [ { "name": "transformer.h.5.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.5.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.5.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.5.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.5.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.5.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.5.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.wpe.q_scale", "shape": [ 8192, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 11894784 }, { "name": "transformer.h.10.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 15040512 }, { "name": "transformer.h.10.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 15053312 }, { "name": "transformer.h.10.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 17510912 }, { "name": "transformer.h.10.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 17523200 }, { "name": "transformer.h.10.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19882496 }, { "name": "transformer.h.10.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19894784 }, { "name": "transformer.h.10.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19907072 }, { "name": "transformer.h.10.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 19919360 }, { "name": "transformer.h.10.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 19931648 }, { "name": "transformer.h.10.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 19980800 }, { "name": "transformer.h.10.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 29417984 } ], "md5sum": "7dd4f3533ddef5fff3f934212d92ce7c" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.11.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "49587dab77a1a737f36e88db54281879" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.10.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.11.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.11.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.11.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.11.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "5d4d02f5632affbb85c67f79b1f9a2a4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.11.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c852bea71100fcab68c49cf37ca98ddc" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.11.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "10fe275e31c5e312d15468acf3f6b94c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.12.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "869d51a3fd6f40e08fdf3babf5ed92ed" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.5.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4444b8eb4ab2d35d7dab5faca96a6493" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.6.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "ee0ba99d566136756b211382c1ab1b1d" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33301504, "records": [ { "name": "transformer.h.11.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.11.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.11.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.11.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.11.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.11.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.11.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.11.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.11.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.12.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.12.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.12.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.12.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23826944 }, { "name": "transformer.h.5.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23839232 }, { "name": "transformer.h.5.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23851520 }, { "name": "transformer.h.6.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 33288704 } ], "md5sum": "e426935f44ffb1f68e4d0fedd3fbe18c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.6.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "91c4f854c771d0cf03fbde9d20c7928e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.6.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "32ee966573e0643ec25ffe1f36bedc3c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33251328, "records": [ { "name": "transformer.h.6.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 0 }, { "name": "transformer.h.6.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2457600 }, { "name": "transformer.h.6.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 2469888 }, { "name": "transformer.h.6.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21344256 }, { "name": "transformer.h.6.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23703552 }, { "name": "transformer.h.6.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23715840 }, { "name": "transformer.h.6.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728128 }, { "name": "transformer.h.6.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740416 }, { "name": "transformer.h.6.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23752704 }, { "name": "transformer.h.6.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23801856 }, { "name": "transformer.h.6.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33239040 } ], "md5sum": "fcb61b52ac35392dce73b5274e3a4f6d" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.7.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "09ea0c114d334279668813cd4bde6ea6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.6.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.7.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.7.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.7.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.7.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "f267f9afaee2414c60114d460c323b31" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.7.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7e2ebc903f1f7f37d1901b6b9ac38cf3" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.7.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b45f6894e1dc84f40d9390374f7dc290" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.8.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "6a474854afce52990d4a395bd2ae12f0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.8.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "afbbd7c4b89f87d390cf2405d8ee8523" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.8.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "866348524c61711d88f824a1db5d5d6e" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.7.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.7.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.7.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.7.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.7.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.7.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.7.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.7.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.7.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.8.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.8.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.8.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.8.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.8.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.8.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.8.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.8.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.8.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "5dea17d4a41e64683a66498748dfc66e" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.8.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2c71df6d0ca289385dde0430939c7dd0" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.9.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "fdb3806ea16c6a5f9b7137022aa692ae" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.9.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9a72fc2e5b14dda8a84221c35c2be2d8" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.9.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "26291d69203dffdb4b623b4d075acb7a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.9.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "639567378ea8a0b71c20489f94930e82" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.8.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.8.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.9.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.9.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.9.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.9.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.9.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.9.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.9.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.9.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.9.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.9.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.9.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "dfa35ae56731bc34e0a4416c6a802933" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.12.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d6c540589f42dbd5cc46255108b45e6e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30756864, "records": [ { "name": "transformer.h.9.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.12.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.12.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 9449472 }, { "name": "transformer.h.12.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 28323840 }, { "name": "transformer.h.12.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30683136 }, { "name": "transformer.h.12.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 30695424 }, { "name": "transformer.h.12.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 30707712 } ], "md5sum": "c50334c9be1c7b7165b44413a4229d2e" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.12.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e861e81980615136c9b6182a4381947f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.13.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "bbd415368c2c5dc748f4cfbc7bf89956" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.13.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0374aba60b137e3fea7968fecd3f9b64" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.13.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "8dbb90af792131bac5df5b8a738a4467" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.13.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d8866a59d5aed528a1889eb2d11c0b91" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.12.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.12.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.12.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.13.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.13.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.13.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.13.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.13.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.13.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.13.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.13.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.13.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.13.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.13.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "f2ca060c4d6c99d620813b126b607255" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.14.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "584d95cca7023d6ebb1aed5751ba47c5" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.13.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.14.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.14.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.14.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.14.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "70443ea582b2dfe15a0dcad53e30af2d" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.14.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0a436b5781b6425e631ea1da821f2b71" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.14.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3f81ea6401fa1355b1fb6179c61f6fd3" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.15.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "52894e59d1012a18b7ee108af5f1241c" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.15.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "867fbc93f990ac267aaf08d8206a950b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.15.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "68ba6c14f0ed7a4890400377da8fa5d9" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.14.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.14.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.14.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.14.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.14.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.14.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.14.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.14.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.14.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.15.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.15.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.15.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.15.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.15.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.15.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.15.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.15.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.15.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "f188961bef546fc4ce9816e6ed395155" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.15.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a36c755b2ebcfd9a9eabc96b8420e18e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.16.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "62840090d164e0e7a9dacefb0c4c63da" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.16.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "37c77cd53e7071bc63459a36c15fd08b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.16.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fe7f24e364f1ad2d04260ffaa05a537c" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.16.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "04b7c1b29a28f73273a23469eb8ed983" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.15.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.15.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.15.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.16.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.16.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.16.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.16.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.16.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.16.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.16.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.16.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.16.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.16.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.16.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "7e4dfbab41e6509eb9b8d2773b28fed5" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.17.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ad85f1072ceba1420d5a48d7b0e24097" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.16.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.17.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.17.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.17.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.17.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "925535ab1de95a11bd0f8bf3eccd1827" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.17.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "260801454421f59c0f21adc8b4c8e75a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.17.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "16b0cd04ec30fab0ee415f2996bed0fe" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.18.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "1c33c6ec81846fda7ec5049f24bfc6ff" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.18.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e43c9b17ecf4048039d23e4963a65826" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.18.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fb58b5ecddf6d5e645e5c2f56b9ce4d9" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.17.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.17.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.17.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.17.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.17.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.17.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.17.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.17.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.17.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.18.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.18.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.18.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.18.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.18.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.18.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.18.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.18.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.18.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "44eedba83e95cb0fc371e85dad061a72" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.18.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "23776761b7d6d5dcbf8d0bd3daf77e04" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.19.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "d1f584fee39c9bf061fe8d5956fa1c4a" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.19.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bf04e079e21848e26d10f6727e2eb9cd" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.19.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "00e543d447c27cc2aeede2f5a2a71e08" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.19.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0a81513dbbece2c86c72a6438a3f7648" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.18.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.18.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.18.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.19.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.19.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.19.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.19.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.19.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.19.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.19.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.19.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.19.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.19.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.19.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "519d1c4d5b4ae091da308478ec77e8a7" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.20.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "48bd1b290ea35af99dd8f2ed381f1c36" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.19.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.20.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.20.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.20.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.20.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "d2762dfe4809e876d8c1d17162466002" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.20.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b31e63bb23b201f5ab4e8bc752786586" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.20.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "71531c3ae56ed1ae7e682fc1cd637fb0" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.21.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "eaf9bc6e47a7fd9db726dc1694da998a" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.21.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0edfd2ce951dfbf9aa89fb14d7e04a71" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.21.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dc92d34ac3d42336e10d49ad40e9c04a" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.20.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.20.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.20.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.20.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.20.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.20.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.20.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.20.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.20.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.21.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.21.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.21.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.21.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.21.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.21.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.21.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.21.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.21.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "59bd9da2feef21831a5b25aad02ce3b6" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.21.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "189f0e21a7b2e5c8e4ed43b5647d68c2" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.22.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "0a535002c5bc73ad1d27229599f45ba4" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.22.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "2968a09e19563a0cce55be9217026b7a" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.22.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0cb3c352666aec53547619c43dd697a2" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.22.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e4a40f4630aef4ef9faa8734e4c047d5" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.21.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.21.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.21.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.22.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.22.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.22.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.22.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.22.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.22.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.22.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.22.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.22.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.22.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.22.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "f06a2aacc4166332127b94769ba09d4e" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.23.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "c4eefcafeb07017bd657c6240e5b2952" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.22.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.23.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.23.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.23.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.23.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "4be6e6792787aee972a1fbe47c15fadc" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.23.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dbbf773a1072e3ee04571a100956a326" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.23.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "717e293bff506ab7670c3e0cc52d2056" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.24.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "9faa15643751e9937f6c311284fe4365" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.24.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9591babcff75f8437390e968c4f306b9" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.24.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "779b08ee797726a942e2964c59f70f6b" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.23.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.23.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.23.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.23.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.23.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.23.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.23.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.23.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.23.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.24.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.24.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.24.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.24.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.24.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.24.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.24.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.24.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.24.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "ef100068ffe87f883e3324f5a7c02064" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.24.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6210a54f0e211c91ed13ba3554dfaa6c" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.25.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "f2991d1b393db018a2798ba35a7e66d6" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.25.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "60981b9625473f545b68f51fc831174f" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.25.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "53e5c68c1878a3060f2ae2e9d050656b" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.25.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "0597e5b704ad33df151191ff1f2ffe94" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.24.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.24.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.24.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.25.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.25.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.25.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.25.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21369344 }, { "name": "transformer.h.25.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21381632 }, { "name": "transformer.h.25.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21393920 }, { "name": "transformer.h.25.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.25.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.25.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.25.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.25.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "d10ddc848165558fe15d1aad65f4bde3" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.26.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "59f2a44947767d9743d1c3c4c7a787eb" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.25.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.26.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.26.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.26.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.26.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "a5fa7bd89d39b2466de2a09b048d0c20" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.26.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ed69da31d0bb6d97e3fcd2f004fe9767" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.26.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7e12dcdd1b19edfb77ba9bd595f9e9c6" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.27.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "8a2f8f92a35b47201829ac4f43969314" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.27.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "848a9bcdaa4503454ec12f212530ccb6" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.27.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6bf09aa8a402fb07423d2bf14adf72ec" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.26.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.26.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.26.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.26.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.26.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.26.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.26.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.26.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.26.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.27.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.27.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.27.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.27.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.27.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.27.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.27.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.27.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.27.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "d8a22020a4fbbd4994c68f51e13625fb" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.27.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c8f74c523dcb8bd314733c76a5099bf1" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.28.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "56ed96c6c1eb3d8c19ed516b85a0de7a" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.28.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "063fd180bd3fc2be91501d946895813f" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.28.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ee6226a3decf7f4ed812982e86354928" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.28.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "259cba18c37e1179d0fc0adbe5ad20ea" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.27.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.27.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.27.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.28.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.28.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.28.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.28.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.28.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.28.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.28.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.28.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.28.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.28.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.28.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "5def0e37882ed405e93cb15824797932" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.29.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bab2b0db03aa2f9594d5d1b6e25f7c08" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.28.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.29.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.29.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.29.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.29.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "93152b73d70a5cd5a70a207f615f57c1" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.29.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bc6bbfdc2bb96882a519e4700584c901" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.29.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3acfe7797101f908cc98a7a50136ac35" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.30.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "8e200083e56277fd0cf98517df2f0b7d" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.30.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bd9bf98cb088230c1cb5e4b65b19b6d7" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.30.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "46bdd5a9f7aae703931bed7bd67c0b6e" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.29.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.29.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.29.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.29.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.29.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.29.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.29.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.29.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.29.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.30.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.30.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.30.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.30.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.30.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.30.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.30.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.30.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.30.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "246b45bc71ad5f8e53ac00ee1bffe5f8" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.30.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "582aa8a87bc78236d4fbd477eb8d5a71" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.31.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "dd7ba8d91260f6f9f427d3a2d5aa307e" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.31.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9ca002e848cba0e5620783d7b859b886" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.31.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d12f6ad5521046affacf2800ed678593" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.31.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "108b2da251b9ccd5a3468747ae821e40" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.30.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.30.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.30.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.31.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.31.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.31.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.31.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.31.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.31.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.31.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.31.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.31.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.31.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.31.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "e3f20fbdf0a039cd51c5dcc160a7c48d" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.32.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ec2181c190e6991fb8a6d43b57aec4b9" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.31.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.32.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.32.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.32.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.32.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "23ec1a5c30b3c2ed0544b096ab4d36fe" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.32.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7e9da361914dfce9df09676ac45bdce7" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.32.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "cab1cf77a4c6702480a96dc55a5b2c29" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.33.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "00ae4b7fc6e870fe17c9b88738c80e30" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.33.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "bc26bf7cdcee9ea400b1c5827556eeb1" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.33.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "57b2ff92feb583bfc61e1cb90d7c2a35" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.32.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.32.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.32.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.32.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.32.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.32.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.32.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.32.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.32.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.33.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.33.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.33.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.33.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.33.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.33.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.33.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.33.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.33.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "c0f0eda081711fcaca7f33a50bba8227" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.33.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b68979fb6ccde5ac89089f712415a814" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.34.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "b797ea40687985202fe7e41ecbd7dad2" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.34.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8304550c686860c00065e3f392b8b93d" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.34.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2d5b2e72c7e012d68cc951c4d3a7f49d" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.34.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fa45f9fb41e4a46acdea87a68acff9e0" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.33.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.33.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.33.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.34.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.34.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.34.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.34.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.34.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.34.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.34.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.34.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.34.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.34.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.34.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "bb2fea1605ac8df7fdc731da13bb71bc" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.35.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1efabdeae2ae9fd4e83a7f9ac5253726" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 31580672, "records": [ { "name": "transformer.h.34.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.35.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.35.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.35.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.35.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 } ], "md5sum": "61ef8ebd9a50e3bfcfa1d8eb7b66d1aa" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.35.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4c82015285fb52e0e4b23a023ffef027" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.35.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1a9c4da81a1c623a45d69a4b7c1b6449" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.36.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "f3b2fb19fe71b291dd529243f417bddf" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.36.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "552c819d816545a1df8b6f240ccaf520" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.36.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "705c5e2b2225b9c534a36a0e3c288c4c" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 26284544, "records": [ { "name": "transformer.h.35.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "transformer.h.35.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2359296 }, { "name": "transformer.h.35.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2371584 }, { "name": "transformer.h.35.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2383872 }, { "name": "transformer.h.35.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 2396160 }, { "name": "transformer.h.35.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 2408448 }, { "name": "transformer.h.35.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 2457600 }, { "name": "transformer.h.35.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 11894784 }, { "name": "transformer.h.35.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 11907072 }, { "name": "transformer.h.36.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 21344256 }, { "name": "transformer.h.36.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 21357056 }, { "name": "transformer.h.36.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23814656 }, { "name": "transformer.h.36.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 23826944 }, { "name": "transformer.h.36.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26186240 }, { "name": "transformer.h.36.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26198528 }, { "name": "transformer.h.36.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26210816 }, { "name": "transformer.h.36.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 26223104 }, { "name": "transformer.h.36.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 26235392 } ], "md5sum": "6c89b81049a768bb00a2edc703096068" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.36.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "b711e552379d561d798a3f7b9eaad35f" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 19660800, "records": [ { "name": "transformer.h.37.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 0 } ], "md5sum": "45946233a139ee5eb0d1bd1286ba9a83" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "transformer.h.37.attn.c_proj.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4aefa39dcbdac601e387adbdf9af3e35" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.37.mlp.c_fc.q_weight", "shape": [ 24576, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "009466fa17ed38a4982e073fb5c45ed6" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "transformer.h.37.mlp.c_proj.q_weight", "shape": [ 6144, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "dd8a3c07257ae8438e9e5efb69370338" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 33276416, "records": [ { "name": "transformer.h.36.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.36.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 9437184 }, { "name": "transformer.h.36.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 9449472 }, { "name": "transformer.h.37.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 18886656 }, { "name": "transformer.h.37.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 18899456 }, { "name": "transformer.h.37.attn.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 21357056 }, { "name": "transformer.h.37.attn.c_proj.q_scale", "shape": [ 6144, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 21369344 }, { "name": "transformer.h.37.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23728640 }, { "name": "transformer.h.37.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23740928 }, { "name": "transformer.h.37.ln_2.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23753216 }, { "name": "transformer.h.37.ln_2.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 23765504 }, { "name": "transformer.h.37.mlp.c_fc.bias", "shape": [ 24576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 49152, "byteOffset": 23777792 }, { "name": "transformer.h.37.mlp.c_fc.q_scale", "shape": [ 24576, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 23826944 }, { "name": "transformer.h.37.mlp.c_proj.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 33264128 } ], "md5sum": "913f3518d60b3fe2473348030eb4e5f3" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 31592960, "records": [ { "name": "transformer.h.37.mlp.c_proj.q_scale", "shape": [ 6144, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9437184, "byteOffset": 0 }, { "name": "transformer.h.38.attn.c_attn.bias", "shape": [ 6400 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12800, "byteOffset": 9437184 }, { "name": "transformer.h.38.attn.c_attn.q_weight", "shape": [ 6400, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 19660800, "byteOffset": 9449984 }, { "name": "transformer.h.38.attn.c_attn.q_scale", "shape": [ 6400, 192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2457600, "byteOffset": 29110784 }, { "name": "transformer.h.38.ln_1.bias", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31568384 }, { "name": "transformer.h.38.ln_1.weight", "shape": [ 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 31580672 } ], "md5sum": "903d85cf3e729cd1dc5a7ab56c7dcfcc" } ] }