diff --git "a/tensor-cache.json" "b/tensor-cache.json" new file mode 100644--- /dev/null +++ "b/tensor-cache.json" @@ -0,0 +1,3110 @@ +{ + "metadata": { + "ParamSize": 194, + "ParamBytes": 7672043520.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1229193216, + "records": [ + { + "name": "transformer.embd.weight", + "shape": [ + 200064, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1229193216, + "byteOffset": 0 + } + ], + "md5sum": "a6a0f770aa96e933898a58fd50c5d637" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.0.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9cfd2557e123b19d11695e7260fb0915" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.0.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "8b37bf7d7e8e9985737a2243b540d8cb" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.0.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "4da16eaf72d38f0285168f45514e4f0d" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.1.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "40b99c62203019626cffab96a1388a15" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.1.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "21b86e0dc1a1a40ea415c53ab3290ff5" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.1.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "d8493994eed748bf1c1966d832eff628" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.1.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "ecbffb89703eb7aea3237393e6f23445" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.10.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2cfdcf061496c0c6d493be8fdc64f949" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.10.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2ff2eb453de6102d8f88f4ab32af0de4" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.10.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "62d74c592707e8e6cd3d66aa5a9f189d" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.10.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "7227e1c8f613c7fcfe0a90df60d14bfc" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.11.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "676ee98fdefdd19fccfce02bb326dfc9" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.11.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "49146429e6da2b6f7784b6dd698a91f5" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.11.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0640b76678c115ce10dec062ef2a6fa0" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.11.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "82f495796b8d9c03eec81e588024fc87" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.12.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9c38c70ffb42607f4ddb809c8077d015" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.12.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "02c65433dc13ad88ba4bb0c3a8e0178f" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.12.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "0655f6c9097a3568709be390b456895c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.12.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "ca8395dd8f5ff545b0e4a94c75e135ec" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.13.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "31ec8619f3a4f5281f6d328313a0a818" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.13.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "dc01abb1d0107a3173da98f0e0b7aea3" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.13.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "40f1dd341e9343663be3ad43a8195763" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.13.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "6f22184e6b6ecce9b6e4834268d7cd0f" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.14.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cbeec6b41c35554989c63bdf63dd2c94" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.14.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "9e6b183ea0b0368013d748a223d3980e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.14.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "df7421b29b4374eaffd2639f0dc8cdec" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.14.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "1102da53396ce122cfb087ff82fb69a7" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.15.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "bd1c2123e46231b2672c38c3ff20b297" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.15.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "05315931ad28e2f2851dc46b7ba17762" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.15.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bf4899d6f7aa15897fb5beafeeba7838" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.15.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "e599d390bb239935097fe94b2a1fa44a" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.16.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "179a5b81024492859111af09335dcb21" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.16.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3d6e52af0fa62f76b8b9667eeb344b98" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.16.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "25fbddc2589ea773880ac503bfb397b6" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.16.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "19fe2c4b4dab419da56f4cde9bbab67f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.17.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "94873b11c64f06eff2b4dfab0c9ee022" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.17.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2c0b73de143a7265160e9fc16e4a8e1f" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.17.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "39a159f13536661e8dfe2021d73b4f1c" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.17.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "848f18e2903dbcd69711a4ca98194853" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.18.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cdc490e1bd084fef7104da6c1c21e757" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.18.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "f12654262ad667db747a6f21f3d495a0" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.2.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ad867e74db5ee25a3d9ba6a1f321cf0a" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.2.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f590ad153aa6c00df91ad06af3ea45f6" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.2.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f90e064bfb24e006f7e93308dc41d9cc" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.2.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "4b1a354b74d9d243d8ed1c993764dc9b" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.3.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fe524c0729b79e1209e520c510c11f43" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.3.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "bbbefd04eb98a1b5bfe08c11a0fb34e0" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.3.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "a3e1c53c20aa791d5eda1bfb66b30331" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.3.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "84438bb0693241db56d6c02b00f53ea7" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.4.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "634d230e3c7bb0a333170127a21b4718" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.4.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "08a1cd3e5d24f2de4100b631d387e79b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.4.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "cc5d9d626e372015b34cb0c3a33f81c2" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.4.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "8545c3e679b26ded4f870d8bbc2e8386" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.5.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "1152cacff42bf2654949226094ffadb4" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.5.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4da9077bed768f2b1ae7289e8c98ee64" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.5.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "50cc76b3779afd035ada784e954aa81b" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.5.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "5ed6ab31ba79e6b77be4d6e2bd8cc253" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.6.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2bb6a3d1a87ffea8bd57a5e35b4fb49b" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.6.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "2f7f55fbd442a07212af2a6e80d30d70" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.6.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "df472c28af999282faffd47e803f1c93" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.6.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "90bd3281eda0993105c396a45539272b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.7.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e45c166f49e61ad68e40a504d367273c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.7.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "cf7f944a46a468b916b51b76482773fb" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.7.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "5bdfca44b47ddf56b6c20e9e1716dad2" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.7.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "c43dd2fd57a54108dc00f67ec2e498ad" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.8.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f0941f1e090def1c01f0de784ac14670" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.8.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d8946fd8b369b06d3cf7ed67e49c9533" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.8.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "adbbe425f01d94e3900c00e1bd126810" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.8.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "4dd0df11d831e7c5f8b30d1f60e0c4d0" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.9.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "16386495efbb5fd0b7044218a8b833a8" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.9.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f749c79ab9383e87b8a15f0def5ef531" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.9.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "bb56691de944334b790a3774f5f59b48" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.9.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "44382bdf57aa0e04a3942b40d75704e8" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.18.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d6a0b7180eff364dbed0862580fbad90" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.18.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e77d472a18f7b1cb58708051b4f1c8fa" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.19.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "ab6a6de50593885e77435f52afb23ff0" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.19.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e9253d8865e46f851f8bd5cdd9afee81" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.19.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "8ad5e96d06a179ba71d607bc720851de" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.19.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "d7797cc47b94d5fa6c43244a8a6a585e" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.20.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f2b72440cd6a812762a8e1de6e3447f3" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.20.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "43b019e7ff3bf1eb9bbe73362259631b" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.20.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "9b5171df8a436f456a313c6fdf281a7a" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.20.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "6f7c97f7c57bfac34779db2acef9b272" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.21.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "18c004b0674f29278099b44e0ae980d5" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.21.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "ea1f24701d39dcafb5b712eabc133355" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.21.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "f7529605a38d493f4bbb28fcd451d1bb" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.21.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "797939fe8ce9b7376966abca454b4f65" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.22.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "be56543783f12eedb4f08c727912db7b" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.22.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "30a3efbcd8fb5a228a6c3c510dedd336" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.22.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "434776258ce7f5ffdbf01a4f79e5ed93" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.22.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "a633c128186a435d90b97a61b414da19" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.23.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "55ae6d47eab9d634b49ef8777f2b0811" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.23.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "4ced40290bc187a01a677ce6f1f32a05" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.23.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aebeabdc04c01612a6beddeea49f5bf0" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.23.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "5a3c8b66da1ade2b75157f12258de8cb" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.24.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0aa19b7ae62b586d20d675771cb3159f" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.24.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "76a2405f29edc4d175cd0a70eecc7359" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.24.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "c364110c7891ac5f1ad934b4a4dca5e2" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.24.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "c4effe1f99c15bc3e558324a55864ac2" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.25.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0ec1df8f07030da33949cea6e71367e1" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.25.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "c4e1acafbbc88ff8ef30704dd7d02913" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.25.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "aa80aa0674ffba107caf35db890f23c6" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.25.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "9622d57728546e39176a48c62259840b" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.26.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e7301d46563bff2600452ab29d4d7c2d" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.26.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "f32f04c8cd4e4e598e955f5a7141ef9f" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.26.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "733289838bec8f41d58a95d40ef1ae1f" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.26.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "bdd31388297ac0de1ce923d2a2377b3e" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.27.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "500a5828acccd74faa9da79109363a33" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.27.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "077bae3e5e557beb8523234c92113f6a" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.27.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "af9182cf301b81b095bcfc1f219771ca" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.27.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "cfd06a50a3c16e508459bfabeb3821be" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.28.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "940c13b9129884b57b9db625917d8b77" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.28.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "e40e44ecc5bc8abbd7e64054bc32c0e3" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.28.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "19b8bd68c961ee6dca4d48ded6e0b946" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.28.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "1c37c27a264fb7b793542cc79297e812" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.29.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "cf07181e4babe5844a2a8a4554eea9c0" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.29.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "3b1453f3d97612c6ead627ec84b0ecb5" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.29.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "af806c7d80f8626e17d6bff228c3f199" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.29.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "f454c72529141c34ea962ecdc296e0f5" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.30.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "d5d8b3945454e5aaf3081921643a8ecf" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.30.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "beab24d9146d718d09032150d982347b" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.30.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "653ba0b3f023d288d8eec4995f4aae1b" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.30.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "80b23147acb83e403d32ef6b11d502e3" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "transformer.h.31.mlp.down_proj.weight", + "shape": [ + 3072, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "178a2c2643c99024b3c233b20f5c327e" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 100663296, + "records": [ + { + "name": "transformer.h.31.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 100663296, + "byteOffset": 0 + } + ], + "md5sum": "d45bc79d093330dedc3f9651c1cdcfe2" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 18874368, + "records": [ + { + "name": "transformer.h.31.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 0 + } + ], + "md5sum": "17439b70fafd8188737d55dda4c6c35d" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 31457280, + "records": [ + { + "name": "transformer.h.31.mixer.qkv_proj.weight", + "shape": [ + 5120, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 31457280, + "byteOffset": 0 + } + ], + "md5sum": "b4dfd06ecb5202a7f03a30873012a7b5" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 19273728, + "records": [ + { + "name": "transformer.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6144 + }, + { + "name": "transformer.h.0.mixer.out_proj.weight", + "shape": [ + 3072, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 18874368, + "byteOffset": 12288 + }, + { + "name": "transformer.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18886656 + }, + { + "name": "transformer.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18892800 + }, + { + "name": "transformer.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18898944 + }, + { + "name": "transformer.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18905088 + }, + { + "name": "transformer.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18911232 + }, + { + "name": "transformer.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18917376 + }, + { + "name": "transformer.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18923520 + }, + { + "name": "transformer.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18929664 + }, + { + "name": "transformer.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18935808 + }, + { + "name": "transformer.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18941952 + }, + { + "name": "transformer.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18948096 + }, + { + "name": "transformer.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18954240 + }, + { + "name": "transformer.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18960384 + }, + { + "name": "transformer.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18966528 + }, + { + "name": "transformer.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18972672 + }, + { + "name": "transformer.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18978816 + }, + { + "name": "transformer.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18984960 + }, + { + "name": "transformer.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18991104 + }, + { + "name": "transformer.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18997248 + }, + { + "name": "transformer.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19003392 + }, + { + "name": "transformer.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19009536 + }, + { + "name": "transformer.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19015680 + }, + { + "name": "transformer.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19021824 + }, + { + "name": "transformer.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19027968 + }, + { + "name": "transformer.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19034112 + }, + { + "name": "transformer.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19040256 + }, + { + "name": "transformer.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19046400 + }, + { + "name": "transformer.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19052544 + }, + { + "name": "transformer.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19058688 + }, + { + "name": "transformer.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19064832 + }, + { + "name": "transformer.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19070976 + }, + { + "name": "transformer.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19077120 + }, + { + "name": "transformer.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19083264 + }, + { + "name": "transformer.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19089408 + }, + { + "name": "transformer.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19095552 + }, + { + "name": "transformer.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19101696 + }, + { + "name": "transformer.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19107840 + }, + { + "name": "transformer.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19113984 + }, + { + "name": "transformer.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19120128 + }, + { + "name": "transformer.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19126272 + }, + { + "name": "transformer.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19132416 + }, + { + "name": "transformer.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19138560 + }, + { + "name": "transformer.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19144704 + }, + { + "name": "transformer.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19150848 + }, + { + "name": "transformer.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19156992 + }, + { + "name": "transformer.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19163136 + }, + { + "name": "transformer.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19169280 + }, + { + "name": "transformer.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19175424 + }, + { + "name": "transformer.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19181568 + }, + { + "name": "transformer.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19187712 + }, + { + "name": "transformer.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19193856 + }, + { + "name": "transformer.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19200000 + }, + { + "name": "transformer.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19206144 + }, + { + "name": "transformer.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19212288 + }, + { + "name": "transformer.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19218432 + }, + { + "name": "transformer.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19224576 + }, + { + "name": "transformer.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19230720 + }, + { + "name": "transformer.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19236864 + }, + { + "name": "transformer.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19243008 + }, + { + "name": "transformer.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19249152 + }, + { + "name": "transformer.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19255296 + }, + { + "name": "transformer.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19261440 + }, + { + "name": "transformer.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19267584 + } + ], + "md5sum": "4d8aca5887bb60a891441d6b8e7c5c79" + } + ] +} \ No newline at end of file