| { | |
| "metadata": { | |
| "ParamSize": 195, | |
| "ParamBytes": 7642159104.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 197001216, | |
| "records": [ | |
| { | |
| "name": "lm_head.weight", | |
| "shape": [ | |
| 32064, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 197001216, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48a04baa06c8140d1ae4fd61ea2a51b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2065d9f788567547a774f5ffb9f1131e" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cb9728ad4fa6db615b1caad12129df66" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "93eb83c12f0c5e8a7c3108b394c91f84" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "608636bdc84e74e9c06edc92b184ad51" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "016c70a051cf51d54861397f6e389ba0" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.22.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6fda1306b81ea8a098567dc6d73b273a" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9fbb3dbb48f84112a1cfdd1a3dfc1309" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe08a9496e9398f79dae924ceb0067eb" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "50bd13a3e1b4f4f677030aa126d58003" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.23.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a4bda16fe3cd9cc9a45a679920864915" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a7c2b8f5bc96ef8751dace0f4943eadd" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ebc48a19827d58234b2c8bf0681a127a" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4b73dc6122a84c6529a5f0ba77b6f9df" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.24.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8067191930fe75a1d628d5cd61a0bd37" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4600584c01f8b59b15bbb39157f83ea9" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "dee6947e469b73541a20200c4dfafc60" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48e123914a2e6b87cf0f84b551c39274" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.25.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c97246a9dc9042ec19934b2cf8b30594" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cef6f7e1c2aef1f38f38ba9f0cef500d" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "491dd3fab5b55ea5b79732be5ac73ad9" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0cf916f8ba3a5b21acb9979546b353c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.26.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a09a3bb9780b39448747ff97bfb01f77" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bc70e157266b370a65b2260fb956ef05" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e3049c1414aa0312b11252bc0df8e012" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a3cca75a3b6fab52c9e6d2925ed8c740" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.27.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a8a572beb8324e103d72a1c492b447c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "838f4f01a65960266efa7b3dcfcd6f62" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "98d5793d5b479999176cc86adf256624" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "54d4c13f9f91500f02e4e550a952fa5a" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.28.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07cb6d2348769db041143c3e0774ecab" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b292bd2096b63ce0b3a69d8017346d07" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "710dfac38663642c90b81a90955e53f0" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "184397f1a9cfa93527bd0b47bfc0f4d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.29.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c0b0cdb47135abd8be76c8cdf519687b" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "26b1c36720773b1715560a3380cfe5e8" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "65ab4c77f8d88808327b115184c44056" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0a969f022b81189c9675ca624838bb7b" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.30.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1f1118702f5f96cb08d2eadef0cd73a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f6391b80aa186aca2df8d816b3d5d20b" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cc354c852575d11406c100e4ecb6a360" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2f75fd301b667261b7b5cd83e306c61f" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.31.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e0774dd3e2453472fe59ba3797255c73" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 197001216, | |
| "records": [ | |
| { | |
| "name": "transformer.embd.weight", | |
| "shape": [ | |
| 32064, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 197001216, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2794efed4f90da2679835636c49453c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aca7213bce912376770af1f9a7805546" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7d45b5a1d117d0fb2f97c2566b85c080" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b29c48242ab28d7751664fd917cb53d9" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.0.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "43ffb7ce1c9bb16f42f8cd92533105ac" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fba6c01b38085811a0b169a689e9a8b0" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "754739f276a532078fb8adf8a358a84f" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b08e5972a3ee4dc0a5d2446c74b78667" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.1.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6a326d18c9a02a0ff844de3209d9d239" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5559680d21364578cee8b4f3b884c387" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b5a2560d04db862c4b0b44d480b3de11" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d7b4d5eb54d90c8b88cfcdcb7ab7a6d3" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.10.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a2f33fa311c565fc0ad4b0b2ac9c24cf" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ea00513628cede454214266723b73fe" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6f031b6a01f57cb949b87939fe3a6fcf" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7629386137cde4883fe6092961628908" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.11.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "74f94e083aa4730d2e928a652bb93aff" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fe1dcf39db8e8c03468b5714d7574f1d" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c55494e7965fcddc67886f2719fa4f28" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "01823150ed94ebda1ac03ff514674d59" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.12.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f7606df9f1f8a7ae529f3c3d84812cfe" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5efd0889d320559ec7d2587d88560310" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fde3986982d33d33d6f9033954c4f76b" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2d6c2ac2c20271403136de2b690e2c10" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.13.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f3e6c1bc7e706a4ca274baf2bb296a3e" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a6206e415bbb24b99496af3833bde153" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ce3c652a54a9d59b42dfd107c214fedb" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "08432e0cda4f3d4efb5af8226d292545" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.14.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4da8e23a88baea7d245115482c783b88" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "61fe1b44bf673ca96c6f5febd6dacaaa" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d03015371c3aab576d7c554299a64a19" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ba6b9594289c02e4bd8405905e3b9345" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.15.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "30b9eab0df51695103e0d2c8281853d2" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a87a68606ce19491b283ebfcb4f261e1" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3bb222858ea27693a823cc6e86b89394" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aafaa8e5e6341a8b186315f30b33eeaf" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.16.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c0ec806ad19c5f93174e13e77343e9d1" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fa41b02a47344ce12ce2462ba41cddae" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8a5537e9896e145836ebf99dbb1d617b" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "56aff7c581b6f3aa14835e0cf733a113" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.17.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bcb98fa15aca763650cfb90aad95ff8a" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3d3b925b996f918f06902cb77831e82b" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "006e594d0ec96f0a2b3824eeabc4405b" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "73d5f7100db12528051f7f19aede3d60" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.18.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9f9e0dad8db22348008c009241057bba" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "64cba4f187a6c45a05dc06853132d7c4" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a10d189b10ace93718ab187f3f78b402" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6a8801a5037374d1317431d8ed723218" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.19.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "329a57ff77416e0507f6fe9e662681e7" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "40578ab77b8ea1571d74b9a25d46d821" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e88194e0aed044702686ba4edd832d5d" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "baca8426aae056bc7a3ed32f5f7fa3b1" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.2.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "43077c2fdc9511c9bc7750d89ad53321" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "16c500aee75a3f6fadfb79388040ce59" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "35e566f071950f559d9a68ed651cdc0e" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bc4c11294dcef27da24c0d0c41cddb81" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.20.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c09843d6224c0c491c11052703711d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5301ec982f0f0bee7cbfd66c5f10db1e" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ad69d774f1ef8d178b241ed77788034e" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "71b49e1b1e72c73d95a73a80ff85cf56" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0a14557ee27469ea0c91accf21f1882a" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.3.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1f612196539cf259ff0ba018a1d3fdd8" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "125b30cce73232acdd57017b751b8ec0" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "174b431f56124195a88de449325221d5" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f47846816d3009156669ef65dd815b9c" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.4.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fc5eab9b081abe8d88f8713d7340a33e" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "57feb42a7dad4058566e19deebdacfdf" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "122ada6175d0fb97ac9bd9c1c28f4a83" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c27a252315d601d17df5b45c4ed394b" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.5.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "73184c595beddb967a0a1fb4adb8a697" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e4e3eed244b4ffe0542a0e6e950d5c23" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4f529e42dabff29fb4432f509073c9e0" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48c4aca8ffbb252d17c5f5effb09a532" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.6.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8a0f2cc6cd07a2208027ced9f52c3ad0" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9657e1f37ef7a3a950ff28e1b1c8d288" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6c071534349e4c2b500aa75b5f207769" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8e49ccfc80aa0ffe5526847f8c955f83" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.7.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "db54d2b3c23b16af51bc756ecf127307" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f9ab4d31e88e01d5d03b3cc9de36a71c" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "13f0af026f446e192ec681cae51413d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "084906da91b4132050bf217661c40b19" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.8.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e5c8f3b8d7bc024599ea7c12e86e12b6" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 50331648, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 8192 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 50331648, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "eaad0fe95f377541e343e2b65c526166" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 100663296, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 16384, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 100663296, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f32f91ad037860d6cd65911faac9277c" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 18874368, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "442e739c1bd8c0ad4a7528a629d4f556" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 56623104, | |
| "records": [ | |
| { | |
| "name": "transformer.h.9.mixer.qkv_proj.weight", | |
| "shape": [ | |
| 9216, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 56623104, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cba65d66e4a5aa25c15a78e798ac7d5f" | |
| }, | |
| { | |
| "dataPath": "params_shard_129.bin", | |
| "format": "raw-shard", | |
| "nbytes": 19273728, | |
| "records": [ | |
| { | |
| "name": "transformer.h.21.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "transformer.h.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 6144 | |
| }, | |
| { | |
| "name": "transformer.h.22.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 12288 | |
| }, | |
| { | |
| "name": "transformer.h.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18432 | |
| }, | |
| { | |
| "name": "transformer.h.22.mixer.out_proj.weight", | |
| "shape": [ | |
| 3072, | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 18874368, | |
| "byteOffset": 24576 | |
| }, | |
| { | |
| "name": "transformer.h.23.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18898944 | |
| }, | |
| { | |
| "name": "transformer.h.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18905088 | |
| }, | |
| { | |
| "name": "transformer.h.24.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18911232 | |
| }, | |
| { | |
| "name": "transformer.h.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18917376 | |
| }, | |
| { | |
| "name": "transformer.h.25.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18923520 | |
| }, | |
| { | |
| "name": "transformer.h.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18929664 | |
| }, | |
| { | |
| "name": "transformer.h.26.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18935808 | |
| }, | |
| { | |
| "name": "transformer.h.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18941952 | |
| }, | |
| { | |
| "name": "transformer.h.27.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18948096 | |
| }, | |
| { | |
| "name": "transformer.h.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18954240 | |
| }, | |
| { | |
| "name": "transformer.h.28.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18960384 | |
| }, | |
| { | |
| "name": "transformer.h.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18966528 | |
| }, | |
| { | |
| "name": "transformer.h.29.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18972672 | |
| }, | |
| { | |
| "name": "transformer.h.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18978816 | |
| }, | |
| { | |
| "name": "transformer.h.30.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18984960 | |
| }, | |
| { | |
| "name": "transformer.h.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18991104 | |
| }, | |
| { | |
| "name": "transformer.h.31.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 18997248 | |
| }, | |
| { | |
| "name": "transformer.h.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19003392 | |
| }, | |
| { | |
| "name": "transformer.norm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19009536 | |
| }, | |
| { | |
| "name": "transformer.h.0.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19015680 | |
| }, | |
| { | |
| "name": "transformer.h.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19021824 | |
| }, | |
| { | |
| "name": "transformer.h.1.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19027968 | |
| }, | |
| { | |
| "name": "transformer.h.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19034112 | |
| }, | |
| { | |
| "name": "transformer.h.10.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19040256 | |
| }, | |
| { | |
| "name": "transformer.h.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19046400 | |
| }, | |
| { | |
| "name": "transformer.h.11.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19052544 | |
| }, | |
| { | |
| "name": "transformer.h.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19058688 | |
| }, | |
| { | |
| "name": "transformer.h.12.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19064832 | |
| }, | |
| { | |
| "name": "transformer.h.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19070976 | |
| }, | |
| { | |
| "name": "transformer.h.13.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19077120 | |
| }, | |
| { | |
| "name": "transformer.h.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19083264 | |
| }, | |
| { | |
| "name": "transformer.h.14.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19089408 | |
| }, | |
| { | |
| "name": "transformer.h.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19095552 | |
| }, | |
| { | |
| "name": "transformer.h.15.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19101696 | |
| }, | |
| { | |
| "name": "transformer.h.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19107840 | |
| }, | |
| { | |
| "name": "transformer.h.16.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19113984 | |
| }, | |
| { | |
| "name": "transformer.h.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19120128 | |
| }, | |
| { | |
| "name": "transformer.h.17.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19126272 | |
| }, | |
| { | |
| "name": "transformer.h.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19132416 | |
| }, | |
| { | |
| "name": "transformer.h.18.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19138560 | |
| }, | |
| { | |
| "name": "transformer.h.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19144704 | |
| }, | |
| { | |
| "name": "transformer.h.19.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19150848 | |
| }, | |
| { | |
| "name": "transformer.h.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19156992 | |
| }, | |
| { | |
| "name": "transformer.h.2.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19163136 | |
| }, | |
| { | |
| "name": "transformer.h.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19169280 | |
| }, | |
| { | |
| "name": "transformer.h.20.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19175424 | |
| }, | |
| { | |
| "name": "transformer.h.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19181568 | |
| }, | |
| { | |
| "name": "transformer.h.3.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19187712 | |
| }, | |
| { | |
| "name": "transformer.h.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19193856 | |
| }, | |
| { | |
| "name": "transformer.h.4.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19200000 | |
| }, | |
| { | |
| "name": "transformer.h.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19206144 | |
| }, | |
| { | |
| "name": "transformer.h.5.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19212288 | |
| }, | |
| { | |
| "name": "transformer.h.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19218432 | |
| }, | |
| { | |
| "name": "transformer.h.6.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19224576 | |
| }, | |
| { | |
| "name": "transformer.h.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19230720 | |
| }, | |
| { | |
| "name": "transformer.h.7.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19236864 | |
| }, | |
| { | |
| "name": "transformer.h.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19243008 | |
| }, | |
| { | |
| "name": "transformer.h.8.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19249152 | |
| }, | |
| { | |
| "name": "transformer.h.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19255296 | |
| }, | |
| { | |
| "name": "transformer.h.9.ln.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19261440 | |
| }, | |
| { | |
| "name": "transformer.h.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 3072 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 6144, | |
| "byteOffset": 19267584 | |
| } | |
| ], | |
| "md5sum": "f066d7c15dbf7a42f65846ff50a0ae06" | |
| } | |
| ] | |
| } |