diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 7be5fc7f47d5db027d120b8024982df93db95b74..80b070f4bed7e9af4606b712acf5c164e4214457 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ --- +language: +- en license: mit +base_model: huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated +base_model_relation: quantized +library_name: mlc-llm +pipeline_tag: text-generation --- + +4-bit GPTQ quantized version of [DeepSeek-R1-Distill-Qwen-32B-abliterated](https://huggingface.co/huihui-ai/DeepSeek-R1-Distill-Qwen-32B-abliterated) for inference with the [Private LLM](http://privatellm.app) app. diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..d9e7e08f5c8e277065c653a7c72987d064f55208 --- /dev/null +++ b/config.json @@ -0,0 +1,5 @@ +{ + "quantization_config": { + "bits": 4 + } +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..eb35901aa9a4a02bfb969ba43633ddab063ca431 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,9583 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 16895535104.0, + "BitsPerParam": 3.0067237203635373 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 640, + 152064 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "fc6101d4aec293d4610b4d061da07594" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e3029b403f8ece245608f6fd480de38b" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6251fa918295a16ec39c6f675af30e13" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "2160f6cbda407f70c0d2058e2543e6e5" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8844ae2ef8aeac282075495f0ed33cb9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "466915118d0035dc1649e89e311914fb" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33218560, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 40, + 152064 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 0 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 12165120 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 12175360 + }, + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14387200 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18810880 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18821120 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 18831360 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30996480 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 31006720 + } + ], + "md5sum": "f91d69acede10f5069c1ceec22342abd" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 23371776, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 4423680 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 4433920 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 4448256 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22798336 + } + ], + "md5sum": "7383675dfe8f71f4b718fef03498e0e8" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e2e01354a688dc69f8144cc0e2a268ed" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d8414fc01cf2aebe2124fe531d8563d7" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "edbe983cf5fb116f5be4df9c868778ed" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "3b003259ed167af39e45cff2ecd46592" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "381173d6f04b99b9140eee619248b581" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fe4863acc0076e0200554cb6070eda24" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bad07eb8706c833098a03ce01d9ffa37" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "01846d1902ae7ca82cb575e42ca4c951" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "4bf42bf914ea38476af2bf2b9d6b762f" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "362f9f9224dc5db70db690a911d97048" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "07822c54f1aed36a312b326a661666aa" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "92018761d25fba4857242764441cd44f" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9c4d43d032402f3412146b9d21ec0010" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25583616, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 2211840 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 6635520 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 6645760 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 6660096 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 25010176 + } + ], + "md5sum": "15d17fa70d1469aab8d0ca814721f165" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "988d076db6de14f6f71de7eddae1963d" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3922e04c8099dc2a7075d003e453338c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "662da3b189700e3f6a1850c46b907c31" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "7cace7da99ae4fe2ac47a650be6a572c" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "683d56211627e8bc5292f446d0039fbd" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3eb284760c8ab67c03ba7872927e84bf" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "11542ff5bd184d9bc88998e0ac220ed2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "dfd1bdc1c0710e58e0e858d94ac936c9" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f283d6b9f469cdca977ef1160fc730c4" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0d088f69da0b55b7b6b4102bea507a5e" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bc87ff9836809dfbbc2e0dc618dbf41c" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "3a6496625450bde8c2cac4e67d515921" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "17729c992e5d0c75b594ed819d3a9e82" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "32fa7b08f70b062813d95170945fe5ed" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1313cfbe3d5d279c89aab78c7dd81113" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32239616, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 2211840 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 6635520 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 6645760 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 6656000 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 8867840 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13291520 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13301760 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13316096 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31666176 + } + ], + "md5sum": "01bfdb70a06089856f6465df9cf91053" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "28c130303062e13e9867ecdd273eb520" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7401f32cf3624495c47489239bf75c1d" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6b6c6b2fab9e5e52f97a5abe84ae62e7" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "63a34bddc82ebee5302ca458e30eac6a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 22992896, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15738880 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15749120 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15759360 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17971200 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22394880 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 22405120 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22419456 + } + ], + "md5sum": "fa4aea231d7ccc2009b2b6ce33624c9b" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f83d520af6f7dda426db47d22783e2ac" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c7e045e595760a3b0484cf66f1406a0c" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "521a56971691e214b9ca90faeaa637e4" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "315e1edcc7b5b7abd60422187c96a975" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4d316f6b7c4644233d0ae6b23a89c13c" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "27d14b4b1d1e31e97aafd59545dff15e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8f935992ddb22b29ac33e290c1536925" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "4ddaa2f69d23f3faf88893599e9e93b9" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "08da806435deec8ff1d7e866c1c4f4ad" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3048f78d24e69335a8d994ce329b5d04" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "11a99023b95cb793968b219128a1a1ff" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "67c62420bbf6f33f6e47652d1a962764" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a5ad2932b54e3873c02d585bd674ec19" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d08d9715ce7f7e2dd27a32a5b488f7d0" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2d5a7414d9469f88bd484ec914a635c8" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "ec57956f5a761c7f62644ed964e01897" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c782079863df2936fa99e2da86a2e125" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "63c0104d3a6f43c3cd30816c7bbc6a27" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "44c737c41552b5840278561025dabaee" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5c9b857dbfcc0c86dfe2aa99f25bc7be" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9b9efc1e7b2ceec4ebb366343d5af16b" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "98ec1b9bdc2e3d9c1b9f4d16971a9557" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "5de32d1e2161fa0c487a032f6a786422" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "42fff9f4b4169b66386f58a8d6de69fa" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "047bdf25ae1e8b9718bc6e9fa86899a2" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0e6ba674b2fff585ca0bfe4779be6af8" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "ccde50605f7576c1147d424bbcc5bb6f" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c153b39eb9ada1b9389bc41c317d1799" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "78731ab6fbec3f26bad932349aa0cec8" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cf045d54a2014f068b081ad85e18a854" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "91a835020a791176b88354448f7887ad" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e5dea608897e260609eab7bb82285f00" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ec5a3b1dc1862b735699ed56e2ef299f" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3e25602a271fd102178f2b94d14ce13f" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "94c15eedebd3eff48cee9ec399b45568" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fb3674c4dba50828c0830fe36c20d21c" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f6a2ba3d6835adb7cb7e85705ee04f7c" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "e168d1b80462be24b26100aaaf178899" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "670d984eaf51b058981c193efae73ca0" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "17c8f5f45151656ffc852e3ac4c3bf70" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7415ef306a89ac3a5dbd4d5d669e678e" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "290927ada0443bee687d39036b78e215" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e2d1bf3e97013975554b2d74f61b46d3" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5491dbd37b53c5bf23b22d7bf82bb3c5" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1eb7670f0a32bc6e3dd0c704b45f869d" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "8061dfa5f81003fd8cf4d686f91f2ec0" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "146c0e71657c8b8f4494b70bcf052f0a" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5a67715366b1d82baef21f3a72eda440" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ad5e508109a7e01c2017143a27f274b2" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "f1b2b908ddfa86475f9f5ae710d45b01" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a154577a8250951a37677382d01ad8ec" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "de71cce067a02ca0ada37818d8f27666" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "223dfd63fe54f30df6b07b41f697806e" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "f93b71fb4311e4060f196a495a200b36" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c00608fda13cd10f654215798d88e4ac" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1d882d2eff186a652276ffa990c15e7b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "ebeb8f46fc869a6abbf7b89a55763ddf" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "916308043a1763a5e83d36a8db0f3765" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "981b0845839d3fd7098cd49bda85e0c9" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1248900a5594913295e8afff88097682" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "1284aa6448ebe6f018e895e2cf7c55b5" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "521c70b05f84d659a335fe56655921f0" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ad4de251162cf9a61f4f816afc6ca4de" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ec2379b078f28200a6f640640aac2c12" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "c0427aaa3a070e861c0bbcc103bca61c" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ec0e8ea926bbb26541e6dd0e3b77714d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "71f41c693cefa3f6e85166fc9c891f6f" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9bf243f4e924bf234f3406b81a8aa1e4" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "9db73f691f03ec47235d11b23ed2db5c" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "91c29af436e345b2a987688dfa26a410" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9138cae9f76dccb17b826ef39704e992" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "499bf327e448962f0ae6d62d7f9a4ba9" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "4e0cd3413a73ce538428ed16ae5dcb31" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bf50c050fbf987ec5b5593e1dc945a49" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "bbb90e25017cffbe18397efa855b7d00" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "6000b0c60e780281df0a5350d1bb52b8" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e29fd3a940b09cea9e30a64371bc6fc1" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "508df8e94b36089c6adaf09e6f5c18b5" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6ae263c3fe7d2287c1a199e28f7254eb" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "37a2d4f64852b8ec663351859fdbbe27" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "35dce17591aa7e72735ff1dc72b0fb0d" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5183ade84b789b368c3cb6aee59018d0" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9d44cdca45298fef35ae91ab4bf7aab6" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "1101f01fa1dca18d999c610517fc1be5" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2df0bdd1dbb4e38f3af7a7e41d78e994" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "566c7ca28e2fb564c83c026d2d712280" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5327d4c2b80201097e105468cf1818c2" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "2555648e42a1e949e2f6118dca592b1c" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13516800 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13531136 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31881216 + } + ], + "md5sum": "18923dd8c542d137719350cbcac7c729" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6faac75761f59c59dd2860962cb0ad85" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f4f6ded7e72f781b5b6e630cbeab90ed" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0b2c642f4d29d17540a636b2468a432c" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9d1cff8f23803b5bf5a2b794531292ef" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 22992896, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15738880 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15749120 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15759360 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17971200 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22394880 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 22405120 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22419456 + } + ], + "md5sum": "48f89a878ff6ca729e651bf6ab17882a" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fd078c33f9b10be6663cc8e88dc3b51f" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f0c3a2787643f74b5cd4c6062f2736ba" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ef1d2b2d08351939eabc8d46b68c4085" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "d54c5973f42da61423c72ef6ca8a3677" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "55873604ed6f7170786b160e30fb00d2" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f440cba338f668801935d6a872854998" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d70f5dd0492484749a0ad83d7795e080" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "138c126ef8392974916c230562cfd0a8" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bb31e974f3cafa0faf397aec69732738" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "bfdd594b2e0b527009deac3ee7de651b" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "38becfa913846fd05a71920cd7be6181" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "689fe9e772d795e3065cfe7f2cfffc63" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7a17957cb133a0a9a18bc03ec00148e2" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ed1167fa78759dfddd7b6f0c9aa36aec" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b5b2c5d00842603a409efabe6efb9ef8" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "6874980f9ed2f4b4a98c46bbef52686e" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0f5139342bd2330181946069bad8932e" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8053c02def42e8329962f485121c0784" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "f6a80fa5d410f79d2ee28c827e76f389" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0ffd0ea2b99710726241018e2e1f59dc" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c814f1f6e192ed8d9e024519acb20f00" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "edb3e29b389d73b46d0e5d2382163dc1" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "f0fad7c3873216f6e109acba9d6167e2" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a1642231b268c799d2e2b8d827bf8a18" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5d58fd8f7fcb7eef70f44550c135f444" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "46aad16dd5aacab50f6b2a119bf9b357" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "87549932b108075a97f6c6600124a623" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9edb9df9ccbe71e35bea94aa9c63abfa" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "94041b740cf313126f16564e027d6f8a" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3c2ed43ba647fed62a4f3e6f066509bc" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "f2e82eda706144228417019f2f0730ca" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "efdc5883e6796a86c7777f51cc413a6a" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d81e5fa36d4e5f33894338bb4a7cbbf3" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e0321674f40dcdd3a3301ac27bd19442" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "d94f6f874d7037c885f94bbb6796a549" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "55069dfd52ef66b194940c4bb40ed14a" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a8ce2b3692ea88b7d8aa7ee447ce6144" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "fc988b62e74e5291f380189b3e04c7fb" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "17a797a52dd9aedefa4034fc7d115e14" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2eb13721c280554e2d2af5fea56ad871" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ead8b46ffd2d96bbad5f483bb6d6a2fd" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "0ff3b2fa1a4a32a755a1e067aa061966" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6744dd21845a6c09eb6607c3de0c4ade" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d39bd04c8e650246e0343b391d2d2fdf" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a9cceef1c4f10b80a0e577828b98b9e2" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "015b630dd5d658a5bc4b9e8e81eb9705" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "134f8f08eb2eab9cf23726968dd63254" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "667fedcaacd7d6601b91d8024a6a5c03" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f6458ebf3084f7191825e839e8b45ab4" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "11ec0bd12937d82e754bb0bad994719f" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1d164a2f767ec826ef0b66fdd04194b6" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3d49fb6e87e37f9a63603a2c2e4362ce" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d617ca69e851ccf45af4b512a4586e18" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "014ee361c554f7112d2580ab0f857681" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6513d3e2ba8847ca0d8245f468304a06" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "846d3abb00a997cd2f8280419cef6479" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "021a2d3be0453079e6a3193d8e80957e" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5be340d54a989e899e600b563b238ab6" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "4ad2555277c722edd7e2b2dc1ecf135c" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a0fd8a06bf5a867d9ae53b9fce99a0b7" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "7854ac85b852273233b0d2e6117e86e4" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "34b9d9349ddd8cf49808ffb7c6b075ae" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c04fd035760c02f1c21799758412079e" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c3401689e6a2813fd6484e527bca9560" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "591d3e470695efee4f8fd27ca13a3a5f" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ca37a25b077b7bb334f3dc97a35d18a" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2ca832d2e16d16a2c44ad237b4a16c99" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a5437fd6f06c37f5679852a685ab7fd2" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "6f5307031bf9d45df5cb171d2635475b" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "54c5429583dede86649689ec2ab8b5b3" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ebcac004f10c100bd81659046d087bac" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a3b82dc091150d33d0449956bec14d97" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "41c2d499b44e41e0c460c4d3f7f1840d" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8782171263419ffcba792edc4716b929" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "53939b7a56c1a64c438be58b633aec46" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "07affe20381940744c7a4697e35c5285" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e1840a20f8f194bfda961cbc9556c4b6" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d3df75be6263785d7c4a0060066d8eff" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3a475b8411f5a4a528b176c45d7ef13f" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "17c5b1f6a98cbb6fd69b23da07384dba" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "043a418fd4741c6fb73e862c3ac3a333" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "25abdbeda9bca81c22e92349c047bff8" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "99799cde1f94b1723a1f74a66655424e" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "cd9a98f878bf05b0c349bc09792522d9" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6811140f8fd3c2ccfdc36cbd3635bb9a" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fc3be65439600ae74cb0a8ad148ef0cc" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7b3ddcfb2f20215d3bd6196dbd4d94fe" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "22364da1a7abd47b678285ee5eb05e40" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c174ec738dfb8456281737e006e469e6" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "325dfd8639cbb7fcc33d9c747440f3ee" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fb09595adfbace3daac86f7f65f8ec3b" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "7bcc8dae46dd1a0c30aab25a6d2b9dfe" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "42dbd9855c7349ff0253b5da8f57f92a" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fcd4c5f19719666120a5b6ab3ad23858" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "c515e7873bf0b40ca571f8b11b6857cc" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3cb562dddc89aaecff8818e2176f882e" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "72818e9acee43d6f5581e22c647b2712" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e376fe84f1acb5863db9fad9e2870ba2" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "e8b028836a12b345652520f0a4bdbda5" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "27a09084fa70fa0676c1ec4f6acd43d3" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "245084597103e1bfeb97ebead6f66af0" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f56f62fcda0e9c92a0f4b705dca7b271" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "7d40707a0bc7b7ef2aadb6761aae7b7d" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "aa1655cde65daa1a17d8602bc15016bd" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "607a18e0e585037ea20f1c9567b4dc03" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c995c8ceed104b5e58e1ff53ef944869" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "4100bd85a60b3be258b3607197cf7544" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13516800 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13531136 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31881216 + } + ], + "md5sum": "667983ae1fbe7494d55b39e2a7aaad75" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 13516800, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + } + ], + "md5sum": "98b90c357be8fdb1592eab51568e1c17" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..19fc4da77a92642181d862d33cee504b96cf28e8 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b3adcab897bcf2e74f041a77f59f2368d608b312b3682f58eb3ebe020c621e +size 389283840 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..12f0c65f55572612a2bb19850bd8b3fb41c5b615 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba31be5205ae9a53aee2a9445f146c9ab3af8fdbd32b7646bda5574add9f963e +size 70778880 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..911373b70d284a738e89d151d9e0639061f88f85 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbbfbd566b2f9c3208fa41ae97c313eb1c310a5ba1fcea105bc7a598aac91fc9 +size 18350080 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..04cfc9208e787c13000048df41682461b316746d --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a300b5993c1253fedafd0507bcc7c3b54e2615e46717175a03eb29bf241b28 +size 70778880 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bf75069bc7371d5acc6b14eabeecf3a4bb89f40 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d06456379b0774af22ccc50684820767104a8616ffdc81ad7c1f68dfeecc7a8d +size 141557760 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b3ba39cc67f844ff8745839b1ef587d1292fcd1 --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c09cef71a5eaf29c077878327871ccbcc3b7b088411c7e85cccc1a74121945d +size 18350080 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..2276c1ddf6a2a9ca7000cb4464f2cd4331c7379c --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec049e78751699a62d1134bee479092c645f07cb874f43fc828bcd04e6d34a9 +size 20760576 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..bc05f8067402b58add64ee996265ce9ac153b5bd --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca13a1b6f62da05b1afab6f6906955bc64738e4684712a0892f4a7e0dbb28b6a +size 70778880 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e84827520241a5128f76edb33a5f93155fe7d8e --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07164c48ebe61716e90d281c25a86db09983c9b59f3038e58914cf9e289da651 +size 141557760 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..788897441621557c9196a05b60ddab8e20a72b0d --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e73b1436617fc9c43d731ca6003689fcb30f717ade8aa9bc85ef2ff6ef42db9f +size 18350080 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc32b77bfa0a96b5bce7c7892d7c916ded06a717 --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c605d0593ae85b882cc97050a06c58bd6155d8e43cc0fc01bf4c675e51e8cabb +size 20760576 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff9dac3541e53e3e3e45315851939ce592948603 --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e44a9512eac1238765d2c2fa25f23fc99fa4bfcf8427cfc4f1097cd87e9a41 +size 70778880 diff --git a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..19b104a896b6a23596fcad5f8b615efb13fef215 --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799b871b05f2bcffd69d9384bbe5cdc6a60e860d8eaf7f5fe828e118aea5717d +size 141557760 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..a689408b0ac0f4ad1d675ae4021ad3aeb42bdade --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07f44a5047ab82586e8e45d2fe86699be5e4c457db4bf15145776507e7d82db +size 20760576 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..be3463d4a82f93d1d871c8c6ada35907d5c8f677 --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b2c1ed57efb43b8f6831198c627709f5377d48e21a8a4f9dd71d8e680aaff99 +size 18350080 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..81877af479772b7bf146e6155817614a6e6fd155 --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c51d16974fdc5c282df0d2b2f75ab55f9c4c866c508817975de7cc2db64d2d2 +size 20760576 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..6401a8ac23961303b506dc62abf30d14fba6e0e0 --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d136994194a231b469afd30d4341764af269ef3bbdc7268d964a991035101c +size 141557760 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f46ec97db1623fed9dc750add0c42958164fe60 --- /dev/null +++ b/params_shard_113.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60f04a790b7a6d7f87149ba3e5fc371997d02350246bfeaf1fd8d7d1f58f8c0e +size 18350080 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b7b0bbbdbc566371e2e39b128efa391ad87e238 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a9187433bc6129eef8137af4d2a5fd54e594ae06e52577ad24e2ccd6114f606 +size 70778880 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..535d6b6aff68624b6ea95ee7dbaadede0556c9b0 --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fdbdfbc9832fd33e3e3ef9604cd3a18a259e1b85e8f456c2e4277cbba23cfc2 +size 32055296 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..045b159910e03c4e9e886059c3ac412bb2bd9ff6 --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14866dba310f779d2473f1108480cba151949c9f65beee8584dd44e6c2bf4159 +size 70778880 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..049fb6a3092db05295c61d1b1e54ce6a7eeb2cd2 --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad597f681bfdb7dfcfe65fefe533af7a509342c7263659c87c2aabd42b69b17 +size 141557760 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..85b8ee5f340617feb374e0b7417dd6792fe260a7 --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a3b358da70c36df01a9e7342c377834526461c33b035c5ee49d1314fa3041b +size 27815936 diff --git a/params_shard_119.bin b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..76b4aebf19bc1c18c93af86cab355314a4bc6693 --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906d59408b431d8151ba1978ca22066f2ebf57c2bccc7eca3adb005e4bb654ad +size 70778880 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..74f315f71ac6abed48988071471611e368c52082 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6b5a5cb9034ea265e7da0acb8e624026fb26b048efb9a0b7f152256571f480a +size 70778880 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..2acd3cd26c7125440a8ffdc4f5eae8578e8a9f15 --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffcedd1b6920686caf9eec934f59c862e96dee2dfb0a9f765f4be57aef474667 +size 141557760 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..c75c82439de6a75fa4f283a113b8db8d45d6253d --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6a658a637c7e361bf96048f19f696d6bb5ff91fc5f7124f69c8b23b673b0d87 +size 18350080 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..49101b83c1a8d8ba81d926d05589e0814d81d0e0 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2261a5b30254ef754dd817f208db33b0bdcc59c8c6952c211b4eafcfd2b83d34 +size 20760576 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..f8dcf1fccf89dd6283ed5ae68a755c9df6e20b66 --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e957665735b582ee2b18f9232360584fc22879ff17971c414357f79f486c45f3 +size 70778880 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..19ef4c98635bd78bbce9304070a47fbdbfa4a536 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b33d89a5e9dd3e7a11900adde29953e20e96e2ce3f0593d2af7d88687b5d0aa4 +size 141557760 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..b83183656d9010ed67fb3c5737b1d29d1d3d051f --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf4d4911696f0c6a25be18ee1c83ba7e8328ca069e1803596799404294ea27fa +size 18350080 diff --git a/params_shard_126.bin b/params_shard_126.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc1f2e86314b198b4474e12cf1326e36d738426e --- /dev/null +++ b/params_shard_126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9873e0408f8870dab88354ebb44fa7dff9a1ca079afa6aecf633cbef75ba75e1 +size 20760576 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..b19a20b32c61d29fedea8328e0c4e813df561f2b --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a388dea7cc8aa693d044c75ffa888b35b3d3a345eb9da7832fdb493b98d03e57 +size 70778880 diff --git a/params_shard_128.bin b/params_shard_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1e3f989ad7c0911fbdebd9a92101d691dbc27f5 --- /dev/null +++ b/params_shard_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98003c402ba0ed7860d31922960e3402d598f531e2d684a1c8fa07b61518e624 +size 141557760 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..7efc159e5b1ad65d8d4e3d2e42126d7278208418 --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65727735a321cda5d454f6fd095fcdd1e2f2c56fe38f145cb524bd4b6eaf2f87 +size 18350080 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d7bb60b0f3bae894a06692bbf2d76de2024ad0c --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6ddb4375a21c26b9e7b36924e319e6f6f7cb88cba8a569a67735ba6fb2f0aff +size 141557760 diff --git a/params_shard_130.bin b/params_shard_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..7eef6a7d7a653b8e01fc42501f72e1c8891f5906 --- /dev/null +++ b/params_shard_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e73e49a0b395f944afe85b98d2a7abb5871d05f5515cb4f935f4ff4547d9520 +size 20760576 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..383db983e492e90707219fb95f50371698a2b012 --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ffcf3e1aec29108c26bdb325e938b47a64458b4136ac42e0e30a66f040ae257 +size 32454656 diff --git a/params_shard_132.bin b/params_shard_132.bin new file mode 100644 index 0000000000000000000000000000000000000000..466bf2e474e7312d1bfa2d28ea9a709f1685d6a4 --- /dev/null +++ b/params_shard_132.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb4883ebb35d7637220a30304cf646445e36310bf74360801e3c9a728894c2f +size 70778880 diff --git a/params_shard_133.bin b/params_shard_133.bin new file mode 100644 index 0000000000000000000000000000000000000000..83a2b7224b4fcc7325e0690db6b940cc84a72284 --- /dev/null +++ b/params_shard_133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5ed5b83e22f50d011b3a6c3303ae4136e34f6f5cc7c19c803c1b427a9f6642 +size 70778880 diff --git a/params_shard_134.bin b/params_shard_134.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ef3b712cb94cc3d2f2675d088afe473cec0c396 --- /dev/null +++ b/params_shard_134.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6d7f210b25f70de7823665b5f61f22a17e62274bd1f98e36b0399e02aeca08e +size 141557760 diff --git a/params_shard_135.bin b/params_shard_135.bin new file mode 100644 index 0000000000000000000000000000000000000000..f2e78fdbd26dfb6a6557fb233eb542d5f55cac70 --- /dev/null +++ b/params_shard_135.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60abfa2d73d1c01e5838589b6d8ad13395b4b2cc03f572fa936dd2584f558c19 +size 18350080 diff --git a/params_shard_136.bin b/params_shard_136.bin new file mode 100644 index 0000000000000000000000000000000000000000..d93ce93d006c85fe41513e7856afef5676204244 --- /dev/null +++ b/params_shard_136.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fbaaa7b4f82652df8435b86e6c39e524454189c569dee777f61fa71934f5055 +size 22992896 diff --git a/params_shard_137.bin b/params_shard_137.bin new file mode 100644 index 0000000000000000000000000000000000000000..31abd5a49026ca46cb513e255b9dc80af3c6536b --- /dev/null +++ b/params_shard_137.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e10c8ecf01eebcd3a38858bd8cb0b8d4b7f77effde9c435c55f2c4669a6d0d94 +size 70778880 diff --git a/params_shard_138.bin b/params_shard_138.bin new file mode 100644 index 0000000000000000000000000000000000000000..1c13255f5576364d2bf3b5dce5f1afab089e50d4 --- /dev/null +++ b/params_shard_138.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d493fa661b3c94148bb737d09a3e2579f2b738bb9423872aeb132a715daf9e +size 141557760 diff --git a/params_shard_139.bin b/params_shard_139.bin new file mode 100644 index 0000000000000000000000000000000000000000..3776d1fa60de7722bc3f3c411f9b4755ea84f2ea --- /dev/null +++ b/params_shard_139.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda3c6b4bb5b97bfa3c02989f1800460f0ed5a06d501374dfe7589c3206ed717 +size 18350080 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..440715c098dc489f7723922bfd5a9d0990ab39aa --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980a3781294d8b80e11d4fdc0c6b3e2438fa8832c3282f1b2e6a362a16177fe6 +size 18350080 diff --git a/params_shard_140.bin b/params_shard_140.bin new file mode 100644 index 0000000000000000000000000000000000000000..3a623d5c78dc20a8a82e44d6e3a959c84f6483c0 --- /dev/null +++ b/params_shard_140.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b87991e22fa11a1dbff2e20b11fa37b59742a09fe88479f908de93bfba7d33 +size 20760576 diff --git a/params_shard_141.bin b/params_shard_141.bin new file mode 100644 index 0000000000000000000000000000000000000000..eff7af202421fc3fe273cd34759aa9bc58f48730 --- /dev/null +++ b/params_shard_141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7614b56c8edc1a8c75f7019b2ea1f333fbfa60f7a067f899dd4685849b1a43f +size 70778880 diff --git a/params_shard_142.bin b/params_shard_142.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d369ef42887fa2baa345498fc2f87ee1d42a6ef --- /dev/null +++ b/params_shard_142.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69187711aac5438eedef42173614d3bac1f5402eb1951ae28f849d578d42afd7 +size 141557760 diff --git a/params_shard_143.bin b/params_shard_143.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fa64ccf1679a2cb4ab6ec90b58b020cac4e707c --- /dev/null +++ b/params_shard_143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81457f04750279e7cfb2a8e9dba077807070fff29d2ca4afbfdd7257268d58a +size 18350080 diff --git a/params_shard_144.bin b/params_shard_144.bin new file mode 100644 index 0000000000000000000000000000000000000000..d666d240703d3507a61c213d7cecb78deea04f19 --- /dev/null +++ b/params_shard_144.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afb84e2539fb78e2bb62272da8114057478fd8a764bee9769571df42bf4135e7 +size 20760576 diff --git a/params_shard_145.bin b/params_shard_145.bin new file mode 100644 index 0000000000000000000000000000000000000000..9133b24957ddf949d9f0a1461d9b0dfebb39e5dd --- /dev/null +++ b/params_shard_145.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa60485de430ed5f782abcb5ba58c838bdbfb353fcf5829617c5b6575acfdd5 +size 70778880 diff --git a/params_shard_146.bin b/params_shard_146.bin new file mode 100644 index 0000000000000000000000000000000000000000..91fe79f6b20cf818b63eb849a96feb5e0541035c --- /dev/null +++ b/params_shard_146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8262f3252e7c60300424a027b802f3afa6bd1ad98ff0d998488f465b48e180bb +size 141557760 diff --git a/params_shard_147.bin b/params_shard_147.bin new file mode 100644 index 0000000000000000000000000000000000000000..8ab6155a2e8912a3fb3601d9487d4d6c04c6f373 --- /dev/null +++ b/params_shard_147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f0176dd56eb91204f18f5278b15de251cecc4653bb1ffe4be826538c24d66a +size 18350080 diff --git a/params_shard_148.bin b/params_shard_148.bin new file mode 100644 index 0000000000000000000000000000000000000000..1808487612c27997ae9f48156ff8ea4319037522 --- /dev/null +++ b/params_shard_148.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc11c0659b23f46d770592963808c773654c939ac407879f88137ab978449460 +size 20760576 diff --git a/params_shard_149.bin b/params_shard_149.bin new file mode 100644 index 0000000000000000000000000000000000000000..d31fd7a50316b16892fef0c32872568cb0df6e85 --- /dev/null +++ b/params_shard_149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aac3720d68db8789e4813b0883ed8a572dd56fe4231e0df2c6a6b60c6101e8b +size 141557760 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..288544a6f4dae899f37b6ff90efb7c110c43ae5e --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754913450d909eb887a0037c5d17e708bf5a7b55d3d7410c032bd1e44fd1ebf6 +size 20760576 diff --git a/params_shard_150.bin b/params_shard_150.bin new file mode 100644 index 0000000000000000000000000000000000000000..f18cfc576b355b58d60a48505a0eca91ef99a3a5 --- /dev/null +++ b/params_shard_150.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517be39e741dd98c84c990355fe86c597788c4fa88f114fb9758b8fe4edf0369 +size 18350080 diff --git a/params_shard_151.bin b/params_shard_151.bin new file mode 100644 index 0000000000000000000000000000000000000000..5db9c8c57ecbe6eb6981ab9ca09c5f32ee48bd55 --- /dev/null +++ b/params_shard_151.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e56884f13c6a3d4752892c54dcf14c0182d2963da96ed75773d56fa31e179597 +size 70778880 diff --git a/params_shard_152.bin b/params_shard_152.bin new file mode 100644 index 0000000000000000000000000000000000000000..203f74ebc0a1a0bc267270ddd142fc6a19cb58d8 --- /dev/null +++ b/params_shard_152.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb680fa0b32fcd6b6a2316ee20a256da37766461d1c51125c69d1af2a7c49d1 +size 32055296 diff --git a/params_shard_153.bin b/params_shard_153.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0182fbfc6af0b1635d239d227fa8f41545fc5b7 --- /dev/null +++ b/params_shard_153.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad74498cffbec2dcf4118a43a483ae07a6fa903a9cc393eae7aaf883fdf0fe55 +size 70778880 diff --git a/params_shard_154.bin b/params_shard_154.bin new file mode 100644 index 0000000000000000000000000000000000000000..f987409eb4ecd4702c9314e3555e4d63ddb1668c --- /dev/null +++ b/params_shard_154.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22bcca68ccb5f5f849b45060849698908676207a82120bae9c07c5f43a67b032 +size 141557760 diff --git a/params_shard_155.bin b/params_shard_155.bin new file mode 100644 index 0000000000000000000000000000000000000000..15110de7bebc58971e9099f2893fbb8647fedc4d --- /dev/null +++ b/params_shard_155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ba9a769be585897fb9be087e5b7a12897e3b3380e642add88b38121d858d73 +size 27815936 diff --git a/params_shard_156.bin b/params_shard_156.bin new file mode 100644 index 0000000000000000000000000000000000000000..6450f323a61f4663761745207ad8d3df62959b9c --- /dev/null +++ b/params_shard_156.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f6214ceb27d5ae20857325708aede698cf9eeadf2ca5f1d8f861a05a9c9cae +size 70778880 diff --git a/params_shard_157.bin b/params_shard_157.bin new file mode 100644 index 0000000000000000000000000000000000000000..7144effb58f904b75b7f016d228cf4824df09691 --- /dev/null +++ b/params_shard_157.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7759ff33f0ebe8050ee7603b056ab8806cc2a26f5ff84c3d1392ba5d38c3e082 +size 141557760 diff --git a/params_shard_158.bin b/params_shard_158.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4f85d077ca294e1682f6d759bb84bd447d15953 --- /dev/null +++ b/params_shard_158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fc5a432c3011577b3cd34c0e039f942f4d14ec5bb416be1c761d1cee9d4a1d4 +size 18350080 diff --git a/params_shard_159.bin b/params_shard_159.bin new file mode 100644 index 0000000000000000000000000000000000000000..28293ca4f5fcc261d3ae85eaddeb1b55ed931f5b --- /dev/null +++ b/params_shard_159.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9215137ef92acb8e793d7014f9106646d4b7eb0463158a9a9ba2404b5abcc33 +size 20760576 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fb71e93fe25d852198fc8a8df24d08c38954050 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9442824716483c79825b39307487b531e89f5f2c64126b0004bcbe9d87e5d7ce +size 141557760 diff --git a/params_shard_160.bin b/params_shard_160.bin new file mode 100644 index 0000000000000000000000000000000000000000..21b164de946583120cecd2d424ad18ee278cbe4f --- /dev/null +++ b/params_shard_160.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b67653217baf89be44090ac5bfc840b9f77f32a16d3f0effb6bf82612574e1 +size 70778880 diff --git a/params_shard_161.bin b/params_shard_161.bin new file mode 100644 index 0000000000000000000000000000000000000000..53d1c988adf1c4714b9faaef18514558ce52fc9c --- /dev/null +++ b/params_shard_161.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a252912baf0eae8ad08b48ad449b9cd9a71132dcb1791c5d892dd3c2cbaffd +size 141557760 diff --git a/params_shard_162.bin b/params_shard_162.bin new file mode 100644 index 0000000000000000000000000000000000000000..032e7b1e2abd69543d4f09a6ca68edeba7c13c49 --- /dev/null +++ b/params_shard_162.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8f35f62650344383027ecfd7143cf4ad0d317f005af61b72391103efe360c5 +size 18350080 diff --git a/params_shard_163.bin b/params_shard_163.bin new file mode 100644 index 0000000000000000000000000000000000000000..0527048da1aaf5ed3b607cd45c5f85d533aec4ee --- /dev/null +++ b/params_shard_163.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52171adbe6da422664e9119d506d93c88c501af26136c463e847484ed6f56df3 +size 20760576 diff --git a/params_shard_164.bin b/params_shard_164.bin new file mode 100644 index 0000000000000000000000000000000000000000..664cf5cc83750fd59fae89d909be9c67579aff3b --- /dev/null +++ b/params_shard_164.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c99f4d164e2243db456235dd50cb68768d774729734af352095666145a37252f +size 70778880 diff --git a/params_shard_165.bin b/params_shard_165.bin new file mode 100644 index 0000000000000000000000000000000000000000..163237fcb149fcaea4ca6f56819655c19435a587 --- /dev/null +++ b/params_shard_165.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9668a45bd2b17d810737bfade262347cf2bbb9593f33eb254291d76aebaca5b +size 141557760 diff --git a/params_shard_166.bin b/params_shard_166.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f6636c7e6ceddf00672724ddd430880b8798264 --- /dev/null +++ b/params_shard_166.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d365b6316492f099b0169b8a3d23b0462e3f2a6f9e56c68e1e01d1684b5c397 +size 18350080 diff --git a/params_shard_167.bin b/params_shard_167.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dcd1ed863fcf2b24fa7302e01c726652d7c9f76 --- /dev/null +++ b/params_shard_167.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64562299e70617191009d76b82316156aaee600429108d482a8906f112734312 +size 20760576 diff --git a/params_shard_168.bin b/params_shard_168.bin new file mode 100644 index 0000000000000000000000000000000000000000..83d049a5882bde97c022a456ee4fed9d27f8e4b8 --- /dev/null +++ b/params_shard_168.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a30427b5d65d5f444071a34724db0783f30f78d95c9deba15e99288deaade8 +size 141557760 diff --git a/params_shard_169.bin b/params_shard_169.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fe85d593ce6a2ecbfcec2c906fd5e2a4ecc56d5 --- /dev/null +++ b/params_shard_169.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ee9f66993a222dd44a746615ae84db65e530a15f4275385b814cadfbc127bc +size 18350080 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..abf1078505f69dfc60b4d81609d22903c0df9f7a --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e913d86e6b3512e6a5be558f6988c12690cf74845eb85f51841f3673ee9dcc +size 18350080 diff --git a/params_shard_170.bin b/params_shard_170.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a4c5a96e3dbf3ced90e5a49ce609473dda90315 --- /dev/null +++ b/params_shard_170.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1700de060bb224532309acca3d8c03aad3cfc00801b68a6c052680b7f74e306 +size 70778880 diff --git a/params_shard_171.bin b/params_shard_171.bin new file mode 100644 index 0000000000000000000000000000000000000000..b840a8285de92e3d260b244d554d1eab0caefd05 --- /dev/null +++ b/params_shard_171.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c2400eb36c9f51f0c439260038e86a311269e1de17c926b9c8ebdcf9ae703b1 +size 32055296 diff --git a/params_shard_172.bin b/params_shard_172.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcf0cea9f1e4cfd2b88e6fbe85b8ef12f883b464 --- /dev/null +++ b/params_shard_172.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5eca2739b6f3ce4902dcde8bf0eb050896e619b6f1ff219fe715809609b4ca +size 70778880 diff --git a/params_shard_173.bin b/params_shard_173.bin new file mode 100644 index 0000000000000000000000000000000000000000..fed3fb0f1338eee90e02f7c9fa3f9c552f5832d5 --- /dev/null +++ b/params_shard_173.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49fc2ca74bfe038eedf00c550a1702c0a62f3e46d79b250b0d51b3b6c65d993 +size 141557760 diff --git a/params_shard_174.bin b/params_shard_174.bin new file mode 100644 index 0000000000000000000000000000000000000000..37af6e7efe45a84b698e5277949fcdd65a1a9b01 --- /dev/null +++ b/params_shard_174.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81453f6dd2f96eb7f4de1fd5cc2b3149cbeeb89dbf347116c047aa8407bc4c5c +size 27815936 diff --git a/params_shard_175.bin b/params_shard_175.bin new file mode 100644 index 0000000000000000000000000000000000000000..f98229a8f334152a4305645daed7779f89270071 --- /dev/null +++ b/params_shard_175.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a70410dddb9a694f216e2368c24a0c4b7626d9cf5f96bfbd75e04d2f4c408f5 +size 70778880 diff --git a/params_shard_176.bin b/params_shard_176.bin new file mode 100644 index 0000000000000000000000000000000000000000..5840afcb74cb2a90d0bbe7e798423e6271d2f6a8 --- /dev/null +++ b/params_shard_176.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9efb1de6b93e5257a871418df529e524a6af3fc9d5acf0de11fbbed179a5ae79 +size 141557760 diff --git a/params_shard_177.bin b/params_shard_177.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa1f7fbffff81195b952d542e14f87102ab9c999 --- /dev/null +++ b/params_shard_177.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f501f36cce555dd1dfaa80dc703fc887aef610291ff8e91d368ce31682d784 +size 18350080 diff --git a/params_shard_178.bin b/params_shard_178.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d78320ca80516dbdb49eddff955e14877a947c3 --- /dev/null +++ b/params_shard_178.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b40e94a4e098fd7cad888575070ed1374570fffa521e356708f63fcd2869ca2 +size 20760576 diff --git a/params_shard_179.bin b/params_shard_179.bin new file mode 100644 index 0000000000000000000000000000000000000000..203b53ac9f833cbdd2e186b259ddb60343e501e0 --- /dev/null +++ b/params_shard_179.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c01e8694942d2747ad17b7ea821700d0af94e05449e327dc9c912c5fdff1c34 +size 70778880 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..3149c9301a0f92a4be335ff21fb117cb75c51b98 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d7544c0dc06fc842d1522475d534c23bef9ebc88c8932448c8472d868b47132 +size 70778880 diff --git a/params_shard_180.bin b/params_shard_180.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f4b3213a4532230d2d6d0832a7cb051c2c6fb6c --- /dev/null +++ b/params_shard_180.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fac8cbd6974f36c4de0ef2fc0953198552c79b75be343deee6708ac5b662d00 +size 141557760 diff --git a/params_shard_181.bin b/params_shard_181.bin new file mode 100644 index 0000000000000000000000000000000000000000..c36a8f4268386c5dbc0e6ad2189ab3de1f4a7fc4 --- /dev/null +++ b/params_shard_181.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a62034a4a24f6e0fc390642d0b0c63c22afbeb937f7fa3d71fbe95b970084a6 +size 18350080 diff --git a/params_shard_182.bin b/params_shard_182.bin new file mode 100644 index 0000000000000000000000000000000000000000..57c198f5f468fe46c16de06bf5fecea7be89d388 --- /dev/null +++ b/params_shard_182.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882ff47a85716558b6a29f7861f125ee8533474853e85a82c5a5e32bb2024111 +size 20760576 diff --git a/params_shard_183.bin b/params_shard_183.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8de2b6d0a0a7ac2461f3180011263a54abec552 --- /dev/null +++ b/params_shard_183.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd29ced8b5d3d502a0880326a4f8a56830622d8d56080471e2d64375c5fddf7 +size 70778880 diff --git a/params_shard_184.bin b/params_shard_184.bin new file mode 100644 index 0000000000000000000000000000000000000000..377a5444390cf49ceb4aadff1d1ac001ac956e87 --- /dev/null +++ b/params_shard_184.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a8bc7db2fa4f871c53479866ce8366115dbc25d48e2e2388a85e53cf13d6292 +size 141557760 diff --git a/params_shard_185.bin b/params_shard_185.bin new file mode 100644 index 0000000000000000000000000000000000000000..813041335f398336fec536fef668913ec72cae0f --- /dev/null +++ b/params_shard_185.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9524755236601f5b416c001b9aaf1c381ddcfb12ba7e885375f89884f2174bb +size 18350080 diff --git a/params_shard_186.bin b/params_shard_186.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0f2d25590aabb56d0898797fff8a8d392b47eb1 --- /dev/null +++ b/params_shard_186.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575c128c049881df85e54c41470d494358dc1a811f1005c09ecbfce8024cf227 +size 20760576 diff --git a/params_shard_187.bin b/params_shard_187.bin new file mode 100644 index 0000000000000000000000000000000000000000..0abd309f838453000fbdcfd3253bc201580a6176 --- /dev/null +++ b/params_shard_187.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5453d0e8b6b63a83fe9836e3bd997b1d585eea2cb38bc098201a46e86b9fb1e1 +size 141557760 diff --git a/params_shard_188.bin b/params_shard_188.bin new file mode 100644 index 0000000000000000000000000000000000000000..d21c5ccfc3c9d94cdc495a8e2a86f36407444d0a --- /dev/null +++ b/params_shard_188.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea27f1145c2286d036ae98a2622338d0f74dea803d8b97638bdd6c8b497edbcd +size 18350080 diff --git a/params_shard_189.bin b/params_shard_189.bin new file mode 100644 index 0000000000000000000000000000000000000000..670824a4b4401528632f08b8f54936e613ba8326 --- /dev/null +++ b/params_shard_189.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c77fc4f8a8d5767caefbfc295de50b2becb0c273264d035af38dbe931ebacb5e +size 70778880 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0c32241b3358f8845cc8c350a258df778851a45 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33eb771fc80246ee87bc814ccaefa962028782fde987ea721b898bb27a29536d +size 32055296 diff --git a/params_shard_190.bin b/params_shard_190.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4673be72da15d1c143e97b79b2616e2e5b30b0a --- /dev/null +++ b/params_shard_190.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7074d022e1b8f3a124f314629ced9002ad83b370f81f115144084f3e0a3a61a5 +size 32055296 diff --git a/params_shard_191.bin b/params_shard_191.bin new file mode 100644 index 0000000000000000000000000000000000000000..f0f85e68c2aab4167dd1b605f7d3c12fec47ac1c --- /dev/null +++ b/params_shard_191.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897fb38f43920886c0fab1bb856ef420eaaac45c0d79cdff26347f6084be716f +size 70778880 diff --git a/params_shard_192.bin b/params_shard_192.bin new file mode 100644 index 0000000000000000000000000000000000000000..019b65163958f2aac8fe2e1d0c0b67579f151b1c --- /dev/null +++ b/params_shard_192.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b2cd899420f01d5bcd569b6f1c4d860d0a088930b710de5f3f8ee47a71b168 +size 141557760 diff --git a/params_shard_193.bin b/params_shard_193.bin new file mode 100644 index 0000000000000000000000000000000000000000..6aedd1fe4cd83795c624d5eeff617c4d9b50b095 --- /dev/null +++ b/params_shard_193.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:767e4adb7435f510e05357ff700688c0c4a2b39acddc6edfd3135654658d5637 +size 27815936 diff --git a/params_shard_194.bin b/params_shard_194.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4503f44c7bd372f80c3aadeaf6d76498f96be03 --- /dev/null +++ b/params_shard_194.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20327dff00567d08985fbc65469327e058581ae443238aef0e58c609bcfb15e3 +size 70778880 diff --git a/params_shard_195.bin b/params_shard_195.bin new file mode 100644 index 0000000000000000000000000000000000000000..c7f52f79224eb9406bd53b474568908a02a27092 --- /dev/null +++ b/params_shard_195.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6b656be1cd14b268a5e23b0d6985d26f062e414368a06655fc1c82bafc71d8 +size 141557760 diff --git a/params_shard_196.bin b/params_shard_196.bin new file mode 100644 index 0000000000000000000000000000000000000000..26bfcd73606f40ced53eb4aeec5e4d463c94f138 --- /dev/null +++ b/params_shard_196.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfed7c903f5c34d2d863940e00372b44e0ab3e21b293c94707894e6b9fc36ff2 +size 18350080 diff --git a/params_shard_197.bin b/params_shard_197.bin new file mode 100644 index 0000000000000000000000000000000000000000..591ff4f715ccb6b4f471c8bbe309ffb29f5945b1 --- /dev/null +++ b/params_shard_197.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb772eb8db6f5413a26f3054735204de87023acb6ac7d3ae9b1d5959b01b716 +size 20760576 diff --git a/params_shard_198.bin b/params_shard_198.bin new file mode 100644 index 0000000000000000000000000000000000000000..9546a0dae07cf3749e6f8b6b5c743c619c726c33 --- /dev/null +++ b/params_shard_198.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a636cde02622c2628fb2b67fa6c78b5f9fdf4ffd6b866814262098fb1ba27e +size 70778880 diff --git a/params_shard_199.bin b/params_shard_199.bin new file mode 100644 index 0000000000000000000000000000000000000000..be1286fd7d384af6f61983fb452e4ce3f8db9f2d --- /dev/null +++ b/params_shard_199.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46e0a717e8216dc664c82decbf02d056382f662d7ea431b2a37fac852590d72 +size 141557760 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..a750e8ebe1c75b00789f21313787368d1c0b3536 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9e4dd567f57ebc8130debaa9773a285744ebb4d1332278f6f2dc7bbecec5c2 +size 141557760 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..12623f0b138658215f0b9318ca271e6cea1c22f6 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23fe0ddd3f7125306c79ea64004b8c1661738d4cc9321834dfe506bdbe9ca7d7 +size 141557760 diff --git a/params_shard_200.bin b/params_shard_200.bin new file mode 100644 index 0000000000000000000000000000000000000000..16f6d4e759c3948c1bb84bc6bb3ad25cd536cff9 --- /dev/null +++ b/params_shard_200.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c31dc4f10a083a30c83e5d52f0f373fd131c4fe5dec95daedf72114a020b53a6 +size 18350080 diff --git a/params_shard_201.bin b/params_shard_201.bin new file mode 100644 index 0000000000000000000000000000000000000000..961f8a1b6f724e46f3c0f156d94ab926a296bf9a --- /dev/null +++ b/params_shard_201.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb667d457e52f1832bb201b563fb317b9edeb26570858db07113dd834a17fa1 +size 20760576 diff --git a/params_shard_202.bin b/params_shard_202.bin new file mode 100644 index 0000000000000000000000000000000000000000..c5c9a90f9961e2fee82b07fc5c912a346a6a1aeb --- /dev/null +++ b/params_shard_202.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bdbf76f4cdb5835691645eac32685c8ae4d083d50d3f34f4bc029c2b3ec8711 +size 70778880 diff --git a/params_shard_203.bin b/params_shard_203.bin new file mode 100644 index 0000000000000000000000000000000000000000..c148f35da0fa10bfa57495082f3bc0e3c2cf2f7f --- /dev/null +++ b/params_shard_203.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:818bceac0caf4a2909258ce09ddaa99587da629a3c5b006772443673c8c57e45 +size 141557760 diff --git a/params_shard_204.bin b/params_shard_204.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a99f8639f05eb375177a2003825cea23cddcbdf --- /dev/null +++ b/params_shard_204.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256329ae85040791995b2422f7acf8ac70791c2a819ab3437cbe74dd8bd3937a +size 18350080 diff --git a/params_shard_205.bin b/params_shard_205.bin new file mode 100644 index 0000000000000000000000000000000000000000..9668fa4d507a55c66202c1f61982f8af5cfb31d6 --- /dev/null +++ b/params_shard_205.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3df76399f26b918abdf52a14eec900d79653db70788d2687e336e0c9756930a +size 20760576 diff --git a/params_shard_206.bin b/params_shard_206.bin new file mode 100644 index 0000000000000000000000000000000000000000..16c7d475e3e28d48058abfa624145699f28bba35 --- /dev/null +++ b/params_shard_206.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d867e6d87e8fad6f466c6c59ecfd764e63c96695f29eb6defb2311923e9f8845 +size 141557760 diff --git a/params_shard_207.bin b/params_shard_207.bin new file mode 100644 index 0000000000000000000000000000000000000000..434590df9f41c888fbfc3fc60d2a26b750259ff7 --- /dev/null +++ b/params_shard_207.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8005105654ac4f6fc4fcaa86fdd77ee35aad20b675bd6e6fedf08cfccedabc +size 18350080 diff --git a/params_shard_208.bin b/params_shard_208.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbbe16ea2c83e14f5c72558681bcb573f50b0a41 --- /dev/null +++ b/params_shard_208.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0436d8ca14af2237bbf1528acf1ba8aaac4e573bf291ad8c23aa9c4ff4929779 +size 70778880 diff --git a/params_shard_209.bin b/params_shard_209.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab4b258a38aaceb7b7c9331d56e23f4a32ce7a29 --- /dev/null +++ b/params_shard_209.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dc7365a4a3770c679c0e0dd85b26cafc83c186fe9d0e5cbed4ed53bcdb89a1d +size 32055296 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..74ff8b59e75b8606448e23081877faea2fd852d4 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c831840dc53df7cfc147186281d249ce919004420eadb6a2dade67833d2f112 +size 25583616 diff --git a/params_shard_210.bin b/params_shard_210.bin new file mode 100644 index 0000000000000000000000000000000000000000..9658167a1ab0ffcef415d18adf2ec5cdc1e86fa8 --- /dev/null +++ b/params_shard_210.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7c7fd35ec5a185beb3629684148aa25a25740f42eea1531063fed8675fc336 +size 70778880 diff --git a/params_shard_211.bin b/params_shard_211.bin new file mode 100644 index 0000000000000000000000000000000000000000..4bfd0c9decd9711c75f1f8996fbbf412bf3a8fa1 --- /dev/null +++ b/params_shard_211.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f5592f6634e01443a3109efa069b0059c4fd1e271dd6d357102426ee86c746 +size 141557760 diff --git a/params_shard_212.bin b/params_shard_212.bin new file mode 100644 index 0000000000000000000000000000000000000000..90e71a59c199f87a120bec8187f9c0ff815f1682 --- /dev/null +++ b/params_shard_212.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8a64afdc8ad189ded489f625fba05a8f1f9e2bafa940c857210efe4d3fecfb +size 27815936 diff --git a/params_shard_213.bin b/params_shard_213.bin new file mode 100644 index 0000000000000000000000000000000000000000..d25201cca0847c1fb5f1605488a889e1c9f821f6 --- /dev/null +++ b/params_shard_213.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0900c143b7b352555a60107b062a3c6ee4233f1c4531ee0e2462b210cf141c4 +size 70778880 diff --git a/params_shard_214.bin b/params_shard_214.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddd341b103423975cc59c30efbe164810380c96b --- /dev/null +++ b/params_shard_214.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:640f86ab09d3501b6c54b24364553f7bbdc468c490c558a4082a4a7c086f53ab +size 141557760 diff --git a/params_shard_215.bin b/params_shard_215.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c3f2651e9dae078636a9646500ea18615b663fd --- /dev/null +++ b/params_shard_215.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547992be566817a1e9d8f9ee65f9cffd2632c900129f87020e79e3b8f5fe6fec +size 18350080 diff --git a/params_shard_216.bin b/params_shard_216.bin new file mode 100644 index 0000000000000000000000000000000000000000..b60024e41a73c0a0e829c07046a5a0defc3081a5 --- /dev/null +++ b/params_shard_216.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4726884b0bd41937335bbe40d155f0064d59745195a2c6c483d3fdae3a5f91c8 +size 20760576 diff --git a/params_shard_217.bin b/params_shard_217.bin new file mode 100644 index 0000000000000000000000000000000000000000..c9c553522ab4a5cc614bd9f2bb0937b1f66948fa --- /dev/null +++ b/params_shard_217.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb4cd0de6a58e4761028080d2390d1867baff7502b78f383a2be1fc80d3c518 +size 70778880 diff --git a/params_shard_218.bin b/params_shard_218.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef29d29dae986465f33c8d9deace1c3be31db209 --- /dev/null +++ b/params_shard_218.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02ea03ad6117e418d7549ad6a3cd450cd69869c500784269b2101630b7285121 +size 141557760 diff --git a/params_shard_219.bin b/params_shard_219.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf9ccef1055455f1f7c250da4033985b05d948cb --- /dev/null +++ b/params_shard_219.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0b6124302312976c92f3de237cf417678ea702aebb91ff1fe86f0317ab93e7 +size 18350080 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..6738862552545f3d4332a4fdee4955eb93e84b8b --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13476a0feb3ce8ef111cf6faef7705de9985e5639c420033a5d60d6f0ba408c2 +size 70778880 diff --git a/params_shard_220.bin b/params_shard_220.bin new file mode 100644 index 0000000000000000000000000000000000000000..1898e861efe7ad1de7edcb6061f8bec5445deca5 --- /dev/null +++ b/params_shard_220.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f56e7fd3e55bfd634bd6071f229e927c88d8e59fea8370dc7fa951d0bac960b +size 20760576 diff --git a/params_shard_221.bin b/params_shard_221.bin new file mode 100644 index 0000000000000000000000000000000000000000..f746e71044e08765a32b39d82308203b18d735e6 --- /dev/null +++ b/params_shard_221.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed33326dd1da95e0c15bdaaddaa3b317bf0fdc9bfd1ade86efffb99b34c823e +size 70778880 diff --git a/params_shard_222.bin b/params_shard_222.bin new file mode 100644 index 0000000000000000000000000000000000000000..34330c8ec365f9879b3667e0a8108e175b425682 --- /dev/null +++ b/params_shard_222.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e1d12f8d95d52b9d9b377adac77aea7ee75ae2c79c22327bd435a695a74695 +size 141557760 diff --git a/params_shard_223.bin b/params_shard_223.bin new file mode 100644 index 0000000000000000000000000000000000000000..fab599a9e9db3ef883cee9f45655b9ad436e213c --- /dev/null +++ b/params_shard_223.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0db2ecf9d4aa9d56711c1845fb1b7e568be07272b97be02d7ee296c63b47bae +size 18350080 diff --git a/params_shard_224.bin b/params_shard_224.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8f79c293021980e7f97ae31c263d62eb4db8a89 --- /dev/null +++ b/params_shard_224.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1316dba2cc771550508a07d15de8c4c61f8262d0fc13734fa8aff6bfd06134 +size 20760576 diff --git a/params_shard_225.bin b/params_shard_225.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa41958bc14c4ae15b615fde6f791bc1d15a81d8 --- /dev/null +++ b/params_shard_225.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2de5cf1ce53b0cbe69a1d7dd3502232febadfad6ae3de36f054f0b7a296b61f +size 141557760 diff --git a/params_shard_226.bin b/params_shard_226.bin new file mode 100644 index 0000000000000000000000000000000000000000..07b6b82afc21f94b76f4e2647b992c52f6954b32 --- /dev/null +++ b/params_shard_226.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30a805a2b1a4639b9adddc3c49b9d5cc2f45aba2dd40751ab2917a40d50e5e1 +size 18350080 diff --git a/params_shard_227.bin b/params_shard_227.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce40d2d3045c3f24fc1c37967bf7958169be563b --- /dev/null +++ b/params_shard_227.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc10b15f0a04ae36fe6f4b469d716a7e472708f9e1695668c8f54200437f2b3 +size 70778880 diff --git a/params_shard_228.bin b/params_shard_228.bin new file mode 100644 index 0000000000000000000000000000000000000000..f82a4e96de941aac6a8862e11c1a4ad51d882bc0 --- /dev/null +++ b/params_shard_228.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce4c4c63018ba362321995df82d7b8e865e9f53f591ad744703f4832643938e3 +size 32055296 diff --git a/params_shard_229.bin b/params_shard_229.bin new file mode 100644 index 0000000000000000000000000000000000000000..703ebf83e84abb67b2227c928fd9865c4b2a0fbb --- /dev/null +++ b/params_shard_229.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63b5d5fefcde952d9019c8e683326cd68d4cf2db0a642fae95bc46770b620b7 +size 70778880 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2aaee3db60b1024d1d8be4b1200d0d8bdd2cadd --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bf4e5a64b343fc95027162c3456c130873fd13b1c5181113939aa20c7585635 +size 141557760 diff --git a/params_shard_230.bin b/params_shard_230.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b552d95a831378620aa5d00d7afa6117dc77c09 --- /dev/null +++ b/params_shard_230.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a80c83e53566933eef99fc751ecccc57e4d4c4fcf4acce9989ae46a5b1e930b +size 141557760 diff --git a/params_shard_231.bin b/params_shard_231.bin new file mode 100644 index 0000000000000000000000000000000000000000..62abeec53b80ac2fe4a928f7866826c5f502929c --- /dev/null +++ b/params_shard_231.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c46b50e081b8027a5a27b419bcf6ed341763f1d1462cb6b0b601b5edbb8fc2 +size 27815936 diff --git a/params_shard_232.bin b/params_shard_232.bin new file mode 100644 index 0000000000000000000000000000000000000000..72c81dfd2ae28364b6d04154b4348826a6174d9f --- /dev/null +++ b/params_shard_232.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54df717a2ed55d974558588c33ff3defc1edf296a71abc2058a27c01901177c0 +size 70778880 diff --git a/params_shard_233.bin b/params_shard_233.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9bde95bce0cfea799260719385d16567c0a2e41 --- /dev/null +++ b/params_shard_233.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e412914670a2a92687f4615149218c724212e001bed3e5b886a0d099df0ab0 +size 141557760 diff --git a/params_shard_234.bin b/params_shard_234.bin new file mode 100644 index 0000000000000000000000000000000000000000..7eb5a9112d6a16cd1ac710f5334f64bd3b425772 --- /dev/null +++ b/params_shard_234.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934e76241ab0adc1f0c4bef7791cb0196ee68618e4393d72cf1952feb3e74f78 +size 18350080 diff --git a/params_shard_235.bin b/params_shard_235.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d48fe40128b585f36b2685c1ef88ccb2f468d80 --- /dev/null +++ b/params_shard_235.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b478b45732ea0855289be6f639049e85797ffe2bcf2acbeeb0b43383bff1738b +size 20760576 diff --git a/params_shard_236.bin b/params_shard_236.bin new file mode 100644 index 0000000000000000000000000000000000000000..46fef8560e63cc9c80c5381c711fc41713d29764 --- /dev/null +++ b/params_shard_236.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a61d7803c63920dcc38b47ef6e0c12d1fedf679400f6a921f23dab86381a88b +size 70778880 diff --git a/params_shard_237.bin b/params_shard_237.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f038fb4584d7968aa819bfd66ead03c00c1aada --- /dev/null +++ b/params_shard_237.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69421b2cda543c7ee1d7af86b8544204b8828794192a34d54652b3f18fd5363c +size 141557760 diff --git a/params_shard_238.bin b/params_shard_238.bin new file mode 100644 index 0000000000000000000000000000000000000000..e1cea5a9ee3262a1534bd3f2e2a082dbbb30fe3b --- /dev/null +++ b/params_shard_238.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b494c714493f0daa9934a2cd79a90a98201fd9e824c797408d6ae97ec01364cc +size 18350080 diff --git a/params_shard_239.bin b/params_shard_239.bin new file mode 100644 index 0000000000000000000000000000000000000000..bbf985e6dc161426b02fa52500cd5386652e63af --- /dev/null +++ b/params_shard_239.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ecf9e5b05acc953c973ff6d8124e2d18ae236035b94607545de26c57b290042 +size 20760576 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..8d75f917e73cbe028c41d5daed81685f92d6e4e8 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb514516b8af9cd0d343e1c837b3d4e0b1b5228b5e1ca6d195f8f69ae8546b6 +size 18350080 diff --git a/params_shard_240.bin b/params_shard_240.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d9693557865d0e3af194acbda53417743fdcd22 --- /dev/null +++ b/params_shard_240.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb32fcfceb7a247172ad2735616ff2257d2e808a8f8e2bd6c90980fd366d21a +size 70778880 diff --git a/params_shard_241.bin b/params_shard_241.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae963eb62a8e9cafa0fa8ff9c4cd9c3b918014a7 --- /dev/null +++ b/params_shard_241.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0617809e23b99a364d5f1b1856dbe6f9271b34f3d48b875d8f0e7622ee628f15 +size 141557760 diff --git a/params_shard_242.bin b/params_shard_242.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2e4f109d6ef6dd80383ad16b4d43dd40d48b2c6 --- /dev/null +++ b/params_shard_242.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b4e0804974f38c032dee94653ec6170148a0d4020eaea024e108cae1c25b5b +size 18350080 diff --git a/params_shard_243.bin b/params_shard_243.bin new file mode 100644 index 0000000000000000000000000000000000000000..436ca476b7e9c522434fc20e10657529f063818f --- /dev/null +++ b/params_shard_243.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62bb490c77fe3db0d74eaa3daed5caf7e22c9f11b07c7e481746abecaabcbacb +size 20760576 diff --git a/params_shard_244.bin b/params_shard_244.bin new file mode 100644 index 0000000000000000000000000000000000000000..947f74093b801fe0f9b074eeceac41c6c489e35a --- /dev/null +++ b/params_shard_244.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9fa77fabe2ca838336bd6024c563ac4a1190c43024bda330212bdad52e5ca9 +size 32454656 diff --git a/params_shard_245.bin b/params_shard_245.bin new file mode 100644 index 0000000000000000000000000000000000000000..b838f1b6b4d424cdc24f5a11ef6fef7219fc4cba --- /dev/null +++ b/params_shard_245.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f41f3dc6910b12f3edbaac62f986715af607050429850e44c1b77628b49884a3 +size 13516800 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..760c243a72b3bc0d0e185387a9456a18c805e4bf --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec690a8a2d92748249608838ebb87413940957fba9917d8f7d1c272d61dd942 +size 20760576 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..6266e14b79a765f5672f0521a9df4b0c16d7f6f4 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd14a9a718d955ee246541e64e119bdc49958ec4e4a1dbb94f9d7c2fb872923c +size 70778880 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..1286e451a537acf216d875ccb85e012fd4beb89a --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e718135dd16de19a9cf28af3bdc9ce4dd204ff9f204a9a25f99b02388c7d2204 +size 141557760 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..34af753c53ffa9a8c3da13ca4bc0254680bb66ef --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e060707ff9bee1282082fbcf4bb9854776d3d2c60bfd86c3d8efe931a2d8d824 +size 18350080 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfa994e95e85555de8d89531f27334a9bbdb15a5 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4defc92df7103e3df0c0ff6d84a09bb83b968834fea4cb1d461ee5edacd17cc +size 20760576 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..f02631b19fef83d77e3d1870367892a335b82a9a --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92771e7ea852f0045445e03e2f129a8568ed9bddad391417ebbc9f902b4c7fce +size 389283840 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6459a67d4bb088c1425c0b921be6264a729c23b --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57517ab70966f8d817980ba74f7e7d92556372e1a0d74b8795e46fd21dd30c2 +size 141557760 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb3cd7c2b11013bde1ff97dbf50fad5d5d8f497b --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49633f4957342a0d7f3d710c0a25d348b513989b9f5b8c8d4b4289df2402c9f6 +size 18350080 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..00c0fa58da29c806794593405fc996d50ce01af2 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56d5364cf988bdbf3eecb134a3de967df7f6e6a4323a549e410efe3f7162cf3f +size 70778880 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..636f23bbf7805863da8e50927e85f777eb74a287 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee0b99b463c6d74e34be463cf59434bb69615b4b9f64988b7427b22e65873be +size 32055296 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..a991f1693832e1577a38ac984b460f63c38d31c7 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d1ed99c9f7c0273548e50e41bf7cde0894f963602953ab574506224b469110 +size 141557760 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ec46eb4ab5cfa48f25d58861b0be097059c00d9 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0ca731be5dcb21bef7ace7ac1d2ec16efe12b09bfcab7491e0e3f96fdf7688d +size 70778880 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..fce2d64b50609282f7ca3f24690feea180724ba8 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c0a373e28a11a22816dd448439e39512a3c0249d2f8adcb1d49387e06a7120 +size 141557760 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e3ad0960584c0576f01a3dd476e9fdc31bbb8cb --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95ae473981125d2fcc2af6371bc8cefdf043e98dc4c3c3698ecb34566e6202fd +size 32239616 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a51276ab05ad7f146a8eacaa57cae1c1f6d911c --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aba982ed60affe88fdaa1ecbba05da2a382d6d1113860990c94ccee9e1649f17 +size 70778880 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..9bbfad42f8ccbd44fe4e184474c34370450a009f --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0ff087801cb84201ddf36c52874eefffa31817c7ac0c6e3730f69d8f11f72e +size 70778880 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..94cf6bf332daf82c6cb242446cf3f72264e0534b --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bef93ee76b6c2e84d7108b44ea584e10ebb654873fa55143523698572939e0d7 +size 70778880 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b02feab976a4b5370d82191329616ffbaee1b43 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378a71f9e81261a3fa4aacf3e15457835966468996ef49259422cd0b13cc253d +size 141557760 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..4181253813a180d9fc4b8b28802b7d487b37213e --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d52be5ecc3151d84f716ce975f8e170a0be42db1d84631edc2765b70d220101 +size 18350080 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..63e02fd77515dec1dc939450e8a1976740c1c7af --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c27a84eb34979900ac9f0bf152be9fdb3975e462bc9510425e1038d8341789 +size 22992896 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..33e2f8f9be635bc62113a31c993289b2981737fd --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6573de1467403f436d3b53a5c2d26300456d08d883c90d37f4db5cf00ba389f0 +size 70778880 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..fdde747d191d79861f6ac53ca2e74c51b78f81f3 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85e4318483e242b27319e8023b7e068b5de8a0957aaec2a008ec45e680627d5f +size 141557760 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b18e535eed3ce6d45f2a55285eab5bef1e6305b --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe7735874fb468771453fc1871988b970ec0b72ffcd5a1f68947efb413bfb7e +size 18350080 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..35d2ea4d5c191e19394a3c2a6976acc9a82c0188 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d988b3c48874e96c0711bae5e4e861eac5f4a7555f32179066071d4f7c3c8f +size 20760576 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..28c0f7ef27a6543b6d47bdbea100bea17af67d0e --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30d2af587809fdd007bbd248acf1fea320570b3083a125b53de94d866db2e89 +size 70778880 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..3457e8a8c0be14e23edfa3cdffb8a414c0f17ef4 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a62903cc2a59dcc82aa21d67c8ed45a6f445ea421f5dcc2ff4a7ac77eafa2c +size 141557760 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca509e927a7a0588644ccdeb4e5d7998a45b2014 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:480ab604b7616bba74f578318f3479c80a44a37398ae0452c1aa8cd73a81d5e7 +size 18350080 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1bb85c899c210837a796228fbed5bf0f9d7fad1 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc450accba5014260e922ef248fef3e34f502e2c9f7c5ae22cf33b678db9e6f +size 141557760 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..19ff4707a55ba2e8d51bf7cc216419358985e795 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc09e1960b71b70a4ff77247fc84bc2fe9288f7fdc9c2f5eede156f3383a338a +size 20760576 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..62a663245d8d3d623a41f495955378594a2629c0 --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8caebdf294682f99ab00ab74684d0b72dc1dc23b41f409f8e55d8890f29e8be8 +size 70778880 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..f3013cfba612108ccdbc526fc1026633a5cbeb2d --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b0ef70b01052ccbb8415dafc9f97bd273992da45cdb5e3e93147d180600d36 +size 141557760 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..0fd720fd05022aa7337e510fbb2d3d3b817df7de --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:131d27e79178b51385f0bf07950e693fd834e5df23dc99ffb6fb222dc4d3e938 +size 18350080 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..51f38b737282cfb7a2b4a1ffc7b96cde52475b3c --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab35e08c6a29b43ca386e8602703f7ff6eded786793b6571daddf68cf44a4546 +size 20760576 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..8f996cc20f1fe9ddc30add1a2f4fc5eef9f79629 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f45b4f0f0145113017e035e13447d82d118d210b7d251d9fa3a9c3dfa2cf4ad2 +size 141557760 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1ac7c505a70e7c511b7676ea2351e127007aed6 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a25a4e6e93194e207184f95fd57881b72e3fbc147894e6977eb6f2cce2c32f +size 18350080 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..5256a04bb86d075bb4622f2782e367ac060f64d5 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e485789555db5a6ed9319db490a5a6796684d2a8c41b2468402856fd9bfcd705 +size 70778880 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..03f7a948a378c6c275d7b90c22b94ac515182787 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1330460171df68915b4e62fd9f0a6e285bd520f4d0690f9cc40511f796b4732f +size 32055296 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..424d969f12315ef76d1556c3ec5ec42da8788901 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f8bec6f2348087b9e3ea9f1ea9f5bdc68774bd4959ad817990748f1fc50ebe +size 70778880 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d8880de3b245fb9f07dba51063e678d18ad5aea --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c14137d5ba286297dc3d1e79f8334a29a0c13c31e611c7b1001906590fdbd7 +size 33218560 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e24d313277de244f94ef1fa4e2f8e008ed192d4 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91eebc72257e0471163d19f1770975a84e5c6920b2dbdc978cd06404c261dfe9 +size 141557760 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..ace15690cfb11430c8f9c15d66e808482b18859f --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86dfe398f28b8070c1fa55208d0a26635630eaf003d5482c425a567efa031c0 +size 27815936 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbf4c30a1db6c65001273fffc3ea0b8dd6f33c9a --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25716fb4b8bf6daed08c5c3b92791c5e5f4b46b005dcc9339e954f51f0d25193 +size 70778880 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb115e70c192e83a99d2ccbbf4d5a431f1319ed6 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e479bdf0a4ed15bc642928a7c5c6379b703b454f060b0d9371b37b31ad558828 +size 141557760 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..1eadae69ecd2554ed57253adad43adcf0d97dba0 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194a865f93d7efb6bc668d4f2ddbec416043960947fb8e345986fdce84c57391 +size 18350080 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..d862bf0bfb9a1fc141e2ecc7c9ecac8d9ef66b35 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cb4511953a35ff1e6a64a4b3f555aeffc7cf29a072f193e15de24b7390f5217 +size 20760576 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..e43fe34854bdb0cc67a414564e1feeebe3144eae --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e9df65f6d36df6fac6f01febc2b2edd5e0f0e25f140b064b42ef903e4d4241 +size 70778880 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4310f3d77453617336538544011248fb15d62e9 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d92ade10f6d933960ec18a1d45db87965f0ea4ddc2d6b77405ef18ac995897 +size 141557760 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..d70b0452e79783e1d9d01a8adf4f87452dc7ce1c --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:351f275138b4d96c26acfb175e71902905a326371a72588e4c334b41ef8e0388 +size 18350080 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..44e1a1d86f0c51e657eb1cc9949f04b96e6e3909 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b57d883bd37d363e6b53dda7a5d644126332af2bc81b91ab279556bb9da662 +size 20760576 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..17965f026a6caa7f31cdaf1ee46f48dda6d728ba --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c93d8034f2d343933cbddc44c87e44368620eabddffa1380f88eaf29fb1a7c8 +size 23371776 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..7115f184e9538fb9c349d93a9fce00d31e770363 --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ebc0f45d1c3d9c0f4cbf472a1cf017d9ea819942edffb26cd6ce23b662db79e +size 70778880 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..b37a1315b3d1a10229860d25a8770bdb380193a0 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41af0ef61abd02bde51338c8f2e8fa996ccec684ed944bac28b77e48e282ce68 +size 141557760 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..f88a03cdf824faa918ab6663e251f95679a7bbfc --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b710b144750d353f6800d93a24510dbce2f7e081ddaca236f1e08df301cb30 +size 18350080 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb67c14d62f4e62ef376dfa7133e9720fca627bf --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3adc079507014ba9a62c898720b9098fd31ad0c23692f6fcabc393d48e11b6eb +size 20760576 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b978d8aad26d185ef07001bce26e588120bc405 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d97ca19fdf567e6410d1c94f8a927fcd970bce267d653b07f050cd5ef78c8a3 +size 141557760 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..b7f0eb4c6c4556411405708f204a9c817b29a1a6 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9d4a7288178108436c58eb116c91fcf9bebdeaba40c38e688579a126e45fb8 +size 18350080 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff5e670fb3e920f0d3b9adac69f5330f174f14e9 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff9e650071c507dd33336855cd166d0c70bde638e57a13b255217a5b1d619c6 +size 70778880 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..301af75a4b9d5ef621cead6202e6e2886888fea8 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9fd92e07089b3a4c2250c37040d4f055138466e169d091feb17176834ca0cc3 +size 32055296 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..321665dfe6d23b39e954ec0249088ad716007891 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7711234ec4ea7cb73f4f053fa3248fa4384cb56c8e89c9f0b7b48f6bfbb38c96 +size 70778880 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..987303f26b0b2e51a180031371e276c5583edd0c --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0213cae0e9bd10cf0d83682253df0dfe73df3dde6bfbb58ba6dfd36a2f82d3d7 +size 141557760 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..6634bf15553bd15b85bbb5afe4e2792313926b79 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a9fe75f5627601f489e165d0082b548aaeb74205eae402d4cf27421ae4e986 +size 70778880 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..185f4653da6c69533ab33302b70ba52acc3fb818 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f42b3849f74716135ca74267d37a6d15bc20b83870492a2d4ef0d0d37ca2cc +size 27815936 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba3f0ebfaa24495670fe3de85a1ea5629c486dcb --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1aa78b67063699bded68dded7d3688bf8702df56bdb23992eeea95edb8b4681 +size 70778880 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..e12e7d743f44669a3d60c8249d2c7f251bd02326 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5401141157bf71ea5ca9f847fde485484ec0adf939a06e12a42ae31cb1a6b46c +size 141557760 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..2cd007630253bec90c09c860f314b264adab2fd1 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cbf0c5e873cd361c0ba28ec9655a636802d70098667d5d3f2ad6d9fae91e9ca +size 18350080 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..21e60826f683b28864607cc6dc643a1442039d28 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4bfa946aba611710f1ccb6217387c5f612f675eb9488ae41ae467b83eb7d51 +size 20760576 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1eb5bb85367ad088b1869da840ebd2a7b5817a7 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c99c9a4af91f952ea07d23d502fee1b30e7e10221874ad82f91c18c671818484 +size 70778880 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..b6b58960833065f3c74fd0b3432a1cbfe1da9fe9 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9ca54d781cb2fa30233a0cb18305f56fcc391e8e7b86b8daafd640ae5a07e51 +size 141557760 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..3156057167febba119683a3402c460cec760676e --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a2bdf6ce068763f426fce1b1fed723e2aa4f8ce11a7c5c1a217e4fc4e196ff7 +size 18350080 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..63c46c0b51052dacd8b8b1880aebd7429c1bfa4e --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f32fd5b343ff9c90d952a23aae49ba602ce8860207e2cd925edee49e7fa6108 +size 20760576 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9035011ca3292b9973a252cffc24c03cd3aefb2 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebeaad9c9c1433eb6bca95a8fa894925ca5ae0b60267b5555576cb6ba3198252 +size 70778880 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..057b7df140812407a21028c05b874419ea165947 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874bffa758cb568dfdb5fe504cee3a2cd7cfe995a92dbb8375461340a4e8fb81 +size 141557760 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..a4c228418eccc26e1097d1661e8dada93f93d6a1 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4030f5026d274fb2b45e82cc3b9b0dc9f4912537ca477b0b2859574719600cf3 +size 141557760 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..35a6e88549bdd062f058bafdb88e368d2b181591 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:581a07f28530b43fcdcfc350891adce1f619172d086be8fe288c7558a6e2125c +size 18350080 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5a2067202df44b1466f9bdab4d302e6d127ec67 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0f88dc51f10e953b06e3c109dc95ca611b0dbe3c6c19e2c7eb83c2e176ef2b +size 20760576 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..984af8438d9e86fc8404a70f5214b46cac0e4bd5 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93318a81f88160b7f41b8c6aa61950a640ac3b84ec0c0634a32f98c9258d8387 +size 141557760 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c1af13a3e3c98f313d7a586dc47892a3a71ca69 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6975bf37b6174838efb06815f9286dfaa497f18f186da05841d0289fededb51 +size 18350080 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..162fc1e062e3d7ab70a833aaf34db77f8d816ef4 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f294b7c33219f82db669f21a396da9490ec56e868681f8b9b987d0ecfd89a9a +size 70778880 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ea3886e14b62ba156e76238fbabc91180130365 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dec676d327c866b15a005117eb413f8bd9a2ba469168f370375a6f59924e54d6 +size 32055296 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba69e5c7cd66d4bffd23d78d686f14809e45ed1b --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc380640715939dc96b8b13963a9ecd88094e45f403e667c0b0b495528c8d728 +size 70778880 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..2356ea33ee1286cc7bf6dfd025371cfbf3d97094 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f7009afc1bfcde84a729bc5b48ba6338c1c7f6ea00828a25302ce2bd220e92 +size 141557760 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..75699eb1903f3ac828fe36fb24d55e7ce4b36db6 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de3a97f06cbe995d12ce3d4b94760bdf1f0622a77fb34f01cfe5ec40d5be8f8 +size 27815936 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..e065663d05adad9b7d5381bac4d3cb2847710225 --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,45 @@ +{ + "model_type": "qwen2", + "quantization": "GPTQ-Int4", + "model_config": { + "hidden_act": "silu", + "hidden_size": 5120, + "intermediate_size": 27648, + "num_attention_heads": 40, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "vocab_size": 152064, + "tie_word_embeddings": false, + "context_window_size": 8192, + "prefill_chunk_size": 128, + "tensor_parallel_shards": 1, + "head_dim": 128, + "dtype": "float32", + "max_batch_size": 80 + }, + "vocab_size": 152064, + "context_window_size": 8192, + "sliding_window_size": -1, + "prefill_chunk_size": 128, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 512, + "max_gen_len": 4096, + "shift_fill_factor": 0.3, + "temperature": 0.6, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.2, + "top_p": 0.95, + "conv_template": "deepseek-r1-qwen", + "pad_token_id": 0, + "bos_token_id": 151646, + "eos_token_id": 151643, + "tokenizer_files": [ + "tokenizer.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8c31060551040e47a0b2e2407b95f268b4c47a0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,195 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": null, + "use_default_system_prompt": false +}