diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 962f55e9a99b0eab2926ce2f9af55fb37cea8a91..d5007d6641249879d89e1a0a5e175241d4693735 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ --- +language: +- en license: llama3.1 +base_model: lolzinventor/Meta-Llama-3.1-8B-SurviveV3 +base_model_relation: quantized +library_name: mlc-llm +pipeline_tag: text-generation --- + +3-bit [OmniQuant](https://arxiv.org/abs/2308.13137) quantized version of [Meta-Llama-3.1-8B-SurviveV3](https://huggingface.co/lolzinventor/Meta-Llama-3.1-8B-SurviveV3) for inference with the [Private LLM](http://privatellm.app) app. diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..2f98e357c05745bf2cf79816dbac3b1d79de5789 --- /dev/null +++ b/config.json @@ -0,0 +1,5 @@ +{ + "quantization_config": { + "bits": 3 + } +} diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..e4df1e036e9c4281404488957556878d89cd57ab --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,4311 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3631664128.0, + "BitsPerParam": 2.6739310072364444 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 211365888, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 211365888, + "byteOffset": 0 + } + ], + "md5sum": "bc79e62ae00d801443df8bbaa482f8e7" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "19807ce94df9f3940ad0c8dc822f8cc6" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "51a8acc3d127956e364147967897a8b7" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29369856, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 26420736, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26420736 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 26428928 + } + ], + "md5sum": "687ee5596690fab409dc44fd76bd32dd" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "4deec00f834c5a05e9fb65672af97e48" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3d0149ef0ba5a32ec3d705c0fcbf618c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 5914624 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 12664832 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 13508608 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 23633920 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "e5394d336773f3d2f1a98416fb9c76d0" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 211365888, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 211365888, + "byteOffset": 0 + } + ], + "md5sum": "8959825b4b03ae8ca5744ed837b9f392" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26420736, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 26420736, + "byteOffset": 0 + } + ], + "md5sum": "6d8b6fc0fca1af54e9cb9279fa85e400" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "23cf262dcbe5103b0bc06e2497a2ea1a" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "a44cdf7bb85d7b114b8528b90790c967" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 27856896, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24907776 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24915968 + } + ], + "md5sum": "9728c00632e20c51cbc1762188edcc4e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "36f310c612b138733c8dc630c85d70c2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "011a2f70bedbab55cce817a0684e09a3" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "41456df325e3350aa598569b2a2b051f" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "859fb619eeeb33e3e2805512d89de74e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "db1e5392ac1cd55545175b5010772ee8" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "23fdc94869c1aeabd21cbcc0e1e240e6" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "389ea64dd3badb584a95de6bb6897707" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "7e9d1da0d6049ceeeeac66c0a8cc675d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "e5a2e5a3b65e3d12c036913243dd3a16" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "2c8435eeecdc558b11a8a2d03002f09f" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "cce795503d8372e41a812296491ec7d5" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "34d3ec2f239ceba9b1c4f2a4e7bf8396" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "04a4c648f54e7be513784f824623f1d2" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "db81fdc80078e32f89bb55698974dce0" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "36eb0dd65023c2d92bfb97fd045e40f7" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "1618fa35e1d12dc1624cb9f28e2dca92" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "c1a6e621e13a00c0eb27fb3d2c321c11" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "bebc2cf7500280ebe39e97e518c68ce9" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "8341abb2f0e6a14d2c57da44e13c7b28" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "1c0bbc40b5ef683fc9b0b9099fc44dcd" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "687449d5fb2d3a6fa11ce5fcf44aa317" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "d65a169ff574bf56294294422ce81f7f" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 30806016, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 24899584 + } + ], + "md5sum": "6441493db84d7b0351ccbc5745e63f15" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "c14df4b7b1b6d3b91b7cfd9eca8b5f6f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "893a022985cdb7f083326fc1aee85581" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "c87826103ca7e4839a503ada4144b571" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "3cda5c004a0caf33ce038b90a33e4627" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "5bca5529ac22e7670b373ad6bc512af7" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "cb4a38a8d3c53b35d27d2cad668c6284" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "27f8282a809f5105407d8820afd31d7a" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "39a33465a712835b49a7640c62a85467" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "c24f0df17512f099463673a6d3abe369" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "0b4fcc3fb4facce1ba7be87c7ebad920" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "a3b8ee677ed7ea335743d2cdb4343d66" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "c07961e4dae1bd6240de520884d585a7" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "eebb031df612c494ce0e240cf8ebc299" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "bd16a55adf1a2cdb846c29ea47fc6dee" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "f928f99ecdbd04e260ed260c4c3db012" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "9ed4c0848523535d8d602eaa92d9e71e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "e336b54e59fddf96e6dcc226864a5084" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "82f821934e1c4f51549bc6bac105ef80" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "0ef09cd1e4e46f3a2f4acd946fe2b70b" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "5980275be452eac6a7c1aa4de62ce9ac" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "074d655db43eb8380250468d60bb61e2" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "89b484f12ee5dfdb9def27aec8105554" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "ab648be570676d17f7f411947dd7dfc2" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "151635d7850862f62ee02c6be2dc1272" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "806a640944fa87cc176788cc40a7e3be" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "bbb4701235e7782a416ef103df62c888" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "347665644090940d6d7b3758150b5ff8" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30375936, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 18984960 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 29110272 + } + ], + "md5sum": "6dfc35c0eabb250a57ee5957e7fbec9c" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 31129600, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 6750208 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7593984 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 7602176 + } + ], + "md5sum": "e3c6cfcdfe3d70ffd6bafc0ee1a62041" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "8a13b84794bf309243aef9bcac8f8748" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29425664, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2940928 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 2949120 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 2957312 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 26484736 + } + ], + "md5sum": "18addea6f6982d74e8271498c42a29b5" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "a1499b4c4b0efab8b485e7060de7fd48" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "4ddb42a6933243041be2475e43af7288" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "e3ca5a93a08a886a5b52d3e6e24a6dae" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "d9c4b0f2d3e47450587df93b78f1b57b" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 32391168, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5914624 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 5922816 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 29450240 + } + ], + "md5sum": "a27d908c7051dde896c886fb2cee9839" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "cb6de3ffddd5f029ae7a7de4d87080bd" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "034311577e3141b13d2e70ae0c06f46f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "cafa364b4827110520678ffa60946cae" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "40a4e672e0e1a7cdab15391a0d17a616" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "b90e94553b26ef801d4f8fd50b957197" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "501d65d790bcf8a7b4bfea5624b77e25" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "348e601a3998211926e024e19de42132" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "925e37a183ca7c2c16195dae1ff29967" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "3a9f78278690ce302a19dd831b816a48" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "73b86d686ca6b53a5a8cc63a388fa794" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "43b8ee4aa46ac5ad5918c47fa8607002" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "4ae076948bc038aa5e2d71f8ee2e19fd" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "5a0a92ca6ce3f32b895c6d8b93d87b37" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3ff3a8435eb7626d921ef1ccc1f89a01" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "7540c84e853348130e0cc41632293e38" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "b7da6ba34c962b56adf13920ef7e9a92" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "b4fa6db70128293ffb2f89927f601c6a" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "1721c036edb347f1d40ea06e42502e81" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "9c7fd77b1769009ec52a9ff7fcbab785" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "4bbdcd6cc2f38a49a1f361fbc61b55ae" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "767df9f9489e598ba55e5fc78d6234a3" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "812cdacc1baa7c5e7c0c940d6fa57b2b" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "c63bfb54bf054a364e4b5d5ebab2bf6c" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "0009e3f67e2937d09fa4e4aaf5f32eee" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "9c11636bacd358323eafe7b755bebd09" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3e493523e9865f7ea8073fb9e21d053a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "1e910a90b54e2a32e20aa56a439b098b" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24899584, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + } + ], + "md5sum": "47a7e0d0a9ce3964b4c0238811ed7d7e" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..338ef2e280f2141ca7983fa324afe74af90ae193 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6216733bb8ac2ea3a2f91e5b306b51dbb0f852e7b8cb6658d20ec537d37fb87 +size 211365888 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e581f24939e504bd837131c2672fac99576f4dd --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17389854c535fb83cdccdb4e0518bbf89aa5feea01ba4ca0317d6fe0a43e986a +size 23527424 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e2ba2e742f64315133ca046ffa0166079b2132a --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b71eee6aaf969fec653a941c94938310f4d26e3d744b72c9a5e8fc136509d887 +size 47251456 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..14656f961b3bc0906061777e1568a8c44a675ad9 --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f2a76850df26f7c91a055c924bc67ef8e0089fba7e0a5f44b93c01aadf1d4c3 +size 27856896 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..002e911f1296518678ace24cba6c5cea67ad68ae --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffc5a0d462061bc7f5765a0db4fc1ad6add3a6815ada9e3e3ce8cf6b040d5bee +size 23527424 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..fe63b616ff404c6fe90194a1a88b202e5fb48534 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf54c9fe2a2d8b6d36fd966e2c36b251bca4cd9d8359ad7e80322da27c9029d3 +size 47251456 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba4c31055d076ed52000e10c90efbfc81ac5bc07 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b6004bb9d00c871565e59ea3b20259dbc06645139c57f86e7dc20fbd0fab75 +size 27848704 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..36cdb7f47c0c7189b40dd0bbcd7d44b23c5f5b81 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0fcd31de2f8459d0cb70726cd33cc9b377b27383aebd4e919c866a87a6c812 +size 23527424 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..5044cd2e541514a79edfbbf32e5dc75b548f0f44 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696290a8dfd2fc64cb27f265f5bdaec9298869fa6b610a5b4bb1bab7120a0ff4 +size 47251456 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1c5ee52228372be43fa4ef7f591be9a8d3da266 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3596d2cb0c8ed9e5ae62721b2e956fcfd8e6c9e203b5d8bb4d89079f464207b3 +size 27848704 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..08ece8c09b3946d017daed5b2d818ee2fb1db6ca --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1971ffea7600cb388f6431267cb3734de2af6d930a2f4c00981aedf2cf31632 +size 23527424 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..76e5b88ad73c178612a56441779079a10d292c05 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7266800ef9e5e1aaa2eef9f3c9321087205d208ed849b2f8e9d7ee1bf6506949 +size 47251456 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd29daba6ee48f409923dec948f694487490d013 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17fc0b29178c1703ef19ca9ac12217d840436bc186b69a2281a5fb41666383bd +size 47251456 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a86676180853b7a60c770fa0e58232b895c1dbc --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40893384cbb5a917f1b2a6cf11fac89eaddc4875ce7e9000493c9228183f14ac +size 27848704 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..636bfd4227142b03cbb713475edb56c1ff08c5fc --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93143ea40b7236127a5075c6ca1a3bace9bad97fd418d6c20bcda1ee60b360f1 +size 23527424 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..b21b2d4fd7c621443003b6a6ec0c9fb00bc370ea --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac856ee06d6b664f97f2b1311004a1bdf8bd7703cfe18f0dade903386cf8fd0b +size 47251456 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b2e93ccf44b8c3769e14b943141b9b0a8552381 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2696aef93b6bd23371d2a497a662a3da4500ead0e9883e8677264b6a8d2d51fe +size 27848704 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..a31dd547b7fc19c9c000d4aece1a95043899b4b6 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12b1f80003d82faf4e25a824e709818b917ad9e54aeec94b06c628ab22f9f062 +size 23527424 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..c940d6ea0c85821dbd2da6d2e4c053573153a62d --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54861be818ab7795300ae8ef5243e8bf696993162e928211cff4d40b10a4924 +size 47251456 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2a5fa641b0ebbfaaf4cbc265bd264079bd05551 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71cfa70d89eaa1c32911c31cd40d3ca0f8d58d33e29588f464f8f80f11903ec +size 27848704 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..4da4fb9e0136e5760b2516bb6ea3c0cb8550e2a3 --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:346a9648ae1ec2516cd7d030157552eaaeaaf188e57de880b73171ac4cf1a40b +size 23527424 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..d768c31f60fcb27bb852c383aca6055035d419e8 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181ff3122882d21e20049d5e8ece13a5014dd592f44e7258946faa6e744a0c31 +size 47251456 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ad6bc3ca87f8d9dcc40b8074728831285b180e4 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f65bcffcefc9d656111fcddedeb4a9d541c3f55c478692de91b9692ce2b9dfd +size 27848704 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..5fcc1e990a5feb2c78febc527b715fde9730e5a4 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e94450fa78a5b000555d1cf3250ee0d5aa53bc3ab31ef9be805a65c8c340a26 +size 29369856 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..47014a234efd9ea6ec006e9c8ac5805e314b0ff8 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80dd78fd877be8fc99e9f8f34b18cc060161a2bdff192074c856bdcf18f78011 +size 23527424 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..302875848d235e316924c63f970f5d36d401e721 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbd35f7c6304187cc68f52df9f0ca480d04d7f0e3bd583442c0c580d43b3d35c +size 47251456 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..0464cd38a9516c29bace1d57c9b78b7699b146ff --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e95e203c44cb7867a6976da5edef565982c76d97fc084cca5b35aeeb463f7147 +size 27848704 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..4380f8dc174383a60519f29d32318a942e2943e2 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804f08ba7d783895251170525e84660913e23277d4419e394ef90678d38f9662 +size 47251456 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..aa3cc1331d6ddf9e2495c84c272649e347990e0c --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc8da9e6ca37cf87c48103ede7190ad49ba3a3fac0b2a56201a983b0849bb74 +size 30806016 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..8904d5b4ab05e432cdc8d97f8eeec9fcf1ccde29 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8ddaf1bba9bc54b1dc879b0e48a90b4b3d73e4a80543392e2c97cc780a902a +size 23527424 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..864abf8b4ea8045bd8a13fc0cd4a97c5e4d556e9 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd90be729c8895ff38411a3f4d3a29f9d7cb0ab959abc0791f68ab3f8bdc0b57 +size 47251456 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..641045cfadab054c0bdf04d0b4cc6d4d619c4c78 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3759c7d3137f9898854a71e61975823511c63c8110a58b6127a46ab7abe9a2a9 +size 27848704 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..045c72a34fd169431bf269fd081c50862e087bb2 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99f0e645daaf8f8298ba5b6680464873f59648a47d72fe950f452da13b58c903 +size 23527424 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..18c001f5fe22bf60537527cb2d87eaf46fb96e9f --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a0185cca900eb63b15e3dd63aca07affbd00c6c42dd10a4d1fe389c3ae1481 +size 47251456 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..221bce404195a46b823ad54ca5cb81672fb97111 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065cffad34dea630e6e70542fb0be9284fb35e2f344b7bd740977bc0ab1f2b35 +size 23527424 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..5bd198e0f36e67ca3943a22927865f615f8d4f4f --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dcb5e2ba43ef9fce8eb688e892005663e41d0328b1a3d1157e319a92155d90b +size 27848704 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..e99d214bf423f453cafc75fac68f0dcda842a389 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:befd8e342fdd336170a33a7ba8f8d180cc97738428fb5cb3de98a6a9496be423 +size 23527424 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fafd88c9de594adcd591f5782257341f664c6c7 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb822a08b873fbc16f98e8d2c6d65100a5441f82c13e4756bcb9b147984cbc80 +size 47251456 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..1df90426c044223d03ca5ab1f81d9556c59c63f0 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:617fded2fa74903554005a5614db1d0ad8067550dd4665c37abe798115eb5271 +size 27848704 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..b9b43ff5053f02d7dc80780cc8f5961c3abeebb9 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb5e8b5f91e3b1f7f79a8476901817efe33c0c0e541fbfbec5bbc2622ca59ad4 +size 23527424 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..0363a9ceb3dd73e6765b8cd9ab0bfebc8537d611 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e74af897040286b123cbdd593abc20bd87ae9d7487da72dbf8085614e9c4f4d1 +size 47251456 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..9509eb2df2c64bc2bb4c3c560b348161499c6c00 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e0d256fc07d953b516f83905871510c6370a721be7710587ae414a51c08174 +size 27848704 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..1113e59b543b423f5ddb6e80d4264c81fbddddea --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87bfbad321dc455ac5aed31700524d64b362bcb05e8ce24d38afe4366caa7bc6 +size 23527424 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..efc8ed9842b914a5f02e3ad14660916af693939f --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05e97cb12a37ce9428c75d366b48c2d9c9bfbfd587cb4f06796a601bb188bda +size 47251456 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a952db405a6990de6d66c04ed6f49669d28eec7 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2666fcbf4c23874800b6a0ef8cc64a4f500edc8a9da65812a642f0a68a1cb6d3 +size 27848704 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..0833aa45fef6c334eda8b43817ca0bf239137575 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a598a01a06c490374c18b209ce311fba391254b4f2c00c7a447dd8326ec751e +size 47251456 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..3aca04633cf6c4eb1c19bf2ccb397f0d89583436 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782236bb87a1decdf73571ddbf9c29c30866f2bf8843b25f933e87c619389931 +size 23527424 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..01558f3547eeac5f07393ca50dbd8e585b11b81a --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00465d8209c0423f85a8f44f0a44272cdc01c605f2d20fa68e386c504e04b030 +size 47251456 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..edff8b9f0401533d50033caabd1601d3384fce8d --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f946bc6f10eac2c11ebc66e228a0bb8bc45e442f86aa399e509aa11a0e7ac64b +size 27848704 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..61bfadb46fd0da50cd480a32f525ee543cebc990 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30bc836b27b3baa1731cd524a4f2945cdc518406e3b4708d2c46bb0c01f6641 +size 23527424 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..03e80a962538d28bd2d91985b13986f2ba1a0540 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4597a62016cf85fc3609222b893c9b65bf282f38c8d55bfd7d019dd58f65fd6 +size 47251456 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b556e0d9fb613494a6b1f7a7bb6eb7ad4609a23 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d66ae8726a8adae9a14b2f335c258a0e758f7ca5520d58fe19aa6dd972eaeda +size 27848704 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b2f3b38dd117fee4172c049d8ccc5d5c57f30b3 --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5a5f41a6b966f2ba666c0f28198816cd4f06619167628423dfe6aa07a29a185 +size 23527424 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..0ca528c74cec4c64879737e63f1a4e6500f863fb --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0ebb897ec658a9ee70c2354c197d0e23afc9e459807680023fff6edf780cf7 +size 47251456 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..be2091ee3caeff38e9ca05d578f7671049637c02 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f171a42f711c669e72fc61403ad2b7babdf15a2c5f58b73e7e0c0499df787b8 +size 27848704 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..30290bba35ced157115b549742e9436cc4841226 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0fd2d6c1afaeff00d1abd29ec7384e8b4df9247d9ea975771bcfa6650333a58 +size 23527424 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..83d7f081bb0249ed6f28158b4d2f79262391103d --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8238c05a8c04cf7a780c8f21e273fc3f4edee1b349fd034c77f864ff6945c0a +size 27848704 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..35c10f7d96f610cb07dd99f80782946111e2e1c1 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23019dfd6ad192ccc8fd16d43b365281e27d210b83f71dcd1ae76d100e57a958 +size 47251456 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..9781c0d061b49808e3993f0e28481069d47aaa0d --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6735bf7725477fdc1c321f188d20b22310406a94846ea535f6ba7ae20cdd7d88 +size 27848704 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a631ebfe927d1f0edf64c17090ef59b616e9b72 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:211e744e25bd77bb8825183e1a11d2600a5a6f3b3794fbe34ad4bb64da5624f0 +size 30375936 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..4ae23962815a4a5545f833dd1ba889dc9c458820 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10feb33097128dce403f539dfade03955cc9352ab2a4d9efc6c7769831929e1b +size 31129600 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..4041980a561d8c2925cfae5871063b25ddac1837 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac4cd50438096f3adfe28a6dddfd22d25c6e31f2657124931ded0fc10639a86 +size 47251456 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..edaeedb1c827c32e6665a182f7c8ae4d335bc667 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:940dc92998562a1a44fbfc4251b6c14480327fc654f8775f7b04c0f620a32b5f +size 29425664 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..765124cf6178cdf9d03c9046aa27762ff3c2b40e --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:269cb590a67e2df6102483ee0960cb8af4c87fbf96d45aa8303d5cb1367b4257 +size 23527424 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..a2d14a8147d00cc5028887704532948935f6debe --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473c4df2bc935290dca88ac7ec74a8e80df97057573a66017ab463c197b0dde8 +size 47251456 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..410c773e150e1d639746fb1d26eac20e25284cb7 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c6ae1b16f9d954a45adeb9b2f33f7d7998e88b0877b61f23a8d33fead241f3 +size 27848704 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ae631262adebd18a07b76cb5ad3cf0e8bad2801 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23478cbe1eea7a5d7a0efe8a6cee5540f539513cc5502387456c87bb8cc71722 +size 47251456 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..93b6784c11555c851d6492103e6398032a8c87cc --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2144c3dc87ea8e0c5e8115993f7a603cefa73af2cba876a7b79d4e14e0cd1ad5 +size 211365888 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0554b628e2b2b60c441765eaa385132bc66386c --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d97e701adf6b8f786a3102b97cb4a7fb545ec32e544d17ec2cb90297c7878aea +size 32391168 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..c16592a64718ad0e647985b107bbb2b31338066b --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347c3dd228ed27e4a810908f147393fa70a120870ee19ef1e3c8e5bea497ada2 +size 23527424 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4f288789a2ddadce38b684a2b2d8c34d4cadf8f --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ceb4cc3f99dc9c94ba1cad853a65217f49d964c4157ff2593300af96d304b9 +size 47251456 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..f9897da2a3c0e414d4e839fa626076065003d907 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:799397883d0f74c36c544e6590c3a8fa9394bca89a799441c6e374e8402c9d99 +size 27848704 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4cd1bfa8da71cb810ec1cce43ed7ed510f68793 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3700447e2d018a5f1d33ff14c09341dedcbd9f590fc06641936f82f8b69dd647 +size 23527424 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7f68d7eddd57a7e22f601a442a789a2f395c91c --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf746b3aff72378c7736ab2e5d73d1b294bbe3c268e53af8ad2b0b8f3448240 +size 47251456 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..17792132eea6ac00595898a4769cea2c8c6b0588 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5cb5bedd61487f3ef3717860cde6034c94948f6b33e2a941ec5acb2e81bb12 +size 27848704 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..d68520fc6cafac55904215627e954ff5a4293918 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c0e61981b82cb948a8588c052d1a38d1952e400465ec2cfc60d30eb4bb247ba +size 23527424 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..308197e7477228f171c47d5224e9198fc57f65b0 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea8ca3c9785b527b2b2dfcee488e8054f29fc9f2b716d0c161cce05271398a9d +size 47251456 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..56031076263f6061429a87f83b09e7611b107c83 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7438d9ce3b120afccc836db66cae6a026b4c510577ce8285a3ab3071a9a92ce0 +size 27848704 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..17852af77b4d5b74b6a4c033afec28c0b28d6d4e --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a17390bd01db27db9193b47ead2b2df80530669ad3686d7eca186f9c1fd308b +size 26420736 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..b062e613b6c4dd5d1c5958888b4ce6ef78e1f9e6 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d8a64cadfbf5cf7daf3acb12757673eecedf2034234424d81ccf0616d4cc27 +size 23527424 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..4bd0c89e232c1a89aab29ace1c72e4a70e291100 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30db5752f7a9159621e2e4b7d3456796af49cdacc5f90334d35069ee718b46e +size 47251456 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ad88ce53a39d7c8c55697a38c74437728a890a8 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:470e5184a78f359ed2f30991e64ec41617e5f0d78fe0f0a8c3c9911446aa5984 +size 27848704 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..e129a2104e80b4497ed00a78248ac274c49659a2 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d936468830d659e3e876d1144be385c99b0fb82a169cf5650c9bd84f70e1391f +size 23527424 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..739ad2ee6935af8c495b89e1e858a24f40407e35 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a3b47d0eba1ce7c73d6eae6d600314b1ef8848f53190815ab92d58af623d78 +size 47251456 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..95168deef4a78c907ad9143e58bdd946c9ac0108 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c009d2e2cf8d1823d9be2007d3a138190c884e645a59fb5ab78c7e0e7db1116 +size 27848704 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbac1141d551f80a4dce1809c3dc9210778c83c6 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25e96eb43bc2b3a798b1f1bee1966ff19edc6cdf4a205c14187f0530512911c2 +size 23527424 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9b62ad70f25397a1e85f18f8513a2cdc364e614 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11e0816166ae8f9173b3a41751b5583c302edb447f1f94e302fa14c9a903b714 +size 47251456 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..7082e66c36310655ff65620b2b2350da3ff4d3f8 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbc76aedb71b46819673dc317a4a175f0d5d7777a202b192c90505456a111e87 +size 27848704 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..1dade64b27c775a569b72637e0c60ae1c9f6397a --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04f6ac7436d48d5fb226056d78d7c69d704b8a0c70556f4644de688875ea4530 +size 23527424 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..8b2ce5e8c04fe18e07225e45140943c13bc26cac --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e2dd66e7e6a6697de18e882d19e479f2b544c46fe15d16cbcab28c8ffec0003 +size 23527424 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ed4a78cec7b3bb39ffa1d3636d650fe4e8bbbe6 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a31fd544d5f7972ca568d22c1c4d22fd0365587622619bd263b4e775609763c +size 47251456 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..acee46f948c1668446b3fc374c79022bfea62eea --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3150f9e7f446be9b88b029bf347fc88aa2634f5b04d6d61725b5c402fc6b90e +size 27848704 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ba83235725836e16e447c42d7b1150647dfe9e1 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a505eb5d3850d5d1c1ed7367e9a6e076b557d68c3cbbb3581111d13c4cf8fc1 +size 23527424 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..e76ca0d3a0ed6b7e3b7b181f2911979d62786c7e --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f60d8683efac3d3e62aa4177fbddf6c7ac84befceb38c0a8ee8d273dfc29d4 +size 47251456 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..467c49a3d45559cd1dd4232b848e20cfdeb51239 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445a782883744e94a0acd5238ab623c7f80e9c013d46b2b27a37ee864d4ab71a +size 27848704 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..91cd72d8c9d58c029024c60cdecb9364d2faf1ec --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547c07290a58416bce9617f391360278fc4eb786134411d8556fa00de4db8a9c +size 23527424 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..6346e60f3e530c253ac121ac630ad8fd438f0399 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e038ac180706d30c627fc5aa25910aecc3d1d05ea653c91fb8e0d262c89b1c2 +size 47251456 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..106a6b0364cc47535efbafd4cf977512357146e3 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38a9874ebe8f067e06854a9093defd8680f5553a960cfb688f309182f6185d2 +size 27848704 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4eb0634f1c76cc1b8a84c7e875f9ba418e5a3e7 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d3018fae71bc54b73f0740b64d001f8236323ac9e0262f4d261e24ee6265e8b +size 24899584 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..57335de42e23894bcbbecd75c52418cf470067e7 --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,51 @@ +{ + "model_type": "llama", + "quantization": "w3a16g40sym", + "model_config": { + "hidden_size": 4096, + "intermediate_size": 14336, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "rms_norm_eps": 1e-05, + "vocab_size": 128256, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "context_window_size": 4096, + "prefill_chunk_size": 128, + "num_key_value_heads": 8, + "head_dim": 128, + "tensor_parallel_shards": 1, + "pipeline_parallel_stages": 1, + "max_batch_size": 80 + }, + "vocab_size": 128256, + "context_window_size": 4096, + "sliding_window_size": -1, + "prefill_chunk_size": 128, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.6, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.9, + "conv_template": "llama-3", + "pad_token_id": 0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "tokenizer_files": [ + "tokenizer.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..ebdd789a7eacede3c8658e5a0725edad0ef6523d --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2071 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eot_id|>", + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{ '<|begin_of_text|>' }}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|start_header_id|>system<|end_header_id|>\n\n' + system_message + '<|eot_id|>' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|start_header_id|>user<|end_header_id|>\n\n' + content + '<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|end_of_text|>' }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|end_of_text|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast" +}