diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 7be5fc7f47d5db027d120b8024982df93db95b74..339888d7ac180b1899739f32169fcd735d5d3528 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ ---- -license: mit ---- +--- +language: +- en +license: mit +base_model: agentica-org/DeepCoder-14B-Preview +base_model_relation: quantized +library_name: mlc-llm +pipeline_tag: text-generation +--- + +4-bit GPTQ quantized version of [DeepCoder-14B-Preview](https://huggingface.co/agentica-org/DeepCoder-14B-Preview) for use with the [Private LLM app](https://privatellm.app/). diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..560f4d0b280a94488743c9c6184ecacad2f1bf32 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,7103 @@ +{ + "metadata": { + "ParamSize": 533, + "ParamBytes": 7617046528.0, + "BitsPerParam": 3.8073789790921078 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 640, + 152064 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "b4674daf61a60a9b559c512c324dfe33" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7839fe93fe9de9247e9bb395640fc253" + 
}, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1e5c1d52466a5bcb52fe4a846340cf3d" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5937eca514ebf8619935415a93d0b53d" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "34ff15ccbac9e354a06e9f271bb0362f" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "97eaffac050defd1dcfb85835048a380" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "61b02048cfa960b9e305043aaede305f" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 32956416, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 40, + 152064 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 0 + }, + { + "name": 
"model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 12165120 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 12175360 + }, + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13281280 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15493120 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15503360 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 15513600 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 16619520 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18831360 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 18841600 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 18855936 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 
13107200, + "byteOffset": 19429376 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 32536576 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32946176 + } + ], + "md5sum": "28af345c0a30992425762af71b3b4fc3" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "715a54d350f1cb60c6a93a70e14c6991" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9b36105752dd33d69ce730957243176c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b6ccea183f1c72c6de025180ff5435b3" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "26edaebe65d5e75422743d5995935251" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + 
], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fbc49eef53537cf4279d66a2d9a1c40f" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32407552, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 12165120 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 12175360 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13281280 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15493120 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 15503360 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 15517696 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 16091136 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29198336 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + 
"dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 29607936 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 31819776 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31834112 + } + ], + "md5sum": "4889dacbcb77e6f1c2eb09e0e9e3d712" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1fa819acf0582937fc8c3e24784aa9aa" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6f1b1e29cac778242b9f57190a292642" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "92d484bc9f8898a8ec41bcba8527cbc2" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "14f4d3cc8195986e96ff4d74242b7244" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": 
"model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "fcebb8567e0f56026af778e8ac6e54b4" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9cdd470733359cfd16caf18de3f61b66" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33202176, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14632960 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14643200 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 14653440 + }, + { + "name": 
"model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15759360 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17971200 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17981440 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17995776 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18569216 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31676416 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32086016 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 32096256 + } + ], + "md5sum": "6c97c3af9f76bc1699aa3cf101ad9a0a" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + 
"name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "c3d1be35d36a35d418703db856e5b0e0" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5986b630ba2cd6f4423a8002c73629b4" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ef16436bd1dbad39d34ed18c673739f" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8ceb2cf18bc97e95dbd3949f603b5b2b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6c9dd46c89ec32c36246588d7335b9d6" + }, + { + "dataPath": 
"params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4ad09263b5cbce8de88eed57db11e652" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + 
"dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "2c3b9d15fb0e09905bccd2ef369d0b87" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "5dddcb193f67f02cba4a122e8d270f90" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + 
"format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c460fd2c88d211479512a723e8b86b5e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fa0ff5587f86528c60af775f533feb55" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "90c89f13fde6fe189208dbe1f0778bdd" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c0b7817b0084a2abaec13a79f8824405" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4c26f9b724a0765d93ba0b2a5f6c49eb" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 20781056, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": 
"model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16855040 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 16865280 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 17971200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20183040 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20193280 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20207616 + } + ], + "md5sum": "ad9de8ccfbb25681462173bd22efe975" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ 
+ { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "faed5a4d4f100271944a0a093d4fa67e" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4d4f853835d626b5a8acf859b6957ed7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bef00a768845a859ba83b24d2e94efca" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5fe96d3000c636cec9106465196575f7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "db4f614a26ae2dbb8ec73bbc33cade43" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": 
"float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "cb4697a264541eafac4ed20b770ccc26" + }, 
+ { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "4c89ade7a4d3597d8bf83ffb08beed74" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "492786998ec2ec4fbbf5925062f90358" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e6d488c803dce1d55b11b609022141a7" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": 
"f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1228c0d9fa103ae49d4cc628242a1fa6" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7c173fdd1a887a1bb44fbd7935588687" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 31547392, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + 
"nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30959616 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 30973952 + } + ], + "md5sum": "cc3f1bd082f1bb19977698dd3ac77f8f" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4dfbae196ef1b06cccb0e8ab6eddeb35" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3039ccf4af87c3c85db67e6135271f50" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "848453f02b28f01e3bf5bbb3c5d19b25" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": 
"model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "a837a4d63ffbbe1cb300e2c71dcc94b9" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "452133efdd138c7ebcc285681aaaa7fc" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": 
"model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "a41c5ea9a5bbe4b89a407d4ce2fa1716" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + 
"md5sum": "7584cf7539d976b10f74effd6112a805" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "dbf638cb5f931887818a56bc5b762bb3" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8f582efa6c0afd924be63043afe967b3" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a550c72758ece727c1c540c9eae971ec" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ca7f3b2cbc7dba36a3984ac950def79a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "52468dac20be88a32edad15106678de1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", 
+ "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", 
+ "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "f9ea59d878146226b73dcd3fb38e860e" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "3e8a4ea076f69e6c4aa988f0085ef139" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "fa6bfa954bc21f5d9c6d3d84acd4e37c" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b256a0c6012783b0c5be3c8965ef5741" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": 
[ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7fa789cd6f65193083666f61e30735af" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "88fb5dd23841b4b726a67d471c66b677" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d49e90607ee038148fc449638c864f7a" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": 
"model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "013d6301c46b6b5d5725106b5e1a91c5" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + 
"name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "937f191c319ce0d241822ebba60da73c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "696d359df81a295a51d0d0f8baa02655" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "163c7a37174c0b01b80fc29fb5227344" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ceb724796272bf748ceed7cadd77ff1f" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "13334c8fa552281332d316c2d95a8f54" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + 
"dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8d8edd7b7715aa1f7e837ef7233108da" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 40, 
+ 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "407225ffb87f1ee5131e2c26ce6146d4" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "5cff06333e005ac82abbec2afb2b48d5" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4f5a69087e87495f25cb9340635c2492" + }, + { + "dataPath": 
"params_shard_76.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0198fc8f45475a34335b6d5751db99be" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6b2b0b7fcf767ea9f1769db6b00ba35e" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5d6882c383d15e57c2d07f3d1453108d" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "afaea38689db3006429048f536fdac72" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + 
"byteOffset": 13516800 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 15728640 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 15742976 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 16316416 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29423616 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 29833216 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 29843456 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30949376 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "de8ed83879133b0eb5ef35b24911eb03" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + 
"nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "23d35122dc691939160415449bdbed59" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bcd3cb90aa300808d95fe11728eb9b50" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1e699e011781e44b9345de73991a7da0" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0c5c63d447b5c41e392b2b9a67763fff" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": 
"model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c2c2797f69c8a472a059f1a7c904e7b1" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e92e0744bc889984c1d26dff4a1234d2" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": 
"model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "4bffeb3ea709bd3d72e3de5b5f0a8f1e" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + 
} + ], + "md5sum": "d496a370828f53c448a5647aea0366d1" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "abcaea7c0dad266379740d7b21d989d2" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "dac82b3fd1b782c47632dd01b92a1eee" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "36b7b156ae74ce4ac7fed7e0bbf303bb" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ad08758b259ce8a60aab2f90f4aacfe9" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "655264c1e9fe3cdf4b60b1cdd983c2e7" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": 
"f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + 
], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "f050803c546b2b89e27db57dbce6ae45" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "18d8a560523e9f4a800bfcd89dd6f31e" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "7a4f26b4999cf4604d46e9e5ba5adae1" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "23cb6be35935cee0a419692f06e22623" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + 
"nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "90fb54b71833747de9af16c5e40c4e1f" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "af2989046482d2c8dc9cdd6108b8d12a" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d698b9332c2e3c31800780f414a37fc1" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + 
"name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "1269e68c03a108126e684a0709c183af" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, 
+ { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "9681dccb4240835f1b39aaff8aa8298e" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "880221a035d99bcb45f2ec4276993c44" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b9965b853dc02f57681969b6902dc032" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6f7d6fe3ce1c08467f4ac71c26e42b82" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f26c5775527ed24bd5ee9d0db4acfc77" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 
+ ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a112c4cc77b7ad1a9000dbe41dce0366" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 33185792, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": 
[ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 30959616 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 33171456 + } + ], + "md5sum": "9cb68b679b77425953291111e784d941" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4f0bff19323645301ad2c018ad79a134" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2879e9370c79d7fc00ab52d37103af03" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0a34ad92e46a800e7677b89a0bc42ab1" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "59e305045996faddc43b13d1db45d3de" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 
35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2e634693a7f9e111649b9c80fbe0bdb1" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32669696, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 573440 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13680640 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14090240 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 14100480 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15206400 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15216640 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 15226880 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 
2211840, + "byteOffset": 16332800 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18544640 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 18554880 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 18569216 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 19142656 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 32249856 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32659456 + } + ], + "md5sum": "a83966e4728c399935ba9334dfbb16f8" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c137056d4bfe3b27e9f7a9f738c50aff" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 22265856, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + 
"byteOffset": 1105920 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 3317760 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 3328000 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 3342336 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 21692416 + } + ], + "md5sum": "36191bedd1ae7ccaed476bbdd08c37e1" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "011d06cdf298f2884bbc7d85b8d27c59" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fb59e7c7550a9bae6778cffa21e30c32" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d4499e09b631b7085481e1268ef9e79e" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": 
"model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "31f441bc0af0dfbd01d406d8e3c84f66" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "68e4cbf95aee91da862dc04129155d84" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": 
"model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "0ae05e7bd80906c1b34d78d50dccefe0" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 
+ } + ], + "md5sum": "addeb39a6e2e3f72a929d36ddcd6848d" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d4f9a765fe8d70a7de03fbc71945f611" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8ebade09312c7964880728fd5c74fae2" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c297acf8b0672b7c7124cd610f93c8e9" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "153c025583f0468bcbedfcf585504063" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4852963968808c8ec503ae7977fef1e7" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + 
"format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 
108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "70493b0efc9eae1b3a9455dd2868b58b" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "48649b63d3be926a45dec661c9a259c2" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "de145f136663a8b5f36dad7169ffba40" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "37e48d85c97a3ae7c87c40bcc18e9732" + }, + { + "dataPath": "params_shard_133.bin", + "format": 
"raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "312f51e89ac100e17335937e59bd9e08" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "f6e55d268fff1481c36018080eabd708" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7328788b5b086355c5a4deaf10e63b98" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 
14632960 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "1cbdd822db3b9494d2ac11008716f8df" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + 
"byteOffset": 2222080 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "05e76972f99b2ef47e7bd480b781ed16" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fdb6c093498a53f78c357132467d0b23" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f93d716ca68a0e021b86a6d783e81809" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "db4dfbc805f84e94a66e129e2190a6bb" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b01f08a3edfee298f0cd683bfd006b0d" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + 
"shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8565b05bfe652ab44c1f7911dba84c29" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13516800 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 15728640 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 15742976 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 16316416 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 29423616 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 29833216 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 29843456 + }, + { + "name": 
"model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30949376 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "50a81acb3e1f9f1f0b4255550a4a4d21" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "c45a2397c04173b59f04ffcd77c6807e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": 
"00b06aa4bc619a1e2df22aa7b4cef0c4" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a65c0d7058a35b7f61f09a2825548835" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2095f5b2c28b6debf9be1cfd242843d0" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2918c0cf6ba95768fe9bc36097b961c5" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "586ae65479fe5f83b410df7bdb16d329" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + 
"format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "7009647a36fd9e754d06da6a3ffaa2dc" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], 
+ "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "2a799e69dca3706458696f53b781a346" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "575600f7ca1be118190bdc05f248d5bd" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0e3958eb3bf3eceb02591b0601da9d7e" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "77b5eb2bc198d7b96335ec260bbee630" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + 
"nbytes": 35389440, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "868db7fec66bc9039915af13cbce9de9" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7e8f7a8360452f622095e26ac90ed173" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + 
"byteOffset": 16855040 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "cb4a22a758c109923d077fe9f963ab5c" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + 
"nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "70819e15fdd98d783f7e1d3fb36f7b82" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3db794d1211729631113262258b9249b" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5065b83e254846dbf274fb0ede5152e4" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fcdcd68a15d1d94df47d6201cc7f7bf8" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "0fc65f1dfd4189ab8ee1192909cf734a" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ac03316527818a6b3f7bdb8b32a398b4" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 32075776, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 
640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30959616 + }, + { + "name": 
"model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 30969856 + } + ], + "md5sum": "956ad1d2bd8130f3c4ab342023f00c07" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 21159936, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2222080 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 2236416 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20586496 + } + ], + "md5sum": "0b0218c25487b2484d5f1100a6566b02" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 1728, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c7710fbd9c88e07aae3b86795b3a14b2" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 27648 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "609fe3f42cf4898e93c331f424aacdd4" + 
}, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ab8705acee4d52ae8905c823bbae6652" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3c2a1f7ac352ad6cfde8b84e95e3593e" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 31547392, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 108, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 13527040 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 14632960 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 16844800 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + 
"dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 16855040 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 16869376 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 17442816 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 30550016 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 30959616 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 30973952 + } + ], + "md5sum": "b309a22ce9f9e3b27aa1c379ff762c9a" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 13516800, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + } + ], + "md5sum": "ded0d827346eb598733b7aafea1ed5f4" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..54518e2d72558add90622c08156a854dc1b95df3 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992a8115ae51149dc4e634ed730cde58055b2ee87acbf5976b0ff1f71b0eab6c +size 
389283840 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..02db35b73fd5f8b7a754817c0f57b003e3fa693c --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33bf4a230f5f2620bf0926f97c851713618d3ccbd3b3f5c97830731852729167 +size 35389440 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..c557235d81eca425c8c2a602d0a22040a3b59753 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a122df236bc0fb62429f7dfd26ba7a2c5a6b051df88cec08153eecf21ff0b9b9 +size 18350080 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..3da2aad5027a473fcecb6b6318c04730e2b3e2e4 --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec925b61550665930a0bee2964021bb3f052b5c947d90fcf614808b63a27e71d +size 70778880 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..093ffeee0a6976b54eebd7a7081c3bcb126e19a5 --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:233788c7d878877a57cae86502f7f0020faef1f5021a0ea6dff61947a3868e8e +size 32075776 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad6ae9d11b46510005173bffc99c19a7af8ed5ea --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bdf9b0fa9d3b60da8ca5dc980c1f1d3998f2206acee0b5eb885dd9c329384db +size 21159936 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..cfdb000802f2b9e432baff77a63b90c869ed615e --- /dev/null +++ 
b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4efbc01054b6e26fa477bc05f7eeb4af35fe0b8b245a6c6fea1f9e5ad9c1d3 +size 35389440 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..46220b6dcaee346560a6ee4fbd936156980eda81 --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f28925275f9f1d7a46dc31ef03454a87a0b86b4f5305a9b7b2d055c7016a217 +size 70778880 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e1c26a9893b05195fda7eacdeb82a0814276699 --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d998f285e8c5cb4a86341d6b5647afec75b529f3248ee172dcb72047612997b +size 18350080 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebcb9cd60a929fe20ff2f4c944cadf49d5d554ae --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebd7595ccb9d268d12ab4ae80e63a5a71c630faf2ba287624d0df74d4b7b94ae +size 70778880 diff --git a/params_shard_107.bin b/params_shard_107.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce79e7f3b2de65bd41af0df50264587ad562fd9a --- /dev/null +++ b/params_shard_107.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631a152730f04ff380e5d65f863e3fe202397a835038374e76764748d3814af6 +size 18350080 diff --git a/params_shard_108.bin b/params_shard_108.bin new file mode 100644 index 0000000000000000000000000000000000000000..febaa50ab41349d0754ec0bf82b2d5c859ca5b5c --- /dev/null +++ b/params_shard_108.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202ac237675908f69f7cc5e9b1e7d8089decc3f296be05cc9a011c83e4c8dd93 +size 33185792 diff --git 
a/params_shard_109.bin b/params_shard_109.bin new file mode 100644 index 0000000000000000000000000000000000000000..7843428d109c82b9162ff4be30d0b61692f62b5f --- /dev/null +++ b/params_shard_109.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b5f11cbc5ca792533f7ea830910f62437f1a5096eb18214fca03c7a8a0d675 +size 35389440 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..439fea6e19047a85b4b6632452bfe434620e9ecc --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71c081e20a95a4a822e662971d9a84110785ef506a4cd86316d513493decac6 +size 70778880 diff --git a/params_shard_110.bin b/params_shard_110.bin new file mode 100644 index 0000000000000000000000000000000000000000..24f8d788d2dae492879b9a14fbf4ae181d5409aa --- /dev/null +++ b/params_shard_110.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15a3d669fac6d18d8068ff04abbacfc72e474f1e7bcf302069776793b91da59a +size 35389440 diff --git a/params_shard_111.bin b/params_shard_111.bin new file mode 100644 index 0000000000000000000000000000000000000000..784756ac7be55d5a2330c593646972fef28bdcb2 --- /dev/null +++ b/params_shard_111.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7959dd2e6b77788bd504cdeae46b988045e13840ac8ac163b89423a368af0a56 +size 70778880 diff --git a/params_shard_112.bin b/params_shard_112.bin new file mode 100644 index 0000000000000000000000000000000000000000..9ee58bcddc4d6a50a3f176594a59f66073076b73 --- /dev/null +++ b/params_shard_112.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04154ffa0cc83ceb74624ff8099451f7d67fee08114b73168690d980ccd5eb5d +size 18350080 diff --git a/params_shard_113.bin b/params_shard_113.bin new file mode 100644 index 0000000000000000000000000000000000000000..89da7dd66f3c1533522433f4662de336a8a16011 --- /dev/null +++ b/params_shard_113.bin @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6066ca7afab4c78e3303ea6eea021fc9017e7820f688edb0c95bc9fb8ba315dc +size 35389440 diff --git a/params_shard_114.bin b/params_shard_114.bin new file mode 100644 index 0000000000000000000000000000000000000000..637f7ac41a7355bbc3e36d34db4f947a38386fa3 --- /dev/null +++ b/params_shard_114.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1744f6641e50251031317631c0c7f03d3d210b69ecfada002787ffe1712d9760 +size 32669696 diff --git a/params_shard_115.bin b/params_shard_115.bin new file mode 100644 index 0000000000000000000000000000000000000000..32785ddb36474857268bd64952ac463f787b087f --- /dev/null +++ b/params_shard_115.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaac2df5ad9e329a08eeee585208b9d7a4490411c8c6795b2e3cd863accd7d26 +size 70778880 diff --git a/params_shard_116.bin b/params_shard_116.bin new file mode 100644 index 0000000000000000000000000000000000000000..d78fc9f90b600a10ba2686584a5e60e2ec545de9 --- /dev/null +++ b/params_shard_116.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4a094301c9eace31481bcfd7ceb0333e10e86823f88dee4bf348289c99029d +size 22265856 diff --git a/params_shard_117.bin b/params_shard_117.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3336929d66ea4cddc6dd8de7496421e3274d4fc --- /dev/null +++ b/params_shard_117.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f30f6fb830bbad582a2f8cfac706e3c4448a501a3176208a96710c97cdbb96 +size 35389440 diff --git a/params_shard_118.bin b/params_shard_118.bin new file mode 100644 index 0000000000000000000000000000000000000000..507cd07f883ce7d38643805cc95c548a1f1ef0fe --- /dev/null +++ b/params_shard_118.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c711d95c3e35893b07ee9e97643e9ce609d4993cff5b9b143efe64e9cb177076 +size 70778880 diff --git a/params_shard_119.bin 
b/params_shard_119.bin new file mode 100644 index 0000000000000000000000000000000000000000..9c24ed1b630001dc2276d9d56402d4933768db04 --- /dev/null +++ b/params_shard_119.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2721090aeaab61fb9afaed8deeaab0d83b3a1ff48e1b7e4236c2af45e57b6c4a +size 18350080 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fc557e1969e64164b1b2bcd7049dee769227ac2 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce537a80921a4e6797c336fa9061e85cdccfabdd409ae2657064f20c6e924eb7 +size 18350080 diff --git a/params_shard_120.bin b/params_shard_120.bin new file mode 100644 index 0000000000000000000000000000000000000000..be8e9622824d7f16d9a17ee22bb0dedc965759b0 --- /dev/null +++ b/params_shard_120.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1972ed5ceef9006d5d70f9d2b7a74e66bdfc381dc649aa024a385b1414393af6 +size 35389440 diff --git a/params_shard_121.bin b/params_shard_121.bin new file mode 100644 index 0000000000000000000000000000000000000000..c5779f54a5191fea7dae01517b0d40fac8f39da2 --- /dev/null +++ b/params_shard_121.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53f62ddad2da62af84377ca255b7a72ee32591d73120e01d4d8c1d667fba74ab +size 70778880 diff --git a/params_shard_122.bin b/params_shard_122.bin new file mode 100644 index 0000000000000000000000000000000000000000..025ddc86411fe12b056dec8a3b126a367b837a61 --- /dev/null +++ b/params_shard_122.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ec239270cf2f11d68ac6ba5f4460a732a745a33da202f1f442d208439336a3 +size 32075776 diff --git a/params_shard_123.bin b/params_shard_123.bin new file mode 100644 index 0000000000000000000000000000000000000000..20f546f9a7024703817d73c7a919e8f2df90a018 --- /dev/null +++ b/params_shard_123.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:401d669f66dc56ed3b2023e62a69e18cb6a2c854ad8976b17b8b66927619f884 +size 21159936 diff --git a/params_shard_124.bin b/params_shard_124.bin new file mode 100644 index 0000000000000000000000000000000000000000..cce7121b461409ff44f5f454fa5bdc0a1f9ac701 --- /dev/null +++ b/params_shard_124.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0eb49bf7520791b901ada7b5691973c1628d618df10494ce3b060a08dd89391 +size 35389440 diff --git a/params_shard_125.bin b/params_shard_125.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba3c344eee53529c118805485719ecb0799cda8c --- /dev/null +++ b/params_shard_125.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2afce74e714e72efae7b236a2504762be5f3e8e494257e70a3e6b690ffd7e5 +size 70778880 diff --git a/params_shard_126.bin b/params_shard_126.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e08f8e645522ba8d3f86b1a7d12f27a857d051c --- /dev/null +++ b/params_shard_126.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16e19db344473e6ee6e14ee0dcd81b3bc294f269eeb7b48c40f3bda4760aa4b +size 18350080 diff --git a/params_shard_127.bin b/params_shard_127.bin new file mode 100644 index 0000000000000000000000000000000000000000..4601991e4129f07816b0bdad10b95584cc1cea6e --- /dev/null +++ b/params_shard_127.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b1c8302e0dd9d7bd7183033baa47ab93dc8477bf7791e2d0f8d1e6273921ce7 +size 35389440 diff --git a/params_shard_128.bin b/params_shard_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..bab22db4ad085b0de8337b1aeffe1b3c783a8dcd --- /dev/null +++ b/params_shard_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65fa66c847281043b55f6aa1eb1adee551b93a0b1f7459a4a0cc7b3cfc1eaf82 +size 70778880 diff --git a/params_shard_129.bin b/params_shard_129.bin new file mode 
100644 index 0000000000000000000000000000000000000000..349b628fe3b40cf62bc6162e8855f5f8add8ff66 --- /dev/null +++ b/params_shard_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ca60470b1a3358b787d6bc9dfb6f793508c1373f1d40b88ce34bfe6653db70 +size 32075776 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..ab59cf1d025af89a48ff43bd4805f07a28f2aaa3 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53be584f189c4ff323071b81f13fdbb0e095ef3afcad1ad07fd51ade021bff51 +size 32407552 diff --git a/params_shard_130.bin b/params_shard_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..d0d9c676f5e1174ea39ba22f6a8a6eb66ace9362 --- /dev/null +++ b/params_shard_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b04a375f6e89c5ce1ea45c1c2299033f9e521327f481710f68f6d9945c8bf2 +size 21159936 diff --git a/params_shard_131.bin b/params_shard_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..5465df21faec0a315f1497514c9aa6feb2e61206 --- /dev/null +++ b/params_shard_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7874a1b8b59b4f2c17c6e6101cd822d4a9f33a5b57dd1c1a07d8c5997e99244 +size 35389440 diff --git a/params_shard_132.bin b/params_shard_132.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a114e4750264ba8062f51229843cefb0c745e5f --- /dev/null +++ b/params_shard_132.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f00bb9890a664e2f62dd96c75c92fbb68c5f572381276283647629a62be8e88a +size 70778880 diff --git a/params_shard_133.bin b/params_shard_133.bin new file mode 100644 index 0000000000000000000000000000000000000000..2923c5d2853908ba70c255b6256d01b319045e0b --- /dev/null +++ b/params_shard_133.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0b0f9ac81665090488f3519206a94fe446c35a25893673e975314b4a868a08e1 +size 18350080 diff --git a/params_shard_134.bin b/params_shard_134.bin new file mode 100644 index 0000000000000000000000000000000000000000..18593ad91507b3e9fdf3b82191819ba3a7ca5b5a --- /dev/null +++ b/params_shard_134.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d55ff3e1a0d082570e284f78a25396d4fcb05cf00cd708bc30fbb10309308868 +size 35389440 diff --git a/params_shard_135.bin b/params_shard_135.bin new file mode 100644 index 0000000000000000000000000000000000000000..3156d653c7ff5b90dc349ba1dbdae611cdb7467e --- /dev/null +++ b/params_shard_135.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82fa32f521c833afa2dc3b7ae757ec151d413be42638e4c3f694c95fda200ab5 +size 70778880 diff --git a/params_shard_136.bin b/params_shard_136.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a0a235c65432cf2e59aa1606cc56ca8024a1ee5 --- /dev/null +++ b/params_shard_136.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d8bf376314d8a13a0899afadcd9471c981de298f04d1371b094eeee275fb033 +size 32075776 diff --git a/params_shard_137.bin b/params_shard_137.bin new file mode 100644 index 0000000000000000000000000000000000000000..f2d3352cb59d680c2425122138d168dd3d995ff5 --- /dev/null +++ b/params_shard_137.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0a6ebf1208f227ab2d6c52ee9874db13f18700215a2a40eca2a6f930a284bb7 +size 21159936 diff --git a/params_shard_138.bin b/params_shard_138.bin new file mode 100644 index 0000000000000000000000000000000000000000..b481e7aaccc88da63a01dd702df3491384e87459 --- /dev/null +++ b/params_shard_138.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b527752714d28327c43d76685c5d3173ee9a08f28d0c6c46149465e97ff767c6 +size 70778880 diff --git a/params_shard_139.bin b/params_shard_139.bin new file mode 100644 index 
0000000000000000000000000000000000000000..25676ae5764f45e58441d9c052e23cab28ef7e0e --- /dev/null +++ b/params_shard_139.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534004a79a6f6b789fa8e4dc6042591d35933d465ba417ef3cf8d86a0038ba76 +size 18350080 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e08f60bd4bdbadfd5bad67cdb213b328d67dd58 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb408d02ce0a1c50c9ee5dfaae4afe7698b2f40fc4bd427ff3b781543239b910 +size 35389440 diff --git a/params_shard_140.bin b/params_shard_140.bin new file mode 100644 index 0000000000000000000000000000000000000000..586ab5ae765e0ed6efea255aebf226b26ed20f4b --- /dev/null +++ b/params_shard_140.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:555827688774f9041a83ed0af5858e58a5d08bf97cde6f01e021def4f3030243 +size 35389440 diff --git a/params_shard_141.bin b/params_shard_141.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba841c8a13a98052064c1643c9cbc987f9780aba --- /dev/null +++ b/params_shard_141.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6963a93e7498fe09a8f23f9b7bf2ad6f6fd135cadeddf567c27a0c58d8899194 +size 35389440 diff --git a/params_shard_142.bin b/params_shard_142.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f25f1b7f53e46f5b30d633eebab73cb872d4a24 --- /dev/null +++ b/params_shard_142.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74a7eb3edec51b1b3016e9ab436ba2340bf686b74d6add79f0b8c1316d07c8b5 +size 70778880 diff --git a/params_shard_143.bin b/params_shard_143.bin new file mode 100644 index 0000000000000000000000000000000000000000..ebafd7f81953ca35d2fb0645aa364cae95355fe4 --- /dev/null +++ b/params_shard_143.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:22d3fc0c062905166b590e23c7b8aca70113508004dfc946b8d195dd963a70de +size 32075776 diff --git a/params_shard_144.bin b/params_shard_144.bin new file mode 100644 index 0000000000000000000000000000000000000000..59d8f47f53f6b3c5523dea9e109341b51be609e9 --- /dev/null +++ b/params_shard_144.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f07ab6cf70421b85e2f8da3d1eeaaf4098e0192a6fbadcb7124dd1da02703f44 +size 21159936 diff --git a/params_shard_145.bin b/params_shard_145.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c8511ee9b4a0a07b24b069d5b048dbb38780cdc --- /dev/null +++ b/params_shard_145.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54def21ffbe1e9415335050e2b96d9c3a57de12198ed627ec576ba8d9044908 +size 35389440 diff --git a/params_shard_146.bin b/params_shard_146.bin new file mode 100644 index 0000000000000000000000000000000000000000..58dc737c423da28b0f2e38597a47acdded799dea --- /dev/null +++ b/params_shard_146.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd7e2b239eee782e62407dd83410ef48db709330c7996eed7b6c159bb6ae72a +size 70778880 diff --git a/params_shard_147.bin b/params_shard_147.bin new file mode 100644 index 0000000000000000000000000000000000000000..00cfcc22479038ab7b4fe4ea281a8dae8ff38743 --- /dev/null +++ b/params_shard_147.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f3e2459f939826593525d8f403ab50ed90ab89e48cd1d48814596c8e1b8306b +size 18350080 diff --git a/params_shard_148.bin b/params_shard_148.bin new file mode 100644 index 0000000000000000000000000000000000000000..d504b6a3d82a3c68c1c858be1dd0e308f934ef91 --- /dev/null +++ b/params_shard_148.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c3eda8e4a7a61ffa5154103d5dd9708a94d0de81ba9e115d5237dd00f6f4fa +size 35389440 diff --git a/params_shard_149.bin b/params_shard_149.bin new file mode 100644 index 
0000000000000000000000000000000000000000..12aeff2bf5295dd72a7d39b537b0e9f69eca9ce8 --- /dev/null +++ b/params_shard_149.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db03554579471ed2ddf9a593379c9e6dfefc6dbf6431c1964afe419273be93b3 +size 70778880 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7281db8ee221bb6a6b24dbf05a6d4d5b7f5c38c --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ef3c4637817d54e8c3441940233cd855c0651d515e66d60a2bfd4bc6361db4e +size 35389440 diff --git a/params_shard_150.bin b/params_shard_150.bin new file mode 100644 index 0000000000000000000000000000000000000000..f1aead21505cccd650aa49ec6bf0036188dacb72 --- /dev/null +++ b/params_shard_150.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50845fcf9c259ca7e536ad59049bf731ab6edba0f4c878194ac94cb08facd755 +size 32075776 diff --git a/params_shard_151.bin b/params_shard_151.bin new file mode 100644 index 0000000000000000000000000000000000000000..b4a52d66192130e3f50e37b748fb9f635b1b73ae --- /dev/null +++ b/params_shard_151.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a7a422a6729c1f0ca4985abdfebb7f8570a53c98b633a9f325d4d8d0160d204 +size 21159936 diff --git a/params_shard_152.bin b/params_shard_152.bin new file mode 100644 index 0000000000000000000000000000000000000000..ca6216adc066c684514b81d00d090b444b748612 --- /dev/null +++ b/params_shard_152.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaaf2270677d7d869b481870f4c576184a1a3d532ff48e4c714e810e701224f5 +size 35389440 diff --git a/params_shard_153.bin b/params_shard_153.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e2b7bb21eb2f43fdd90446492f498ce4878e7fd --- /dev/null +++ b/params_shard_153.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:09af0c0727c22b217e9ca25da7d0ff826593770c318da3647098f2efb5e467f2 +size 70778880 diff --git a/params_shard_154.bin b/params_shard_154.bin new file mode 100644 index 0000000000000000000000000000000000000000..72f13850319c63d3943917af9c354b5235d4c70c --- /dev/null +++ b/params_shard_154.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eae5ac7b35912fbb11b14b106f70ed08e8488ddd2e253538ed9819d11f806f2 +size 18350080 diff --git a/params_shard_155.bin b/params_shard_155.bin new file mode 100644 index 0000000000000000000000000000000000000000..d46faecf06b0ef6ab815fe84638b2d88ec1a7841 --- /dev/null +++ b/params_shard_155.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54dc5c17d73075e6cc97c1529add408d98af163317f4c279399f7dc9e468ac18 +size 35389440 diff --git a/params_shard_156.bin b/params_shard_156.bin new file mode 100644 index 0000000000000000000000000000000000000000..55a337834f57c9d23693fb930e76fcd17aa16695 --- /dev/null +++ b/params_shard_156.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7268d4950c8bb49bab912962af953d0e057bb0077202e0dfa82546e779d5aca +size 70778880 diff --git a/params_shard_157.bin b/params_shard_157.bin new file mode 100644 index 0000000000000000000000000000000000000000..d76c5f51e25fcb6185a90e205d730a705892c475 --- /dev/null +++ b/params_shard_157.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ae18e15b443ba66dbe90865dadf9f58876e9bdee470f2bea900643c712f85ff +size 32075776 diff --git a/params_shard_158.bin b/params_shard_158.bin new file mode 100644 index 0000000000000000000000000000000000000000..b437d8bb3dfa670aa5be16e2d4ebbc046656aa18 --- /dev/null +++ b/params_shard_158.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf42df0eadfa27f0727dff6908fdfdf59dffd2ec3ce2c930a63a62d0aa279088 +size 21159936 diff --git a/params_shard_159.bin b/params_shard_159.bin new file mode 100644 index 
0000000000000000000000000000000000000000..8385b81a5205790066d4545d399e129b7d4f876e --- /dev/null +++ b/params_shard_159.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:573f406cbaf77b92e0cc5079b438664b4ab43b88ed357469872057aed6d66796 +size 35389440 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ec44b9bf8664d50bd4bb20d92b8eba197cc8b89 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241d74d39b71f45978727d59ba6f186dd6fff24c24d7e7ce62f77d7325d2c74e +size 70778880 diff --git a/params_shard_160.bin b/params_shard_160.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2500503f355ff9468109141f121ef59b5b93c4c --- /dev/null +++ b/params_shard_160.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47df03628bdd21d3d8704217792072a907a71b6633ec14a2837596be4244530d +size 70778880 diff --git a/params_shard_161.bin b/params_shard_161.bin new file mode 100644 index 0000000000000000000000000000000000000000..a36b8bfe73db3cfa5d2afb32251e2b9475703947 --- /dev/null +++ b/params_shard_161.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5afff535ef29c8218e1b2498a2032f1eaeb1d9de7748a832b47defcd9fd726 +size 18350080 diff --git a/params_shard_162.bin b/params_shard_162.bin new file mode 100644 index 0000000000000000000000000000000000000000..842b92c34714a49e6afc2be1c9d52d2898d73171 --- /dev/null +++ b/params_shard_162.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:981d533191ceb49c2fda62e36ff357b2340b768b9950f413a0d465a7e183285a +size 35389440 diff --git a/params_shard_163.bin b/params_shard_163.bin new file mode 100644 index 0000000000000000000000000000000000000000..1466919e6b58a9de1b7fa410c5ee873bf25f1513 --- /dev/null +++ b/params_shard_163.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3cba22a8ff24abd8c701c0d63fc954545a28bd81cdc775f8dfc4f24e7d3c17d9 +size 70778880 diff --git a/params_shard_164.bin b/params_shard_164.bin new file mode 100644 index 0000000000000000000000000000000000000000..f615c7f029fbc402fa57896c58be51f1090b53d6 --- /dev/null +++ b/params_shard_164.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffbccf9a4fa0fbcc8219a726bd6ce87e1c519a182b2a730fe3ef078336c0a0eb +size 32075776 diff --git a/params_shard_165.bin b/params_shard_165.bin new file mode 100644 index 0000000000000000000000000000000000000000..df1db283249e99792a5fdc5d29799a073a4e64e1 --- /dev/null +++ b/params_shard_165.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41c2fb3e9ec23441b9d25093e45e52de8def542d42ffb0056377f25e772bc19c +size 21159936 diff --git a/params_shard_166.bin b/params_shard_166.bin new file mode 100644 index 0000000000000000000000000000000000000000..03b34302e14bd29545594ffdb263e0be5165e0ef --- /dev/null +++ b/params_shard_166.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817cbad713de55366ea6c51da3a521d26116663d5e9f925973c0a45842596fab +size 35389440 diff --git a/params_shard_167.bin b/params_shard_167.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4ab374a2a352b565e5bb94f8ffab6d74a0e79e9 --- /dev/null +++ b/params_shard_167.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:805584c18dafc4257c1a7c264b45f842d6e8d27aca17467a054c047c69101024 +size 70778880 diff --git a/params_shard_168.bin b/params_shard_168.bin new file mode 100644 index 0000000000000000000000000000000000000000..40369d195e4ea10b0dfa607844d05f2a2cd27455 --- /dev/null +++ b/params_shard_168.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150d2f8f640a3052a7cd6513acdead666167da5d1f5030a7a57e4e4f7f6edde3 +size 18350080 diff --git a/params_shard_169.bin b/params_shard_169.bin new file mode 100644 index 
0000000000000000000000000000000000000000..1c4246799cdcc695a8b1586bbb9d93792861ae51 --- /dev/null +++ b/params_shard_169.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb5e573b1fc030d157048f17e2a71c03737ec2105b43bd83278ad76650bf268 +size 18350080 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..5603943303ef9cb558c32f4e4f212db9ecc6e1cf --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aac737ee06034e6db3feea271e0377cb56143f471186cad1259ed5ed35437b82 +size 18350080 diff --git a/params_shard_170.bin b/params_shard_170.bin new file mode 100644 index 0000000000000000000000000000000000000000..4f4fdc782167ca1d1e8168100b9b408cb199694f --- /dev/null +++ b/params_shard_170.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1653eb491f31a76121964e68d17503932ff6564716ef692e8b4242886f375c +size 31547392 diff --git a/params_shard_171.bin b/params_shard_171.bin new file mode 100644 index 0000000000000000000000000000000000000000..fec809c276f9d7b914672f8510ede2e440c9020f --- /dev/null +++ b/params_shard_171.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0a4d038ed81f503b2c1b683a3c2df2f12fb941173d737eb00508ce1e61871be +size 13516800 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3a72e1bd5f01fbb1649f188f211353173f77e07 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3457f26b5b38a36d322ea5b01882a25681346c6d0022576a86f27e3a293b4752 +size 35389440 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..7455405665b3bb9166aea5652e3b755cfd06ec7f --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:634844837d0fc4814baffeba5f64c8f7e4e6742c3d635b9c4e50952c91add6c6 +size 70778880 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..465c4f520da748391b6d05713289ee1c9a4888de --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15802b34ddbf5a596f8d7b65a8be727454f6e109f876f0d8dc0afaced5085a7a +size 70778880 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..ae5867e621141be59470f9a3e766684c25b6c688 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52fbe1553e2384efdfca5948210c4dd0df610f7eeb5d19f50aaa56fc817ff239 +size 33202176 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc5acd64669119663c0ad0fa83ef268f800bd5a8 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f6a8ef5e1c185543010baed94d83af17de3e9ee835e5d92c6ebecbce481b01 +size 21159936 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..825bfb9ee6214f67c3a1f4d75bef3658b4f4939e --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78854dd6372a92940b27c9d8b7a088e1d61c6c7285644fc8962b57530da39090 +size 35389440 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..3b2802e8a0ac6151df8bcac88e315bd93102ffaf --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc6df4da24ed2a680b9e8fa16a7dfa804c1f050a9810c7de55946e387738d0a +size 70778880 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 
0000000000000000000000000000000000000000..60200313d3d9ca89c0b287edbb52703257a23751 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07ed26a11f2e6cc453f345cb51811e020b11567308fd1d0b707be028cd471eac +size 18350080 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..3504aa82f2e30f7b50f49ea151e18d6d7446d026 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c880ca1362463cf70d73eff08292e6161566c9bd0aa22dba07c9efe13f4baa +size 35389440 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..9088a707a0215b09ded9af9d51504a0c15cecbdf --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5ed30500aa15f9572a998c4383be5a9acfd11ab159ff41f652268c2709f1ee +size 70778880 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..6af39ad635fbf88a11621538d72cbbde5e5193fc --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411fbb46df4b375016cec7ab31441aed511fc07b63f8dc6b11e040e9ff0364b2 +size 32075776 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..21afad655aa02e55b31b74aa3b2b08101a9953a0 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1bbee3ad595d062188c0db1b5ad3a559d2085ac84a5c740deb610805260a9fb +size 21159936 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..eb9429391f04b9092e57a2699ade01bc0e96c11e --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:eb3ebaf0e2b0d7591423ac5a980f0736a6fc8a35653b00d731617af341641240 +size 35389440 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..f887f46763ad26e46ea13cb4492dedfbc2361cb2 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7142b82c6dbc9afd430c68ba2ea68dbbdef110883be0258618c16925d02c2291 +size 35389440 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad57aa490832d1bf6a291e5d5ea7f19805b0b068 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8098632b7d7cebf8acca2499dfe4b04db36a2d92d8e05ddb44794b6648a55a50 +size 70778880 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbae95249a4dae7871d9b25a30d28ace1b8cc521 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8edc6c7a0598a692336e79c11f17a122697a93ea5a37a006a3d4e9bccde93bb1 +size 35389440 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..31ff263bfa7d2ee4f0eed7a312b5ab5fec0bbeeb --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:958bc4519ff9ff8f90115cdd5dc85c9e701baeecd800c6ba569659a1e49620b3 +size 70778880 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..760bd18e7e6bb9206ba275028faccc183d5b380e --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8abab021ff03221b8e862d06d6f5eeda7633db341c476813013abd096cb57ec7 +size 18350080 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 
0000000000000000000000000000000000000000..28097f29406a9ddce080f0bfa641e123537e022d --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127054d2068491d09734b36c2c6fb83bbf04a66010f8b7964e23d14f394a097f +size 20781056 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..03d431c043a0b872538f85acc2b09125d1026be3 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261e4c5ddb234442c397ae14e7deb9738c581b828be2b4ec148537dd889389c6 +size 35389440 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a7cd42713663f5f8f0b5d56d00aa5abe95a90b9 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c72d932233e896547512f50faccbfc7e999f3652fe4206da928c84185947a35 +size 70778880 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d2c279260fa49c80cb8fff8eb08cbbfa9a0bad7 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae43f9a335f7adeb25a91e003e7eb3f6affd717f255afb008f7e9dbc02cedea +size 18350080 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1a69586c27f784d96da7f8ad876179ddd970a9f --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd6b7dae99430c79cbb3704dc12cc71e5778e2fa09fbaa97f0536694b9bba4f +size 35389440 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..6011be6a2a20735cb00a6b53ec77ce1aa92928ab --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:03e2835598c3e92b6cee2fd2e7d19845b469d27448c3e9d47b49d0600798278f +size 70778880 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..6bd15d25c9e427d1adc16fa7ac6279cbf3f35ad7 --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e676bf5763233ea7b02094223ba21f518bd4389def7d80b95152815d71a1a45 +size 70778880 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba5eb007f5a6c1a063a0eb92f42c79a3efa6309b --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3606caa31d663c592906a95a789cea3c053b4295ca2f6621598f7ac8bd34e3d7 +size 32075776 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..18a4209bfe0132a982b9f9c07e7f18ae3c7020d5 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bd46226e54f58ab176176e1e04b50f2eee527494b80ac3f2798c30cf12e84ee +size 21159936 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..0f696f6cf53661c94295cef9b535104a20377264 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2588c9448cfd4b4b62d9f6a1b3fe8ac1ac9c1ef67b7a5f513e3faa723f17e6 +size 35389440 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0e6ea6cfb8a0d5da1aec9accf7a754009528b86 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e424381de92edbf0e155832dce2a94148e45a7d6615fe0b4b9e563cfaa4e7f46 +size 70778880 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 
0000000000000000000000000000000000000000..23abf79af5b92c860b1207448b2629e4c535c4b2 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6866056496845a8b3ac35de2be810559d44aa15956706812bf0f6206c3c6bec9 +size 18350080 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..1a6725cf45718cb95a3852d1ce6a7a7c9738706a --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b65fea338f76743ae402848ea9a43eb286da9f0780d35e3bb017919efd505ad3 +size 18350080 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..38b1d04216842591701f8ecce4c96bde1c711d48 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2f6e8f5caa93145393ac9fd35ec4bb1315219c68aa75cd18c3d62a85cd3223 +size 31547392 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4a3fb9f15cc334b6a0704a812c2286f1475e18d --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50e29b88d0f8b90e8866c770934a9804907179f157e6a5910760b7aba773208 +size 35389440 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..c8f082b798e27a1192db832550cb13a0c95625e8 --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1d852befe9f429a88d654b1eb9bf03d3d62cca22d54a2c3cdb77a7b232382c +size 70778880 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..489e9c30d62f2670cd25545089e604f00c0ffdd4 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:77c166499826e1b20244a1cd3d56793bf539d58bcd90319e39ef6d27842dc017 +size 18350080 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..bf8f81a3f79cbb5e6612932d1ed478612c61019c --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23c9071bb0fcf202a6abe5ffe94f45bdfc8c0b2806e46a215ef4fa13a581246 +size 18350080 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4302512a9586e410b9aa7b88c8c11afc57b06db --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8b262108f1a169932a71fc4e18ee37300b293975b45a43ca23a982ce08b695 +size 35389440 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3714c7ec9210976cfc5dcd98de09bcea272467e --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baaf7e56f127a1690d635e35ab22ca71f998d5e8d5763181150533a54de3673d +size 70778880 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..6d357810c959154734e077ab20e0278cf4b20ad0 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:896ff0e77ed1b919f84b2b6624d96dc98138f34e0541fe0f7f952e4085fe8d2b +size 32075776 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..d673772a506c6815ace75085910251d3cffe5631 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e44e5360ce57b05aae664db1e3c72992a237a6a789562e10d9f8bfa6f77f3e7 +size 21159936 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 
0000000000000000000000000000000000000000..c26d21d11fa831de3146c22f8d85140ac47f5f40 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:411b63890178d332d8f3325aeba5bcaa879ba13aae041915835bfe3e1fd858dd +size 35389440 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e6e3f57de5a54bdf2808e8d65c1636797864a42 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e65cf39d659ed62d449f4ccaf41580f4e495e2480100325e9567e07ae48b7b39 +size 70778880 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..75b66ff17f932e68c2ba2a84daeadf9ecf558cce --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:898cab75dc552cc9e53e3e61015c82c64b4b23b72d194854c4594e834220829d +size 18350080 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7d340036d220058efe67ad45b76bb938ca1dafc --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb0501b57264af87ead06a448fd5b9d7f24df377e947215183cdf66f3d2c69f0 +size 35389440 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6158ec2b3b6388578ba61d2bb07d9ecd0a06b2d --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57911b6a598da832493da381cc3368f56f828317f5f0e53f60e939a391d24f3 +size 70778880 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..4d07d1a72275b18e5b7a0234992e2dc8b60772de --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2fd9148cc0dc139b5a083355f3347b0af034bc5a27770fc7d4886dc2a772cad2 +size 32075776 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..b0f7df3ef9f1515586dd0ef367d7f0069df61fa4 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cf4db90ec255284c38b2fa9eecace50e5185e9a13836f235d495d2ec96831cf +size 389283840 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..c850412facd739915716c889bdcdf23122c4a17d --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414dc0717b79e8e379ea091ef33b16601ddd0fa8bc6163121d9719169e840656 +size 21159936 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..b65c55648d7ebda1e0167eba41e411e2aec625dd --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c152ddf6b26307a4312f743ba5109b8f05f0b3fd43a46edb7cc757602e3a3286 +size 35389440 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..20eb5b1967590e900ed5fcf855fb201b9755b45a --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ac18b7e26d4ff4eec83a0ccdc5a55d8a4f8171ebb2cecffe52c0e62dd42c2ca +size 70778880 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea274bb8d4b52b80f4b374dba74e07d6b3bc5274 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b09226d17e85061a80f919e81f1d6b27fc37e00a62870e46c50ef8c41562de +size 18350080 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 
0000000000000000000000000000000000000000..52be2c31be160b145062910d9ab2dcb913dfd31d --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663b220ac2b146d0d3c084aa26f267d97eb345f25cba94e61e27ee7c9a8078ea +size 35389440 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..8389419f6e5f9633bfbf481d38d7ea9f5d1fc053 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471babf78083fc3f23e3b64adec105dbc488876eb3884b71354ebd1c0eac3710 +size 70778880 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddccb42ee38c103394dcf0661128988648a1892a --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:086be9e610dc9242db822eb9ba6f723b675f2d81a80a2fa7906e159bfd51b695 +size 32075776 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..a605efad5124e7a33d08b5841171aa1fed9e8566 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18c3bc7b08ae6366881c35df46a6f45778a5486362a7d89b2809ecd09305fcd0 +size 21159936 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..2bbdf101044f23c50a7deb1a55defdc951931727 --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6660f4c33358207525a3b74fd8d5359e7592613e4182541c51bacaef3e2c38cb +size 35389440 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..9d0a954d9b26d5b772da4d02beb5ef6386ce4e73 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:efa22eb79ad10d1aa778a680c0dd7a9af626aebcba5899343abd64a7abfe7a85 +size 70778880 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..04c75e34f9c966655f99f7ca3a3742759dea1a24 --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9c013b917c746f191fa90a3ab3ae3496703df6ecbc6806d550a0f3acfabaccc +size 32956416 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ec17ab2ac27f7923051e1347b4f3ea0a518807d --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f37e578464c571a47b381932ae3eec5031d404df05f68e36b38f29aa2e8ad0 +size 18350080 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..d3ab44b99bc786770bb9f00f61b740833ac65aab --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e4e36601faac1270d8b5c965b64543ae25f2af5523f3a0b034d44ed40b4dbd +size 35389440 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..8941bd2737b36eaf2d19837eb0a042d24b24b607 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51dc55cd01d1f8867e04488e03322a7306d88087f024011fb9a91f1f1d4d5af4 +size 70778880 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..9cb4518f2157d8aec435685aa869d181c0569f40 --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e991bb855643f4610b5159b8796fafb8c8b14a42732d34fff7077e903643a00c +size 32075776 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 
0000000000000000000000000000000000000000..e5dceee92b51d13b97cbe7cc4f9c3e09238b9523 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afbad5e38d3b81251eea95892414503370b3573d49a061ec6c7b29bbce8bf700 +size 21159936 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..fcfab7ba117c56ea8de6974a77291841d7da41bc --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e44b5807f45266f8250c106f88e3ab181a97c879b4d610f5ac1e0aa51cf2b99c +size 70778880 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e2a77b5093a7dfd84a174a2f0eb69dde9ea93d6 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c87afe25d44b1e2f463d9ee3715d499bc06f397039b5934bb60099e3d0379ac6 +size 18350080 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..c766b6da7af16c5e406cf89841a8afea06cdd54d --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e91bcc324c44d5941adddd8854d67bb281015516a5d02caa6ccfe38413e0f6 +size 35389440 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..fed7eaf5f50c515f8c06631a1dacef2de8038a01 --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e5521685f4ea2036479595f8abca1045dd77f7687d5109330bff0d8a5d6a63 +size 35389440 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..95d6f66c2ef939ac6422734a868d0b57454bd01c --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c55728484f31b030668b6b1e9eac729368d330a6b772f423fe9ced327d01785a +size 70778880 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..eebff0cc2b1a8421b91ee72ed664cda2921b31eb --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b291a15d04ac9212fb32b2ec4e553df9673cba23eddc169b1312fde7d4cef38b +size 35389440 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9dfd1d25037fae0edd7ae03c6f880bbaf0980b9 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f623c3bf720bd255a8e2b1a14068b9cb2946f8c194002d81bf1a270ebccd3921 +size 32075776 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ff5a32a10f5f41da9f4becc22ae6fb19703c421 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7976ce794daa4efeba77c9849ffa3ddd9fb43e3824111a19b62901bea3b82f +size 21159936 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b17b835ccaec80552c46ddf67bf6c714559e715 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4419391a9b010c0c560c66303c66738791eeced4f2752093b1761e053bc012d0 +size 35389440 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..28833a7eb0d79fb4fb6de65faf4ea4eedbc981d1 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fccfffaf61494827fe67c0d705fa906e46b6c528b305dab260bd41ee04ad0ea9 +size 70778880 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 
0000000000000000000000000000000000000000..351181bc417c2d9567163ec7b6b25d45551b7eda --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59da913994701b969253e6d63440d6d3bf70cc4d1192874c8836d2be03780bac +size 18350080 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..71245d82ea0805dde90b38b89d7012d4ba0e0b23 --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b9d90b655660fb3c07e8efd89dcced8bb3749ad481e69d4eb7186adbc9f5df +size 35389440 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..a68a31624fb1a1aef8f3b36fd20c76f2699c97cb --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b4f1fc8d29708487b8114e76ad9e3c070adedae9e0eab9e0be2a7465ddbd28d +size 70778880 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5df1afd287afceab96deed79dc08c57802ee425 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095262e51eeaed82abd62764994a42eef99d367555e50379550b6ec1e415e1ac +size 32075776 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..2210b9c699710c8b696a26222d4ab9ddef53a805 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e1f2a07128632e9f9218d9b9c0e9b209a0afde4e8a66c6bc60de916b7a4b05e +size 21159936 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fcf774bb4a30cb52f741d39c72068c600aeb122 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:98fa36fd7e5bf0094217dcb13dc75afd6bf97b3d89f514e64ce46171d4625b3b +size 35389440 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..087c2716ef9f856c06206e5047346cf2289fdc33 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdd5885e14390f6ce67fbca55ad668d4f791d881166c6abdd7d680d37b26b51e +size 70778880 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc3c7861744138058b79981f03da92514b8a4c35 --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33147e905fcd4360f22d0e4e6fafe5dfccc0372761efe546d42d7bf22a42b837 +size 70778880 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..c745357137a032f4e4052c508f5f0adfd6cd3513 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2f70e359c665efacb0d1c3f17a7d00eac66bc3b18762c1ea84fa5607abcef80 +size 18350080 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3dceb9758af20f4aa1fe3a67bc4ceee566720b9 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c48435f4a90f38b50108e4a3337dbb71f2e4fd8b6ff89ca7da40350ec2f9b420 +size 35389440 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ec7f4e21a29bf751be6dc13b1afef6e95cf4a0d --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bdb9c17ed854098ecf614618a7c7941ceb09b6802271ba3abf380ce708b023 +size 70778880 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 
0000000000000000000000000000000000000000..69a590f0f68aef6d8c3a5bf7311088ce6016439a --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a357c3d3b9798438f5d2870fb2a1b8295db48d16e5b2681dbab72b57abead22 +size 32075776 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..8c4cd1074daf0b8a1a5d8d2d50761633a9c8e325 --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cd0e9f142c531befd9a4fd2b3a12723fc8676d82f3e74b09768acf9a7d7294 +size 21159936 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..62b5279cdf3122c844eabebc001087dd1597e81a --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ea192a1e531509c51d2b7205bc096857bce8c42f88603c88e68f28066b4ade +size 35389440 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..514a991bf1d6f4c61eeb9605fc4f91ad9e659999 --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c99204ef300a72ba3f2fd5d55e276d0b8a5171a2f30d7b451699ce75cc9ca8a +size 70778880 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..296b9b27136628af2bc9b253ef23e64d1fba3540 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa1e0e3c3892c1bb3cfeda5ee6946ee988ac765d611dc4e1a498e4aa983de355 +size 18350080 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..379e584577a1da1be708b2817397bfa74905fe2e --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:26ffbf8701803b3e088aa00892d6634ca065c897280ed9c4c978fbae402fee80 +size 35389440 diff --git a/private-llm-config.json b/private-llm-config.json new file mode 100644 index 0000000000000000000000000000000000000000..848e1cb1dae079a72e6b1445efbf95f6fdc282b3 --- /dev/null +++ b/private-llm-config.json @@ -0,0 +1,45 @@ +{ + "model_type": "qwen2", + "quantization": "GPTQ-Int4", + "model_config": { + "hidden_act": "silu", + "hidden_size": 5120, + "intermediate_size": 13824, + "num_attention_heads": 40, + "num_hidden_layers": 48, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "vocab_size": 152064, + "tie_word_embeddings": false, + "context_window_size": 8192, + "prefill_chunk_size": 128, + "tensor_parallel_shards": 1, + "head_dim": 128, + "dtype": "float32", + "max_batch_size": 80 + }, + "vocab_size": 152064, + "context_window_size": 8192, + "sliding_window_size": -1, + "prefill_chunk_size": 128, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "mean_gen_len": 128, + "max_gen_len": 512, + "shift_fill_factor": 0.3, + "temperature": 0.6, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 0.95, + "conv_template": "deepseek-r1-qwen", + "pad_token_id": 151643, + "bos_token_id": 151646, + "eos_token_id": 151643, + "tokenizer_files": [ + "tokenizer.json", + "tokenizer_config.json" + ], + "version": "0.1.0" +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..3240a4948682325cd96df18e9bf9eabbd3631a22 --- /dev/null +++ b/tokenizer_config.json @@ 
-0,0 +1,195 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "<think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "</think>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false,
+ "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "<tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "</tool_call>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%-
endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": null, + "use_default_system_prompt": false +}