diff --git a/config.json b/config.json index e0cbeeb658acc9ec755e6a78f87f1687cc501659..31d5ef4131a0263c5617e4d2ab0f59cc7ac3ffeb 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "_name_or_path": "llm-jp/llm-jp-3-8x13b", + "_name_or_path": "llm-jp/llm-jp-3-8x13b-instruct2", "architectures": [ "MixtralForCausalLM" ], @@ -27,7 +27,7 @@ "router_jitter_noise": 0.0, "sliding_window": null, "tie_word_embeddings": false, - "torch_dtype": "float32", + "torch_dtype": "bfloat16", "transformers_version": "4.47.0", "use_cache": true, "vocab_size": 99584 diff --git a/model-00001-of-00030.safetensors b/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a88029803f6aad6017e739fd25450bafc897f297 --- /dev/null +++ b/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d771ba3326f0117a0f702f35d63a39dd3f813be086097b0f5ad07041d03ce20 +size 4978303696 diff --git a/model-00001-of-00060.safetensors b/model-00001-of-00060.safetensors deleted file mode 100644 index 282fd8185b4d82d13534cd4ffca95b7500587535..0000000000000000000000000000000000000000 --- a/model-00001-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:20511f287f590778d005a4c042a4270fa019dba73cb415eb3fb9f80ce2b77a60 -size 4724000480 diff --git a/model-00002-of-00030.safetensors b/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60e3ca1b04fe1eb1d8b758a0c9b8c408953d3026 --- /dev/null +++ b/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc1d7db8cf21d03720ce2179fe307846bc644a657286b8a5f6dccfb470818afb +size 4881228840 diff --git a/model-00002-of-00060.safetensors b/model-00002-of-00060.safetensors deleted file mode 100644 index a3e4a34c05268f8a921119080e6f15ba5e2f30fa..0000000000000000000000000000000000000000 --- a/model-00002-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f4fbd825cc308aca8d64502a27e604c43ae04c3b086bc0738fe14427060be6f0 -size 4949486440 diff --git a/model-00003-of-00030.safetensors b/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f2d8ecf9a392364acf3621185353719085315aa --- /dev/null +++ b/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0efffa0874ddbe2b4ac381cccaa571814caf76266b627f485b5175a8d82f0de5 +size 4881228840 diff --git a/model-00003-of-00060.safetensors b/model-00003-of-00060.safetensors deleted file mode 100644 index 0658c771fc5ba2217df3c4136bb7348edacc1582..0000000000000000000000000000000000000000 --- a/model-00003-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6d5575d8e28bbc2fff5890272323f09bfed3b6391c6a9f9717ead3fbeefaad9a -size 4812966080 diff --git a/model-00004-of-00030.safetensors b/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d90642499fca2a1e06db7350c4d80228291670a --- /dev/null +++ b/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49f5ce7bdcfb9f44650604bf75f4b4783de28f20e99926e22cd0fe68d8f78ff +size 4949489368 diff --git a/model-00004-of-00060.safetensors b/model-00004-of-00060.safetensors deleted file mode 100644 index ecf3f10a5f7244a9293a4a6de4e734b834704ac5..0000000000000000000000000000000000000000 --- a/model-00004-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8ab411be714f98da9ed6404b91f28438ad2f685d55f1b07f11f9edd8a7108069 -size 4949486440 diff --git a/model-00005-of-00030.safetensors b/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..760fe7d1ae34973b07f78bacf86cc9033960388b --- /dev/null +++ b/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:663398a893950080c958cc4579b53309147e6e5f30aca51ee01cac1ba02f800e +size 4881228840 diff --git a/model-00005-of-00060.safetensors b/model-00005-of-00060.safetensors deleted file mode 100644 index 031991e700eb667cd30f0de119f4b91715d43d80..0000000000000000000000000000000000000000 --- a/model-00005-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb6f806ea8dbf2eb7c45e8716d30c904060a7a02b4d01eeb193df3ca3e2e9300 -size 4949486440 diff --git a/model-00006-of-00030.safetensors b/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..246e4c36fff0dcd2906c1fa8e43e9ac469246214 --- /dev/null +++ b/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ece1546f84cb372599b9517d30c9d66424c7ab131e83777b6f254335bf34bb1c +size 4881228840 diff --git a/model-00006-of-00060.safetensors b/model-00006-of-00060.safetensors deleted file mode 100644 index 00a6c8b3700aa0aa607ac276c7a825f1d2ffe408..0000000000000000000000000000000000000000 --- a/model-00006-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c17c2ef16f33f58d29dab405d0d877fe227f97743d3fc473fa1508552fa73fde -size 4812966080 diff --git a/model-00007-of-00030.safetensors b/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad4cc3a1d8e0b54667e20c5a3c7274c167db3ce3 --- /dev/null +++ b/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb2201653970d5af48123bd1ccab9578eb48c5f2c4986797af1d185b299d5de +size 4949489368 diff --git a/model-00007-of-00060.safetensors b/model-00007-of-00060.safetensors deleted file mode 100644 index ee3788e5ca398621bf8f736c34f6c4b27f75580b..0000000000000000000000000000000000000000 --- a/model-00007-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d8474932b110934d7b8b20325d55268c64c49f2ccfebb5acceb2e385abc5304 -size 4949486440 diff --git a/model-00008-of-00030.safetensors b/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3db3bac054b0c47a9907f7f909c8619a56dbaa1 --- /dev/null +++ b/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bdfce55d37be3952bb4e8714af8a13a9c5692632f0dd6d59ef673784de80b8 +size 4881228864 diff --git a/model-00008-of-00060.safetensors b/model-00008-of-00060.safetensors deleted file mode 100644 index e633e92140927a2f67bf3bc33061d15b539e115e..0000000000000000000000000000000000000000 --- a/model-00008-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89288080ab2c4409d94005bb98556ecada4a22966b3c63788e0d2aa6add0ef56 -size 4949486440 diff --git a/model-00009-of-00030.safetensors b/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ace2cf543a7995bb801dd36482d9a8849754bf21 --- /dev/null +++ b/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0dfed66ff4834dbde00a9066878dbf756d79e8e80e5180464d5e2f3d1f77fd +size 4881228880 diff --git a/model-00009-of-00060.safetensors b/model-00009-of-00060.safetensors deleted file mode 100644 index 86fc260c3e798a316c640ca324d67c579bd90c7f..0000000000000000000000000000000000000000 --- a/model-00009-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:96a901119bd66d27f9dd61c7b02e3d74d236b4b0def2359200de001ef31638d3 -size 4812966080 diff --git a/model-00010-of-00030.safetensors b/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10e5d2df2612e9bc0e1530a20e0a2cbfe15337a1 --- /dev/null +++ b/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a96accd07eedcea1b0a8418e3ba20e53b99bd21cc8bde69e0391e5e0cdf8447 +size 4949489408 diff --git a/model-00010-of-00060.safetensors b/model-00010-of-00060.safetensors deleted file mode 100644 index 110a7d9c90cb17545ea5a3c881faeb7ca9cfb686..0000000000000000000000000000000000000000 --- a/model-00010-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:54e000336f67517786dce964cb748eeda0228207a9dc096b6678d7d4cd3a5b9c -size 4949486440 diff --git a/model-00011-of-00030.safetensors b/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ce5f981b873fcd8b21e9b7323e93270b67f3bc0 --- /dev/null +++ b/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6432022bb4866454ebbcf111a1c3c3ceb30655ca146781dc7e1ad408af5e73f6 +size 4881228880 diff --git a/model-00011-of-00060.safetensors b/model-00011-of-00060.safetensors deleted file mode 100644 index 108950c50b062c96adb9df7acf5560d8d70d4750..0000000000000000000000000000000000000000 --- a/model-00011-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e32eb182d5a9541dca2d92da7ad66d53584e81a26f3c9d8fffea70746943116 -size 4949486440 diff --git a/model-00012-of-00030.safetensors b/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0291e324a557b5ee85cbc651229cffb78c7d6ce --- /dev/null +++ b/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:937fcf468f6e1ef82395ff51b45cf123ac8b6d7231a9522ac01058d1c0ba03d0 +size 4949489416 diff --git a/model-00012-of-00060.safetensors b/model-00012-of-00060.safetensors deleted file mode 100644 index db3a6cacce2da666862dd8815423247e9d8915d2..0000000000000000000000000000000000000000 --- a/model-00012-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1486d54d729534a03ecbe033ce7ccfb7c9cbe80f7116a3027c3ec5775559b5c3 -size 4812966080 diff --git a/model-00013-of-00030.safetensors b/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3efa68f0cada8405c4e41d61549724c632f30c04 --- /dev/null +++ b/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6e1d4edd5f1d3cf953eb97bd23b222d7e911d8a1311cdf6b9753441b72602b +size 4881228880 diff --git a/model-00013-of-00060.safetensors b/model-00013-of-00060.safetensors deleted file mode 100644 index 7654ec8d2987e28717626012b0065a2141fc498f..0000000000000000000000000000000000000000 --- a/model-00013-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1787b4593ebcbd6b9e601b773701de1298ed4702648bc1f8c38717c7d494c5a0 -size 4949486440 diff --git a/model-00014-of-00030.safetensors b/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71659f2073f0d0a7e3bc27aedea4da2f4b80c3de --- /dev/null +++ b/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b7cd87139437991b2389c0ec0d9fed97bc3842eb3ffffe2609b3124e1fe37f +size 4881228880 diff --git a/model-00014-of-00060.safetensors b/model-00014-of-00060.safetensors deleted file mode 100644 index aa46ec460f472c75aac64da4380f3893f2613f55..0000000000000000000000000000000000000000 --- a/model-00014-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:37319046c406f9fc988ed3862ae23b2f0108525298ee9ef09dd81bb4161bc846 -size 4949486440 diff --git a/model-00015-of-00030.safetensors b/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06f0757e34a404ee3501dd184bf01ea09d5e3b57 --- /dev/null +++ b/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60d7d42e9cd8d075b3e6767188c24c33d5bc14d2af830a45a3a5ec369da7371 +size 4949489408 diff --git a/model-00015-of-00060.safetensors b/model-00015-of-00060.safetensors deleted file mode 100644 index 370d963721498eb3d44a4be24e610aeff8ce5338..0000000000000000000000000000000000000000 --- a/model-00015-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:799ae8f62d59622d813f2c14d280982c22eb58b4ee7ac5fa761d5b20dd5b007d -size 4812966080 diff --git a/model-00016-of-00030.safetensors b/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9ffb5d0a06fc207354ac81d9f6512da0aa85eee2 --- /dev/null +++ b/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:445a859b84dcde9620791c45686f371aeabbf5702f7bf73fbf6b70de9151abd8 +size 4881228880 diff --git a/model-00016-of-00060.safetensors b/model-00016-of-00060.safetensors deleted file mode 100644 index 57db21874cd0729c2c11dead92e90ee778d851c0..0000000000000000000000000000000000000000 --- a/model-00016-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:606f3117da3ba0172d61da1120465705795ed6891c719e9bf0547870584a9884 -size 4949486456 diff --git a/model-00017-of-00030.safetensors b/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddfd4df14640ef9c4261fc6f773055dc38faa2ec --- /dev/null +++ b/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d183c811996a2ea2773490578624d6271f14b709ba49bccc737f44a6e3f4d3 +size 4881228880 diff --git a/model-00017-of-00060.safetensors b/model-00017-of-00060.safetensors deleted file mode 100644 index 4cad242366a9de21c17c179c22a2596bf3df9b1f..0000000000000000000000000000000000000000 --- a/model-00017-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0f7eaaacb5a10ed507b29dd3f52879ef91ed8b81c6a5ae666ea07cd095d3f16 -size 4949486464 diff --git a/model-00018-of-00030.safetensors b/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10c1a897ebf6bc8d3879b18f1b72047aa59ddb02 --- /dev/null +++ b/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0fab385ca3972c3ff842dd406d7beb7292fc089911bbd3070cab37ac64305b +size 4949489408 diff --git a/model-00018-of-00060.safetensors b/model-00018-of-00060.safetensors deleted file mode 100644 index aff194eb38451dc8ba73680079a3f76aa6645ff8..0000000000000000000000000000000000000000 --- a/model-00018-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4ed8b91d97b17d8a0977ddb52c3bd43188e7134d3767e7b71d196deeb29c6ce4 -size 4812966096 diff --git a/model-00019-of-00030.safetensors b/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82fad2decb6bd56dc5107373b9a328990b2143c2 --- /dev/null +++ b/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ba7bb9f085ac0b8bb85b83f1f6f2e2773f3e42919b170f993b0f279b7bb193 +size 4881228880 diff --git a/model-00019-of-00060.safetensors b/model-00019-of-00060.safetensors deleted file mode 100644 index c5a3d2d6a0dec54b48b1135185f24d6ce9911504..0000000000000000000000000000000000000000 --- a/model-00019-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fafc6bccb4ffb0dd1245faf01406a178d9b10a5470947b9892648b5c60295c81 -size 4949486456 diff --git a/model-00020-of-00030.safetensors b/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eeb3dc7b56bd81d74f1a083fbce4ad0d20f2e137 --- /dev/null +++ b/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7847bc2194709dede5763f16d487b06e06ca8f6e30c438f5fdb2cb123126a8 +size 4881228880 diff --git a/model-00020-of-00060.safetensors b/model-00020-of-00060.safetensors deleted file mode 100644 index 8a1ea36372dfbc327d225e13075c116fc120b7d9..0000000000000000000000000000000000000000 --- a/model-00020-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fa8e1b8aee54d75d59f480b35c7e9f5bcbec374b42df9f75f33202e2289e5b0e -size 4949486464 diff --git a/model-00021-of-00030.safetensors b/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efcae25b9c1c0beb9c7b1758b48310135bfde21c --- /dev/null +++ b/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f3b5a9b8aef3f15a746bfc8d8f25cc08e408c98873ab528887690496adedaa +size 4949489408 diff --git a/model-00021-of-00060.safetensors b/model-00021-of-00060.safetensors deleted file mode 100644 index 0b6173fc74c83cc0e64ce213e8eef3fd68805bcc..0000000000000000000000000000000000000000 --- a/model-00021-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fabe2b32aff28f87614ad5c82cfe67d02cd4e81aca5c01dfd3da73612207cc75 -size 4812966096 diff --git a/model-00022-of-00030.safetensors b/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d518874097e10957747b00426ef57fb0426b1627 --- /dev/null +++ b/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce16b81a57f4ab97baaec5b350a03fd923f77fcde569fcee534d4a523daa0354 +size 4881228880 diff --git a/model-00022-of-00060.safetensors b/model-00022-of-00060.safetensors deleted file mode 100644 index ae0075456297d4bd4e543cb01a9232afed3a3e20..0000000000000000000000000000000000000000 --- a/model-00022-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:98c95ac6273d41bb92b5b9d17b09b2e438df51cf2dec828470645564638dd4ac -size 4949486456 diff --git a/model-00023-of-00030.safetensors b/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9fb96da703f73295a1715a6a657b736faaade66c --- /dev/null +++ b/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15ad462b1a7321616e4902ab4d6c486fec0dae943c6e201d77e52fcc079c0c6 +size 4986107432 diff --git a/model-00023-of-00060.safetensors b/model-00023-of-00060.safetensors deleted file mode 100644 index d7eb60a7c762895f34c9d9d99b035653701e14ab..0000000000000000000000000000000000000000 --- a/model-00023-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:80606c2af5eb44a09582da4e7d6d267b9442c1da43b3d778a052c0acb0cf0dad -size 4949486464 diff --git a/model-00024-of-00030.safetensors b/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..637e4f606e836cfc05a1a44f9ec03ac0e36be106 --- /dev/null +++ b/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45443c6d88fc477ce01d068d3db4a814ed1ae7650b3034b98c4834632fda0947 +size 4986168760 diff --git a/model-00024-of-00060.safetensors b/model-00024-of-00060.safetensors deleted file mode 100644 index 745bd8454a0a812ac1e6e53caa6eb0cb9fc08ff4..0000000000000000000000000000000000000000 --- a/model-00024-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2edac4cd5f27a753d66a21bd47db41dcb57d041290d5adf571feaa4f699c9f1d -size 4917865000 diff --git a/model-00025-of-00030.safetensors b/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d16652c0fbba70cb36227ecaec824674a3a2fe2 --- /dev/null +++ b/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc8a13024161d994c4f03b3816652b3a40fe4c328824722eb023ab054d32332b +size 4881228880 diff --git a/model-00025-of-00060.safetensors b/model-00025-of-00060.safetensors deleted file mode 100644 index b04599f5d1b04a7aa6aaab7bfc7c7b9295ddd58d..0000000000000000000000000000000000000000 --- a/model-00025-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:676c031a60bf447af1c02a12b6859e847a800e3167f6ddda8c7746b508c2965a -size 4844587552 diff --git a/model-00026-of-00030.safetensors b/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..335110458ad17e0c27637fb0607dd91333fc1624 --- /dev/null +++ b/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee17715a10fa0e64ab5a76782a4af664b0c7fc3566fd4b4ef3f26abccdb02ec +size 4949489408 diff --git a/model-00026-of-00060.safetensors b/model-00026-of-00060.safetensors deleted file mode 100644 index a81b218a32d7909efb53f65a75c8a892194e5f24..0000000000000000000000000000000000000000 --- a/model-00026-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70a7df39feaee2957282ef5a81a630a5c5b67ed0e85a699279cd50f1a3ac5be6 -size 4949486464 diff --git a/model-00027-of-00030.safetensors b/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8b1f7ea874f8814f26f40d63da90180a0b2114e --- /dev/null +++ b/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36dfff60efed9c4c8e11d7be24ce244fd64e85a6652f35a9712a3659031cb00 +size 4881228880 diff --git a/model-00027-of-00060.safetensors b/model-00027-of-00060.safetensors deleted file mode 100644 index 8594a57b995a66d4af2faee993f592e9371592f2..0000000000000000000000000000000000000000 --- a/model-00027-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ad9c0aed9666017b07573a1eb858fcd4e7a3e8ca99123db698c5e31f4332bb6f -size 4949486464 diff --git a/model-00028-of-00030.safetensors b/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaf084e52a18866484c743be86ff02bb0845dc27 --- /dev/null +++ b/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208b844deb2db9fad0fbe94e5b2a55a5dd7903b8f53be131c75db09d771b7ac7 +size 4881228880 diff --git a/model-00028-of-00060.safetensors b/model-00028-of-00060.safetensors deleted file mode 100644 index 4204d880f812de5d67332488b1f23f48c3bcd3fd..0000000000000000000000000000000000000000 --- a/model-00028-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dcb6d3ed83cffe90c32765129ae1e76ad90ad58f2397651d76de24b9bad33ccb -size 4812966096 diff --git a/model-00029-of-00030.safetensors b/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9c83d1fabe2f2e97f28d21f3b1721e0e5fb2b1c --- /dev/null +++ b/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:870d3c2c7d3859d33c25094f0310f14a691c205a0b4feb107af0c4acb90693dd +size 4949489408 diff --git a/model-00029-of-00060.safetensors b/model-00029-of-00060.safetensors deleted file mode 100644 index 3f9c9aad515e542ce17eb0d6256dd4108ac5d66e..0000000000000000000000000000000000000000 --- a/model-00029-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0f728cb89cbff4987498d882204eed4dfaf3ad66ce3a87f86eff58802bbfe769 -size 4949486464 diff --git a/model-00030-of-00030.safetensors b/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ed4bd831dd410cf05770057ce565ecfd1bdd5fb --- /dev/null +++ b/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d73a35df68a08623e8ec617165029bf027b944eca38d492334ba1783a375e92 +size 3850929176 diff --git a/model-00030-of-00060.safetensors b/model-00030-of-00060.safetensors deleted file mode 100644 index e60017a24fbaa3ac48debf4da31f4df5d9384247..0000000000000000000000000000000000000000 --- a/model-00030-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b5114fa0c94f21452fe5a3d17299ccd3e45e0faf067e96eb9d2db9f6d1db9fa0 -size 4949486464 diff --git a/model-00031-of-00060.safetensors b/model-00031-of-00060.safetensors deleted file mode 100644 index 2cda61bb6224a21bff4d98fe3d6a53a86c8b7fca..0000000000000000000000000000000000000000 --- a/model-00031-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dfaf0c398083432fba10c6d20c3ed543a673dd48744a1bb9e7229dd03903bbcf -size 4812966096 diff --git a/model-00032-of-00060.safetensors b/model-00032-of-00060.safetensors deleted file mode 100644 index 62d03d302b1c144d11ccf41da7a71d5119af32fa..0000000000000000000000000000000000000000 --- a/model-00032-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:970ddea92ad66884ffc5d1d54a691aecdb6911ce609897956e4776a60b945289 -size 4949486464 diff --git a/model-00033-of-00060.safetensors b/model-00033-of-00060.safetensors deleted file mode 100644 index c662419af1d9c13f692599757873464f8d045e2c..0000000000000000000000000000000000000000 --- a/model-00033-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c3cba65405d2b1ace49f33a257fb086f6a0fbd51e6b42d4d32d6135c4d006e75 -size 4949486464 diff --git a/model-00034-of-00060.safetensors b/model-00034-of-00060.safetensors deleted file mode 100644 index 2d1caade9014cad25ec6ce405b99cc6739a70018..0000000000000000000000000000000000000000 --- a/model-00034-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:56d1bfdb91960e968255ee06f69acc1a29224239eb0b1f60522cf5246e2eb0a6 -size 4812966096 diff --git a/model-00035-of-00060.safetensors b/model-00035-of-00060.safetensors deleted file mode 100644 index 309b1ddc06f68baea807f03eacd20431eb4365e4..0000000000000000000000000000000000000000 --- a/model-00035-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7270deb859dc1683e5531d196bd5fad42928e8d308e814d0c7f80d696962fc46 -size 4949486464 diff --git a/model-00036-of-00060.safetensors b/model-00036-of-00060.safetensors deleted file mode 100644 index bfe7d5bbd8e55b4cd2e0defee247d8076fd6fdf0..0000000000000000000000000000000000000000 --- a/model-00036-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9b7a0f4a2b9e90ae29b4fb28a104915fc84bb5fa0753f7832086ae0fecd8375a -size 4949486464 diff --git a/model-00037-of-00060.safetensors b/model-00037-of-00060.safetensors deleted file mode 100644 index 288a204dceca277249dd337315088af80af49103..0000000000000000000000000000000000000000 --- a/model-00037-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:736e50150271042c1624e92113450666d7cd866fb8ad76585712269fb78f8483 -size 4812966096 diff --git a/model-00038-of-00060.safetensors b/model-00038-of-00060.safetensors deleted file mode 100644 index c6b94d8f2c5e41200f3b652677284af3726d9fee..0000000000000000000000000000000000000000 --- a/model-00038-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d489866b73f93e08fe9d7ba59efb736ee000d766d7603dc225a0c5ae1fee9202 -size 4949486464 diff --git a/model-00039-of-00060.safetensors b/model-00039-of-00060.safetensors deleted file mode 100644 index bac9dfeb33d483f09366ca6752312a7ab3209c6d..0000000000000000000000000000000000000000 --- a/model-00039-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9ba2ee46237245ba88e674aadc55c583a71e449917e21d193b4ad87ddbb19a6 -size 4949486464 diff --git a/model-00040-of-00060.safetensors b/model-00040-of-00060.safetensors deleted file mode 100644 index b155a27b742ea9c88da927008a4bc3a65056df1d..0000000000000000000000000000000000000000 --- a/model-00040-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6b2803a7ea56be11b815fef8850497d1d918c09b6f34e3d7ef19350ce81629a3 -size 4812966096 diff --git a/model-00041-of-00060.safetensors b/model-00041-of-00060.safetensors deleted file mode 100644 index e8d35e0e0ae2ce56df5ef0bf64039de2bca8cc49..0000000000000000000000000000000000000000 --- a/model-00041-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:70a06745be7db1925ff81e0a465db5b1bb52e07edbdfe9dbaebddfd07e422586 -size 4949486456 diff --git a/model-00042-of-00060.safetensors b/model-00042-of-00060.safetensors deleted file mode 100644 index b993033ab460beb8226b7091d10e39aeea0fc38f..0000000000000000000000000000000000000000 --- a/model-00042-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:92bd6d3f01d4a49355f2b8fa037d55863ef90e724ddc264c2d00a9aa500a3c87 -size 4949486464 diff --git a/model-00043-of-00060.safetensors b/model-00043-of-00060.safetensors deleted file mode 100644 index e1f11560ce6fac2ba44d3b2fb49bc8d651a099c4..0000000000000000000000000000000000000000 --- a/model-00043-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32886d29da1b0ee3d217f59d4a2c7742f842185125e53f59d102331d184361e9 -size 4812966096 diff --git a/model-00044-of-00060.safetensors b/model-00044-of-00060.safetensors deleted file mode 100644 index 14e9aabd6bc495d1c2f63fdfdba0ed26ed34587e..0000000000000000000000000000000000000000 --- a/model-00044-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1b27e3f019e6c985f7906a7989c8c1f5ac2a7e11eaafce5a019b3249d1a5a6e3 -size 4949486456 diff --git a/model-00045-of-00060.safetensors b/model-00045-of-00060.safetensors deleted file mode 100644 index 6803094fe3e3538b9583a5410ad66f0bff218661..0000000000000000000000000000000000000000 --- a/model-00045-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a0428e6062a877d0bbed915a243c4d7e1aa03a822dfcacaeab84f908e583639e -size 4949486464 diff --git a/model-00046-of-00060.safetensors b/model-00046-of-00060.safetensors deleted file mode 100644 index be9c515f0ad460264b4d9b69fa0b48e4e4e3a915..0000000000000000000000000000000000000000 --- a/model-00046-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1924f37f2e9f9aabe74959130c2c3b5bb2db90086ee9c6eae5a7cfd41d097378 -size 4812966096 diff --git a/model-00047-of-00060.safetensors b/model-00047-of-00060.safetensors deleted file mode 100644 index aa4f38e74ceeb6279e1dd266152186b7683a73bd..0000000000000000000000000000000000000000 --- a/model-00047-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db2ba206ec03d50484f1f3ec54b1422d2ef4e9569dffe396c9c99ec4d4128de6 -size 4949486456 diff --git a/model-00048-of-00060.safetensors b/model-00048-of-00060.safetensors deleted file mode 100644 index 3fc5daf3708ed995e6e679a01963aad16b11a558..0000000000000000000000000000000000000000 --- a/model-00048-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:045037d945b94bb1f6cafa23faa5841d1bc44fce2c0284cb62724ea78a5a4142 -size 4949486464 diff --git a/model-00049-of-00060.safetensors b/model-00049-of-00060.safetensors deleted file mode 100644 index 223ab1f1cb0e8b51344aa8d193b9259e05c4a36e..0000000000000000000000000000000000000000 --- a/model-00049-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:061fa754d14cdeab5f629bdbb2ffa6307d29f4f394ea3d634fe958145eb91a3d -size 4917865000 diff --git a/model-00050-of-00060.safetensors b/model-00050-of-00060.safetensors deleted file mode 100644 index a599fb44ef3b07fea39a9559c974dc7a63293f38..0000000000000000000000000000000000000000 --- a/model-00050-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09bd6da97f5ab2cb4cb775a85e62b679ebe6e4c305d8a11dd07f88575850bb8f -size 4844587552 diff --git a/model-00051-of-00060.safetensors b/model-00051-of-00060.safetensors deleted file mode 100644 index aa89541fa5384706ccd612d5cceeed4662002c7b..0000000000000000000000000000000000000000 --- a/model-00051-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed996e5f06e9fa59ee64b388f13b20046a379c8a8a16f955d48890490e0d5eae -size 4949486464 diff --git a/model-00052-of-00060.safetensors b/model-00052-of-00060.safetensors deleted file mode 100644 index 092f8dfeafb37eb05185d05f555d74fe47e5f354..0000000000000000000000000000000000000000 --- a/model-00052-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0845e647bc742dd3fc88e5fffdd20cb0280d8af8606d14c22db104526927e66e -size 4949486464 diff --git a/model-00053-of-00060.safetensors b/model-00053-of-00060.safetensors deleted file mode 100644 index 03a6d68b079171eb04bee0d645db8d37fcf54651..0000000000000000000000000000000000000000 --- a/model-00053-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3fd971c3d897995cb07ab2646604a4f59da4f3bfc5a595e8f0ce4ab900cce16e -size 4812966096 diff --git a/model-00054-of-00060.safetensors b/model-00054-of-00060.safetensors deleted file mode 100644 index 6de9d81f2b6a48be2edc1e89e6492db8ad916fe2..0000000000000000000000000000000000000000 --- a/model-00054-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fff253532c0e2c79f8f13f66aa659313ed8c28efb062bd3e1caff4c87390c6fb -size 4949486464 diff --git a/model-00055-of-00060.safetensors b/model-00055-of-00060.safetensors deleted file mode 100644 index 7918c5cd991a54289101bdcefa2704c056a22d4f..0000000000000000000000000000000000000000 --- a/model-00055-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c52e3b0cd7fc7beb87643b120218b0995e0cb323a883ecfbbd0d310c4737514c -size 4949486464 diff --git a/model-00056-of-00060.safetensors b/model-00056-of-00060.safetensors deleted file mode 100644 index 63667083c4197ad0bcfb6bbd96dbb6911db3f99f..0000000000000000000000000000000000000000 --- a/model-00056-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:652f55b9e6e719b88c1716238b4f95fba4cd15d3a224ec59cb2aec696d7d60b7 -size 4812966096 diff --git a/model-00057-of-00060.safetensors b/model-00057-of-00060.safetensors deleted file mode 100644 index a0056ce592465bdd0dd7be1b6fe2dc90e098613e..0000000000000000000000000000000000000000 --- a/model-00057-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fe06b7eda15758ccfa0d70b347fef5ce5eb658051d1b2f01a14ab538e2565767 -size 4949486464 diff --git a/model-00058-of-00060.safetensors b/model-00058-of-00060.safetensors deleted file mode 100644 index 52d1fd441753739dc92650604ac8c669c5226d88..0000000000000000000000000000000000000000 --- a/model-00058-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a662a358e7ab33159fb041776715827eec5292e2a3d13d7f2eae477ae7d8270 -size 4949486464 diff --git a/model-00059-of-00060.safetensors b/model-00059-of-00060.safetensors deleted file mode 100644 index 063abb550b224624bb6b7081eee81c975a192500..0000000000000000000000000000000000000000 --- a/model-00059-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f517e9da0771163d796eebae68f3562ec3e19a95d00a66676ec2f47526e5e41 -size 4812966096 diff --git a/model-00060-of-00060.safetensors b/model-00060-of-00060.safetensors deleted file mode 100644 index b19715b3c144115dc35c0cbcc21a250da942bab6..0000000000000000000000000000000000000000 --- a/model-00060-of-00060.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e0ff821ce313fa4b4652155ac5ca9ea52588454dab55b3bc95f8591b609d730b -size 3455120464 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 3cac9a21d8893a7476aa4cc36d4d620e6aa9e3e5..047eb1fe0b6464e579d69e0432cb9f02eaf34c6d 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,1250 +1,1250 @@ { "metadata": { - "total_size": 292655288320 + "total_size": 146327644160 }, "weight_map": { - "lm_head.weight": "model-00060-of-00060.safetensors", - "model.embed_tokens.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00060.safetensors", - "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00060.safetensors", - "model.layers.0.input_layernorm.weight": "model-00002-of-00060.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00060.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00060.safetensors", - "model.layers.1.input_layernorm.weight": "model-00004-of-00060.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00060.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.10.block_sparse_moe.gate.weight": "model-00016-of-00060.safetensors", - "model.layers.10.input_layernorm.weight": "model-00017-of-00060.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00060.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00016-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00060.safetensors", - "model.layers.11.input_layernorm.weight": "model-00019-of-00060.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00019-of-00060.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.12.block_sparse_moe.gate.weight": "model-00019-of-00060.safetensors", - "model.layers.12.input_layernorm.weight": "model-00020-of-00060.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00060.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00019-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00060.safetensors", - "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00060.safetensors", - "model.layers.13.input_layernorm.weight": "model-00022-of-00060.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00022-of-00060.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.14.block_sparse_moe.gate.weight": "model-00022-of-00060.safetensors", - "model.layers.14.input_layernorm.weight": "model-00023-of-00060.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00060.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00022-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00060.safetensors", - "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00060.safetensors", - "model.layers.15.input_layernorm.weight": "model-00024-of-00060.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00060.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.16.block_sparse_moe.gate.weight": "model-00025-of-00060.safetensors", - "model.layers.16.input_layernorm.weight": "model-00026-of-00060.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00060.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00025-of-00060.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00025-of-00060.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00060.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00025-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00060.safetensors", - "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00060.safetensors", - "model.layers.17.input_layernorm.weight": "model-00027-of-00060.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00060.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00060.safetensors", - "model.layers.18.input_layernorm.weight": "model-00029-of-00060.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00060.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00060.safetensors", - "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00060.safetensors", - "model.layers.19.input_layernorm.weight": "model-00030-of-00060.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00060.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00060.safetensors", - "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00060.safetensors", - "model.layers.2.input_layernorm.weight": "model-00005-of-00060.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00060.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00060.safetensors", - "model.layers.20.input_layernorm.weight": "model-00032-of-00060.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00032-of-00060.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00060.safetensors", - "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00060.safetensors", - "model.layers.21.input_layernorm.weight": "model-00033-of-00060.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00060.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00060.safetensors", - "model.layers.22.input_layernorm.weight": "model-00035-of-00060.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00035-of-00060.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.23.block_sparse_moe.gate.weight": "model-00035-of-00060.safetensors", - "model.layers.23.input_layernorm.weight": "model-00036-of-00060.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00060.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00035-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00060.safetensors", - "model.layers.24.input_layernorm.weight": "model-00038-of-00060.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00038-of-00060.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.25.block_sparse_moe.gate.weight": "model-00038-of-00060.safetensors", - "model.layers.25.input_layernorm.weight": "model-00039-of-00060.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00060.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00038-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00060.safetensors", - "model.layers.26.input_layernorm.weight": "model-00041-of-00060.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00041-of-00060.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.27.block_sparse_moe.gate.weight": "model-00041-of-00060.safetensors", - "model.layers.27.input_layernorm.weight": "model-00042-of-00060.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00060.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00041-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00060.safetensors", - "model.layers.28.input_layernorm.weight": "model-00044-of-00060.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00044-of-00060.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.29.block_sparse_moe.gate.weight": "model-00044-of-00060.safetensors", - "model.layers.29.input_layernorm.weight": "model-00045-of-00060.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00060.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00044-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00060.safetensors", - "model.layers.3.input_layernorm.weight": "model-00007-of-00060.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00007-of-00060.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00060.safetensors", - "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00060.safetensors", - "model.layers.30.input_layernorm.weight": "model-00047-of-00060.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00047-of-00060.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.31.block_sparse_moe.gate.weight": "model-00047-of-00060.safetensors", - "model.layers.31.input_layernorm.weight": "model-00048-of-00060.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00060.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00047-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00060.safetensors", - "model.layers.32.block_sparse_moe.gate.weight": "model-00048-of-00060.safetensors", - "model.layers.32.input_layernorm.weight": "model-00049-of-00060.safetensors", - "model.layers.32.post_attention_layernorm.weight": "model-00049-of-00060.safetensors", - "model.layers.32.self_attn.k_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.32.self_attn.o_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.32.self_attn.q_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.32.self_attn.v_proj.weight": "model-00048-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.33.block_sparse_moe.gate.weight": "model-00050-of-00060.safetensors", - "model.layers.33.input_layernorm.weight": "model-00051-of-00060.safetensors", - "model.layers.33.post_attention_layernorm.weight": "model-00051-of-00060.safetensors", - "model.layers.33.self_attn.k_proj.weight": "model-00050-of-00060.safetensors", - "model.layers.33.self_attn.o_proj.weight": "model-00050-of-00060.safetensors", - "model.layers.33.self_attn.q_proj.weight": "model-00049-of-00060.safetensors", - "model.layers.33.self_attn.v_proj.weight": "model-00050-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00060.safetensors", - "model.layers.34.block_sparse_moe.gate.weight": "model-00051-of-00060.safetensors", - "model.layers.34.input_layernorm.weight": "model-00052-of-00060.safetensors", - "model.layers.34.post_attention_layernorm.weight": "model-00052-of-00060.safetensors", - "model.layers.34.self_attn.k_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.34.self_attn.o_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.34.self_attn.q_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.34.self_attn.v_proj.weight": "model-00051-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.35.block_sparse_moe.gate.weight": "model-00052-of-00060.safetensors", - "model.layers.35.input_layernorm.weight": "model-00054-of-00060.safetensors", - "model.layers.35.post_attention_layernorm.weight": "model-00054-of-00060.safetensors", - "model.layers.35.self_attn.k_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.35.self_attn.o_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.35.self_attn.q_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.35.self_attn.v_proj.weight": "model-00052-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00060.safetensors", - "model.layers.36.block_sparse_moe.gate.weight": "model-00054-of-00060.safetensors", - "model.layers.36.input_layernorm.weight": "model-00055-of-00060.safetensors", - "model.layers.36.post_attention_layernorm.weight": "model-00055-of-00060.safetensors", - "model.layers.36.self_attn.k_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.36.self_attn.o_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.36.self_attn.q_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.36.self_attn.v_proj.weight": "model-00054-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.37.block_sparse_moe.gate.weight": "model-00055-of-00060.safetensors", - "model.layers.37.input_layernorm.weight": "model-00057-of-00060.safetensors", - "model.layers.37.post_attention_layernorm.weight": "model-00057-of-00060.safetensors", - "model.layers.37.self_attn.k_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.37.self_attn.o_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.37.self_attn.q_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.37.self_attn.v_proj.weight": "model-00055-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00060.safetensors", - "model.layers.38.block_sparse_moe.gate.weight": "model-00057-of-00060.safetensors", - "model.layers.38.input_layernorm.weight": "model-00058-of-00060.safetensors", - "model.layers.38.post_attention_layernorm.weight": "model-00058-of-00060.safetensors", - "model.layers.38.self_attn.k_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.38.self_attn.o_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.38.self_attn.q_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.38.self_attn.v_proj.weight": "model-00057-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00058-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00059-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00060-of-00060.safetensors", - "model.layers.39.block_sparse_moe.gate.weight": "model-00058-of-00060.safetensors", - "model.layers.39.input_layernorm.weight": "model-00060-of-00060.safetensors", - "model.layers.39.post_attention_layernorm.weight": "model-00060-of-00060.safetensors", - "model.layers.39.self_attn.k_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.39.self_attn.o_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.39.self_attn.q_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.39.self_attn.v_proj.weight": "model-00058-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00060.safetensors", - "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00060.safetensors", - "model.layers.4.input_layernorm.weight": "model-00008-of-00060.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00060.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00060.safetensors", - "model.layers.5.input_layernorm.weight": "model-00010-of-00060.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00010-of-00060.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.6.block_sparse_moe.gate.weight": "model-00010-of-00060.safetensors", - "model.layers.6.input_layernorm.weight": "model-00011-of-00060.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00060.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00010-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00060.safetensors", - "model.layers.7.input_layernorm.weight": "model-00013-of-00060.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00013-of-00060.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.8.block_sparse_moe.gate.weight": "model-00013-of-00060.safetensors", - "model.layers.8.input_layernorm.weight": "model-00014-of-00060.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00060.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00013-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00060.safetensors", - "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00060.safetensors", - "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00060.safetensors", - "model.layers.9.input_layernorm.weight": "model-00016-of-00060.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00016-of-00060.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00060.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00060.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00060.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00060.safetensors", - "model.norm.weight": "model-00060-of-00060.safetensors" + "lm_head.weight": "model-00030-of-00030.safetensors", + "model.embed_tokens.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00030.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00030.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00030.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00008-of-00030.safetensors", + "model.layers.10.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00030.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00009-of-00030.safetensors", + "model.layers.11.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00010-of-00030.safetensors", + "model.layers.12.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00010-of-00030.safetensors", + "model.layers.13.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00011-of-00030.safetensors", + "model.layers.14.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00030.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00012-of-00030.safetensors", + "model.layers.15.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00012-of-00030.safetensors", + "model.layers.16.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00013-of-00030.safetensors", + "model.layers.17.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00014-of-00030.safetensors", + "model.layers.18.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00030.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00015-of-00030.safetensors", + "model.layers.19.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00002-of-00030.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00015-of-00030.safetensors", + "model.layers.20.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00016-of-00030.safetensors", + "model.layers.21.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00017-of-00030.safetensors", + "model.layers.22.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00018-of-00030.safetensors", + "model.layers.23.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00018-of-00030.safetensors", + "model.layers.24.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00019-of-00030.safetensors", + "model.layers.25.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00020-of-00030.safetensors", + "model.layers.26.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00021-of-00030.safetensors", + "model.layers.27.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00021-of-00030.safetensors", + "model.layers.28.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00022-of-00030.safetensors", + "model.layers.29.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00003-of-00030.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00030.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00023-of-00030.safetensors", + "model.layers.30.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00024-of-00030.safetensors", + "model.layers.31.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00024-of-00030.safetensors", + "model.layers.32.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00025-of-00030.safetensors", + "model.layers.33.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00030.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00026-of-00030.safetensors", + "model.layers.34.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00026-of-00030.safetensors", + "model.layers.35.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00027-of-00030.safetensors", + "model.layers.36.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00028-of-00030.safetensors", + "model.layers.37.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00029-of-00030.safetensors", + "model.layers.38.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00030.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00029-of-00030.safetensors", + "model.layers.39.input_layernorm.weight": "model-00030-of-00030.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00030-of-00030.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00004-of-00030.safetensors", + "model.layers.4.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00004-of-00030.safetensors", + "model.layers.5.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00005-of-00030.safetensors", + "model.layers.6.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00006-of-00030.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00007-of-00030.safetensors", + "model.layers.8.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00030.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00007-of-00030.safetensors", + "model.layers.9.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.norm.weight": "model-00030-of-00030.safetensors" } }