Holy-fox committed on
Commit
ae1afb6
·
verified ·
1 Parent(s): c0ef1c0

Upload 14 files

Browse files
config.json CHANGED
@@ -19,7 +19,7 @@
19
  "num_experts_per_tok": 2,
20
  "num_hidden_layers": 32,
21
  "num_key_value_heads": 8,
22
- "num_local_experts": 3,
23
  "output_router_logits": false,
24
  "pretraining_tp": 1,
25
  "rms_norm_eps": 1e-05,
 
19
  "num_experts_per_tok": 2,
20
  "num_hidden_layers": 32,
21
  "num_key_value_heads": 8,
22
+ "num_local_experts": 7,
23
  "output_router_logits": false,
24
  "pretraining_tp": 1,
25
  "rms_norm_eps": 1e-05,
mergekit_moe_config.yml CHANGED
@@ -2,6 +2,10 @@ base_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
2
  gate_mode: random # 必要に応じて "random" や "cheap_embed" に変更可能
3
  dtype: bfloat16 # 出力のデータ型。元モデルに合わせて調整してください
4
  experts:
 
 
 
 
5
  - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
6
  - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
7
  - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
 
2
  gate_mode: random # 必要に応じて "random" や "cheap_embed" に変更可能
3
  dtype: bfloat16 # 出力のデータ型。元モデルに合わせて調整してください
4
  experts:
5
+ - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
6
+ - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
7
+ - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
8
+ - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
9
  - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
10
  - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
11
  - source_model: sbintuitions/sarashina2.2-3b-instruct-v0.1
model-00001-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31d7875e59a1512df7c0821aeb56944a1a8fff894a724f1837456c83295f7a1f
3
+ size 4987356504
model-00002-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bba2f26ae5f4eb9c855affe2ecd5aba1753b83217be6549da00f8227b2da998
3
+ size 4967697000
model-00003-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b88b90d02585f8f9c762e09d77a8dacf08a75b08d1f476c2d4fafb430bc615e
3
+ size 4967697112
model-00004-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cce027c560ad648442f24378431a5529bfc1adbef27693983ce468e433821c73
3
+ size 4967697136
model-00005-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa4f4583d33836f0535d3694e71f57a37f0094a7790ee874ef10bf81bfb3cb0d
3
+ size 4967697136
model-00006-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a14ef0e8da641ff0a05e3eba77dff5c89479eb89179ad01d8b3417ab00b164
3
+ size 4967697136
model-00007-of-00007.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3512963dd1642af02bdc517e7b02acf2acda978f6000ca0de42241c56fd4c915
3
+ size 3310753600
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff