albertvillanova HF Staff committed on
Commit
c956d7b
·
verified ·
1 Parent(s): 54e00cd

Upload DbrxForCausalLM

Browse files
Files changed (3) hide show
  1. config.json +14 -15
  2. generation_config.json +1 -1
  3. model.safetensors +2 -2
config.json CHANGED
@@ -4,38 +4,37 @@
4
  ],
5
  "attn_config": {
6
  "attn_pdrop": 0.0,
7
- "clip_qkv": 8,
8
- "kv_n_heads": 8,
9
  "model_type": "",
10
- "rope_theta": 500000
11
  },
12
- "d_model": 32,
13
  "dtype": "float32",
14
  "emb_pdrop": 0.0,
15
  "ffn_config": {
16
  "ffn_act_fn": {
17
  "name": "silu"
18
  },
19
- "ffn_hidden_size": 10752,
20
- "hidden_size": 32,
21
  "model_type": "",
22
- "moe_jitter_eps": 0,
23
- "moe_loss_weight": 0.05,
24
  "moe_normalize_expert_weights": 1.0,
25
- "moe_num_experts": 16,
26
- "moe_top_k": 4
27
  },
28
  "initializer_range": 0.02,
29
- "max_seq_len": 32768,
 
30
  "model_type": "dbrx",
31
  "n_heads": 4,
32
  "n_layers": 2,
33
- "num_key_value_heads": 8,
34
  "output_router_logits": false,
35
  "resid_pdrop": 0.0,
36
- "rope_theta": 10000.0,
37
  "tie_word_embeddings": false,
38
- "transformers_version": "4.57.0.dev0",
39
  "use_cache": true,
40
- "vocab_size": 100352
41
  }
 
4
  ],
5
  "attn_config": {
6
  "attn_pdrop": 0.0,
7
+ "clip_qkv": null,
8
+ "kv_n_heads": 1,
9
  "model_type": "",
10
+ "rope_theta": 10000.0
11
  },
12
+ "d_model": 8,
13
  "dtype": "float32",
14
  "emb_pdrop": 0.0,
15
  "ffn_config": {
16
  "ffn_act_fn": {
17
  "name": "silu"
18
  },
19
+ "ffn_hidden_size": 3584,
 
20
  "model_type": "",
21
+ "moe_jitter_eps": null,
22
+ "moe_loss_weight": 0.01,
23
  "moe_normalize_expert_weights": 1.0,
24
+ "moe_num_experts": 4,
25
+ "moe_top_k": 1
26
  },
27
  "initializer_range": 0.02,
28
+ "intermediate_size": 32,
29
+ "max_seq_len": 2048,
30
  "model_type": "dbrx",
31
  "n_heads": 4,
32
  "n_layers": 2,
33
+ "num_key_value_heads": 2,
34
  "output_router_logits": false,
35
  "resid_pdrop": 0.0,
 
36
  "tie_word_embeddings": false,
37
+ "transformers_version": "4.57.0",
38
  "use_cache": true,
39
+ "vocab_size": 100280
40
  }
generation_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
  "_from_model_config": true,
3
- "transformers_version": "4.57.0.dev0"
4
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "transformers_version": "4.57.0"
4
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca68fe84a06c2af3f0b51124a49048055af1b5d503cc249915f8f29eca48d1f6
3
- size 159238944
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d9db7399eeb0f7ebfbf1b716f456a57dfbd914e366c15dc8d230f28e0c636d5
3
+ size 9174240