Jackmin108 committed
Commit 53d1022 · 1 Parent(s): 45a6ce4

.5B config

Files changed (1)
  1. config.json (+11 −6)
config.json CHANGED
@@ -2,16 +2,21 @@
   "architectures": [
     "GptOssForCausalLM"
   ],
+  "auto_map": {
+    "AutoConfig": "configuration_gpt_oss.GptOssConfig",
+    "AutoModelForCausalLM": "modeling_gpt_oss.GptOssForCausalLM",
+    "AutoModel": "modeling_gpt_oss.GptOssModel"
+  },
   "attention_bias": true,
   "attention_dropout": 0.0,
   "eos_token_id": 200002,
   "experts_per_token": 4,
   "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 2880,
+  "hidden_size": 1024,
   "initial_context_length": 4096,
   "initializer_range": 0.02,
-  "intermediate_size": 2880,
+  "intermediate_size": 256,
   "layer_types": [
     "sliding_attention",
     "full_attention",
@@ -40,11 +45,11 @@
   ],
   "max_position_embeddings": 131072,
   "model_type": "gpt_oss",
-  "num_attention_heads": 64,
+  "num_attention_heads": 12,
   "num_experts_per_tok": 4,
-  "num_hidden_layers": 24,
-  "num_key_value_heads": 8,
-  "num_local_experts": 32,
+  "num_hidden_layers": 12,
+  "num_key_value_heads": 4,
+  "num_local_experts": 8,
   "output_router_logits": false,
   "pad_token_id": 199999,
   "quantization_config": {