GritLs committed on
Commit
0187fe5
·
1 Parent(s): 73174e4

delete config

Browse files
Files changed (1) hide show
  1. config.json +1 -11
config.json CHANGED
@@ -8,15 +8,12 @@
8
  "d_ff": 1024,
9
  "d_kv": 64,
10
  "d_model": 256,
11
- "data_driven": true,
12
  "decoder_start_token_id": 0,
13
  "dense_act_fn": "relu",
14
- "diff_decoder_token_id": false,
15
  "dropout_rate": 0.1,
16
  "dtype": "float32",
17
  "eos_token_id": 1,
18
  "feed_forward_proj": "relu",
19
- "finetune": false,
20
  "initializer_factor": 0.05,
21
  "input_patch_size": 128,
22
  "input_patch_stride": 128,
@@ -31,7 +28,6 @@
31
  "min_period": "original_rope_init",
32
  "model_type": "kairos",
33
  "moe_inter_dim": 1408,
34
- "multi_pred_head": false,
35
  "n_activated_experts": 3,
36
  "n_expert_groups": 1,
37
  "n_limited_groups": 1,
@@ -60,7 +56,6 @@
60
  "relative_attention_max_distance": 128,
61
  "relative_attention_num_buckets": 32,
62
  "rope_init": "exp",
63
- "router_gumbel_softmax": false,
64
  "scale_method": "log",
65
  "score_func": "softmax",
66
  "seq_balance_factor": 0.0001,
@@ -71,14 +66,9 @@
71
  0.15,
72
  0.15
73
  ],
74
- "threshold": 0.6,
75
  "transformers_version": "4.56.1",
76
  "update_bias_rate": 0.01,
77
- "use_bias": true,
78
  "use_cache": true,
79
  "use_reg_token": true,
80
- "use_top1_bias": true,
81
- "use_topk": true,
82
- "vocab_size": 2,
83
- "weights_norm": true
84
  }
 
8
  "d_ff": 1024,
9
  "d_kv": 64,
10
  "d_model": 256,
 
11
  "decoder_start_token_id": 0,
12
  "dense_act_fn": "relu",
 
13
  "dropout_rate": 0.1,
14
  "dtype": "float32",
15
  "eos_token_id": 1,
16
  "feed_forward_proj": "relu",
 
17
  "initializer_factor": 0.05,
18
  "input_patch_size": 128,
19
  "input_patch_stride": 128,
 
28
  "min_period": "original_rope_init",
29
  "model_type": "kairos",
30
  "moe_inter_dim": 1408,
 
31
  "n_activated_experts": 3,
32
  "n_expert_groups": 1,
33
  "n_limited_groups": 1,
 
56
  "relative_attention_max_distance": 128,
57
  "relative_attention_num_buckets": 32,
58
  "rope_init": "exp",
 
59
  "scale_method": "log",
60
  "score_func": "softmax",
61
  "seq_balance_factor": 0.0001,
 
66
  0.15,
67
  0.15
68
  ],
 
69
  "transformers_version": "4.56.1",
70
  "update_bias_rate": 0.01,
 
71
  "use_cache": true,
72
  "use_reg_token": true,
73
+ "vocab_size": 2
 
 
 
74
  }