GritLs committed on
Commit
b72bfbb
·
1 Parent(s): 845d701

delete config

Browse files
Files changed (1) hide show
  1. config.json +1 -11
config.json CHANGED
@@ -8,15 +8,12 @@
8
  "d_ff": 2048,
9
  "d_kv": 64,
10
  "d_model": 512,
11
- "data_driven": true,
12
  "decoder_start_token_id": 0,
13
  "dense_act_fn": "relu",
14
- "diff_decoder_token_id": false,
15
  "dropout_rate": 0.1,
16
  "dtype": "float32",
17
  "eos_token_id": 1,
18
  "feed_forward_proj": "relu",
19
- "finetune": false,
20
  "initializer_factor": 0.05,
21
  "input_patch_size": 128,
22
  "input_patch_stride": 128,
@@ -31,7 +28,6 @@
31
  "min_period": "original_rope_init",
32
  "model_type": "kairos",
33
  "moe_inter_dim": 1408,
34
- "multi_pred_head": false,
35
  "n_activated_experts": 3,
36
  "n_expert_groups": 1,
37
  "n_limited_groups": 1,
@@ -61,7 +57,6 @@
61
  "relative_attention_num_buckets": 32,
62
  "rope_init": "exp",
63
  "route_scale": 1.0,
64
- "router_gumbel_softmax": false,
65
  "scale_method": "log",
66
  "score_func": "softmax",
67
  "seq_balance_factor": 0.0001,
@@ -72,14 +67,9 @@
72
  0.15,
73
  0.15
74
  ],
75
- "threshold": 0.6,
76
  "transformers_version": "4.56.1",
77
  "update_bias_rate": 0.01,
78
- "use_bias": true,
79
  "use_cache": true,
80
  "use_reg_token": true,
81
- "use_top1_bias": true,
82
- "use_topk": true,
83
- "vocab_size": 2,
84
- "weights_norm": true
85
  }
 
8
  "d_ff": 2048,
9
  "d_kv": 64,
10
  "d_model": 512,
 
11
  "decoder_start_token_id": 0,
12
  "dense_act_fn": "relu",
 
13
  "dropout_rate": 0.1,
14
  "dtype": "float32",
15
  "eos_token_id": 1,
16
  "feed_forward_proj": "relu",
 
17
  "initializer_factor": 0.05,
18
  "input_patch_size": 128,
19
  "input_patch_stride": 128,
 
28
  "min_period": "original_rope_init",
29
  "model_type": "kairos",
30
  "moe_inter_dim": 1408,
 
31
  "n_activated_experts": 3,
32
  "n_expert_groups": 1,
33
  "n_limited_groups": 1,
 
57
  "relative_attention_num_buckets": 32,
58
  "rope_init": "exp",
59
  "route_scale": 1.0,
 
60
  "scale_method": "log",
61
  "score_func": "softmax",
62
  "seq_balance_factor": 0.0001,
 
67
  0.15,
68
  0.15
69
  ],
 
70
  "transformers_version": "4.56.1",
71
  "update_bias_rate": 0.01,
 
72
  "use_cache": true,
73
  "use_reg_token": true,
74
+ "vocab_size": 2
 
 
 
75
  }