jkminder committed on
Commit
7994388
·
verified ·
1 Parent(s): 8c52098

Push model using huggingface_hub.

Browse files
Files changed (1) hide show
  1. config.json +3 -24
config.json CHANGED
@@ -1,26 +1,5 @@
1
  {
2
- "trainer": {
3
- "trainer_class": "BatchTopKCrossCoderTrainer",
4
- "dict_class": "BatchTopKCrossCoder",
5
- "lr": 0.0001,
6
- "steps": 97656,
7
- "auxk_alpha": 0.03125,
8
- "warmup_steps": 1000,
9
- "decay_start": null,
10
- "threshold_beta": 0.999,
11
- "threshold_start_step": 1000,
12
- "top_k_aux": 1152,
13
- "seed": null,
14
- "activation_dim": 2304,
15
- "dict_size": 73728,
16
- "k": 100,
17
- "sparsity_loss_type": "LossType.CROSSCODER",
18
- "sparsity_loss_alpha_sae": 1.0,
19
- "sparsity_loss_alpha_cc": 0.1,
20
- "device": "cuda",
21
- "layer": 13,
22
- "lm_name": "google/gemma-2-2b-it-google/gemma-2-2b",
23
- "wandb_name": "gemma-2-2b-L13-k100-lr1e-04-local-shuffling-CCLoss",
24
- "submodule_name": null
25
- }
26
  }
 
1
  {
2
+ "activation_dim": 2304,
3
+ "dict_size": 73728,
4
+ "num_layers": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  }