jkminder committed on
Commit
3bae5d1
·
verified ·
1 Parent(s): 8d52241

Upload Meta-Llama-3.1-8B-L16-k200-lr1e-04-local-shuffling-Crosscoder-ni0.3-ka1k5k dictionary model

Browse files
Files changed (1) hide show
  1. trainer_config.json +38 -0
trainer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKCrossCoderTrainer",
4
+ "dict_class": "BatchTopKCrossCoder",
5
+ "lr": 0.0001,
6
+ "steps": 97656,
7
+ "auxk_alpha": 0.03125,
8
+ "warmup_steps": 1000,
9
+ "decay_start": null,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 2048,
13
+ "seed": null,
14
+ "activation_dim": 4096,
15
+ "dict_size": 131072,
16
+ "k": 1000,
17
+ "k_target": 200,
18
+ "k_initial": 1000,
19
+ "k_annealing_steps": 5000,
20
+ "code_normalization": "CROSSCODER",
21
+ "code_normalization_alpha_sae": 1.0,
22
+ "code_normalization_alpha_cc": 0.1,
23
+ "device": "cuda",
24
+ "layer": 16,
25
+ "lm_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-meta-llama/Meta-Llama-3.1-8B",
26
+ "wandb_name": "Meta-Llama-3.1-8B-L16-k200-lr1e-04-local-shuffling-Crosscoder-ni0.3-ka1k5k",
27
+ "submodule_name": null,
28
+ "dict_class_kwargs": {
29
+ "same_init_for_all_layers": "True",
30
+ "norm_init_scale": "0.3",
31
+ "init_with_transpose": "True",
32
+ "encoder_layers": "None",
33
+ "code_normalization": "crosscoder",
34
+ "code_normalization_alpha_sae": "1.0",
35
+ "code_normalization_alpha_cc": "0.1"
36
+ }
37
+ }
38
+ }