jkminder committed on
Commit
3cda93d
·
verified ·
1 Parent(s): 2e667bd

Upload Meta-Llama-3.1-8B-L16-mu2.1e-02-lr1e-04-local-shuffling-CrosscoderLoss dictionary model

Browse files
Files changed (1) hide show
  1. trainer_config.json +21 -0
trainer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "CrossCoder",
4
+ "trainer_class": "CrossCoderTrainer",
5
+ "activation_dim": 4096,
6
+ "dict_size": 131072,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.021,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda",
12
+ "layer": 16,
13
+ "lm_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-meta-llama/Meta-Llama-3.1-8B",
14
+ "wandb_name": "Meta-Llama-3.1-8B-L16-mu2.1e-02-lr1e-04-local-shuffling-CrosscoderLoss",
15
+ "submodule_name": null,
16
+ "use_mse_loss": false,
17
+ "code_normalization": "CROSSCODER",
18
+ "code_normalization_alpha_sae": 1.0,
19
+ "code_normalization_alpha_cc": 0.1
20
+ }
21
+ }