jkminder committed on
Commit
3f20fc0
·
verified ·
1 Parent(s): 31f149a

Upload Llama-3.2-1B-L8-mu3.6e-02-lr1e-04-local-shuffling-CrosscoderLoss dictionary model

Browse files
Files changed (1) hide show
  1. trainer_config.json +21 -0
trainer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "dict_class": "CrossCoder",
4
+ "trainer_class": "CrossCoderTrainer",
5
+ "activation_dim": 2048,
6
+ "dict_size": 65536,
7
+ "lr": 0.0001,
8
+ "l1_penalty": 0.036,
9
+ "warmup_steps": 1000,
10
+ "resample_steps": null,
11
+ "device": "cuda",
12
+ "layer": 8,
13
+ "lm_name": "meta-llama/Llama-3.2-1B-Instruct-meta-llama/Llama-3.2-1B",
14
+ "wandb_name": "Llama-3.2-1B-L8-mu3.6e-02-lr1e-04-local-shuffling-CrosscoderLoss",
15
+ "submodule_name": null,
16
+ "use_mse_loss": false,
17
+ "code_normalization": "CROSSCODER",
18
+ "code_normalization_alpha_sae": 1.0,
19
+ "code_normalization_alpha_cc": 0.1
20
+ }
21
+ }