{
    "trainer": {
        "trainer_class": "BatchTopKCrossCoderTrainer",
        "dict_class": "BatchTopKCrossCoder",
        "lr": 0.0001,
        "steps": 97656,
        "auxk_alpha": 0.03125,
        "warmup_steps": 1000,
        "decay_start": null,
        "threshold_beta": 0.999,
        "threshold_start_step": 1000,
        "top_k_aux": 2048,
        "seed": null,
        "activation_dim": 4096,
        "dict_size": 131072,
        "k": 1000,
        "k_target": 200,
        "k_initial": 1000,
        "k_annealing_steps": 5000,
        "code_normalization": "CROSSCODER",
        "code_normalization_alpha_sae": 1.0,
        "code_normalization_alpha_cc": 0.1,
        "device": "cuda",
        "layer": 16,
        "lm_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-meta-llama/Meta-Llama-3.1-8B",
        "wandb_name": "Meta-Llama-3.1-8B-L16-k200-lr1e-04-local-shuffling-Crosscoder-ni0.3-ka1k5k",
        "submodule_name": null,
        "dict_class_kwargs": {
            "same_init_for_all_layers": "True",
            "norm_init_scale": "0.3",
            "init_with_transpose": "True",
            "encoder_layers": "None",
            "code_normalization": "crosscoder",
            "code_normalization_alpha_sae": "1.0",
            "code_normalization_alpha_cc": "0.1"
        }
    }
}