jkminder's picture
Upload Meta-Llama-3.1-8B-L16-k200-lr1e-04-local-shuffling-Crosscoder-ni0.3-ka1k5k dictionary model
3bae5d1 verified
{
"trainer": {
"trainer_class": "BatchTopKCrossCoderTrainer",
"dict_class": "BatchTopKCrossCoder",
"lr": 0.0001,
"steps": 97656,
"auxk_alpha": 0.03125,
"warmup_steps": 1000,
"decay_start": null,
"threshold_beta": 0.999,
"threshold_start_step": 1000,
"top_k_aux": 2048,
"seed": null,
"activation_dim": 4096,
"dict_size": 131072,
"k": 1000,
"k_target": 200,
"k_initial": 1000,
"k_annealing_steps": 5000,
"code_normalization": "CROSSCODER",
"code_normalization_alpha_sae": 1.0,
"code_normalization_alpha_cc": 0.1,
"device": "cuda",
"layer": 16,
"lm_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-meta-llama/Meta-Llama-3.1-8B",
"wandb_name": "Meta-Llama-3.1-8B-L16-k200-lr1e-04-local-shuffling-Crosscoder-ni0.3-ka1k5k",
"submodule_name": null,
"dict_class_kwargs": {
"same_init_for_all_layers": "True",
"norm_init_scale": "0.3",
"init_with_transpose": "True",
"encoder_layers": "None",
"code_normalization": "crosscoder",
"code_normalization_alpha_sae": "1.0",
"code_normalization_alpha_cc": "0.1"
}
}
}