{
  "trainer": {
    "trainer_class": "BatchTopKCrossCoderTrainer",
    "dict_class": "BatchTopKCrossCoder",
    "lr": 0.0001,
    "steps": 97656,
    "auxk_alpha": 0.03125,
    "warmup_steps": 1000,
    "decay_start": null,
    "threshold_beta": 0.999,
    "threshold_start_step": 1000,
    "top_k_aux": 2048,
    "seed": null,
    "activation_dim": 4096,
    "dict_size": 131072,
    "k": 1000,
    "k_target": 200,
    "k_initial": 1000,
    "k_annealing_steps": 5000,
    "code_normalization": "CROSSCODER",
    "code_normalization_alpha_sae": 1.0,
    "code_normalization_alpha_cc": 0.1,
    "device": "cuda",
    "layer": 16,
    "lm_name": "meta-llama/Meta-Llama-3.1-8B-Instruct-meta-llama/Meta-Llama-3.1-8B",
    "wandb_name": "Meta-Llama-3.1-8B-L16-k200-lr1e-04-local-shuffling-Crosscoder-ni0.3-ka1k5k",
    "submodule_name": null,
    "dict_class_kwargs": {
      "same_init_for_all_layers": "True",
      "norm_init_scale": "0.3",
      "init_with_transpose": "True",
      "encoder_layers": "None",
      "code_normalization": "crosscoder",
      "code_normalization_alpha_sae": "1.0",
      "code_normalization_alpha_cc": "0.1"
    }
  }
}