{
  "trainer": {
    "trainer_class": "BatchTopKCrossCoderTrainer",
    "dict_class": "BatchTopKCrossCoder",
    "lr": 0.0001,
    "steps": 48828,
    "auxk_alpha": 0.03125,
    "warmup_steps": 1000,
    "decay_start": null,
    "threshold_beta": 0.999,
    "threshold_start_step": 1000,
    "top_k_aux": 1792,
    "seed": null,
    "activation_dim": 3584,
    "dict_size": 114688,
    "k": 100,
    "code_normalization": "CROSSCODER",
    "code_normalization_alpha_sae": 1.0,
    "code_normalization_alpha_cc": 0.1,
    "device": "cuda",
    "layer": 20,
    "lm_name": "princeton-nlp/gemma-2-9b-it-DPO-gemma-2-9b",
    "wandb_name": "gemma-2-9b-L20-k100-lr1e-04-base-dpo-Crosscoder",
    "submodule_name": null,
    "dict_class_kwargs": {
      "same_init_for_all_layers": false,
      "norm_init_scale": 0.005,
      "init_with_transpose": false,
      "encoder_layers": null,
      "code_normalization": "crosscoder",
      "code_normalization_alpha_sae": 1.0,
      "code_normalization_alpha_cc": 0.1
    }
  }
}