science-of-finetuning
/

gemma-2-2b-L13-k100-lr1e-04-local-shuffling-CCLoss

model_hub_mixin

pytorch_model_hub_mixin

Model card Files Files and versions Community

jkminder committed on Mar 5

Commit

7994388

·

verified ·

1 Parent(s): 8c52098

Push model using huggingface_hub.

Files changed (1) hide show

config.json +3 -24

config.json CHANGED Viewed

@@ -1,26 +1,5 @@
 {
-    "trainer": {
-        "trainer_class": "BatchTopKCrossCoderTrainer",
-        "dict_class": "BatchTopKCrossCoder",
-        "lr": 0.0001,
-        "steps": 97656,
-        "auxk_alpha": 0.03125,
-        "warmup_steps": 1000,
-        "decay_start": null,
-        "threshold_beta": 0.999,
-        "threshold_start_step": 1000,
-        "top_k_aux": 1152,
-        "seed": null,
-        "activation_dim": 2304,
-        "dict_size": 73728,
-        "k": 100,
-        "sparsity_loss_type": "LossType.CROSSCODER",
-        "sparsity_loss_alpha_sae": 1.0,
-        "sparsity_loss_alpha_cc": 0.1,
-        "device": "cuda",
-        "layer": 13,
-        "lm_name": "google/gemma-2-2b-it-google/gemma-2-2b",
-        "wandb_name": "gemma-2-2b-L13-k100-lr1e-04-local-shuffling-CCLoss",
-        "submodule_name": null
-    }
 }

 {
+  "activation_dim": 2304,
+  "dict_size": 73728,
+  "num_layers": 2
 }