{
  "accumulate_grad_batches": 64,
  "autoencoder": null,
  "auxk": 256,
  "auxk_coef": 0.03125,
  "batch_size": 1,
  "dead_steps_threshold": null,
  "dead_threshold": 0.001,
  "dead_tokens_threshold": 10000000,
  "expansion_factor": 128,
  "k": 32,
  "layers": null,
  "lr": 0.0001,
  "max_length": 2048,
  "model_name": "EleutherAI/pythia-70m-deduped",
  "skip_special_tokens": true,
  "standardize": true,
  "transformer": null
}