my-gemma / trainer_state.json
gsaberon's picture
Upload 14 files
d57f57f verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.4,
"eval_steps": 500,
"global_step": 200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8,
"grad_norm": 1.7011343240737915,
"learning_rate": 0.00018,
"loss": 0.5995,
"step": 10
},
{
"epoch": 1.56,
"grad_norm": 0.22524015605449677,
"learning_rate": 0.0001905263157894737,
"loss": 0.0139,
"step": 20
},
{
"epoch": 2.32,
"grad_norm": 0.04040924832224846,
"learning_rate": 0.00018,
"loss": 0.0052,
"step": 30
},
{
"epoch": 3.08,
"grad_norm": 0.014496985822916031,
"learning_rate": 0.00016947368421052633,
"loss": 0.0044,
"step": 40
},
{
"epoch": 3.88,
"grad_norm": 0.04556073993444443,
"learning_rate": 0.00015894736842105264,
"loss": 0.0043,
"step": 50
},
{
"epoch": 4.64,
"grad_norm": 0.08168065547943115,
"learning_rate": 0.00014842105263157895,
"loss": 0.0053,
"step": 60
},
{
"epoch": 5.4,
"grad_norm": 0.036317914724349976,
"learning_rate": 0.00013789473684210527,
"loss": 0.0046,
"step": 70
},
{
"epoch": 6.16,
"grad_norm": 0.0234612375497818,
"learning_rate": 0.00012736842105263158,
"loss": 0.0045,
"step": 80
},
{
"epoch": 6.96,
"grad_norm": 0.023044303059577942,
"learning_rate": 0.00011684210526315791,
"loss": 0.0047,
"step": 90
},
{
"epoch": 7.72,
"grad_norm": 0.0161167923361063,
"learning_rate": 0.00010631578947368421,
"loss": 0.004,
"step": 100
},
{
"epoch": 8.48,
"grad_norm": 0.00939767062664032,
"learning_rate": 9.578947368421052e-05,
"loss": 0.0044,
"step": 110
},
{
"epoch": 9.24,
"grad_norm": 0.03683692589402199,
"learning_rate": 8.526315789473685e-05,
"loss": 0.004,
"step": 120
},
{
"epoch": 10.0,
"grad_norm": 0.02535077929496765,
"learning_rate": 7.473684210526316e-05,
"loss": 0.0042,
"step": 130
},
{
"epoch": 10.8,
"grad_norm": 0.024098610505461693,
"learning_rate": 6.421052631578948e-05,
"loss": 0.004,
"step": 140
},
{
"epoch": 11.56,
"grad_norm": 0.016196303069591522,
"learning_rate": 5.368421052631579e-05,
"loss": 0.0038,
"step": 150
},
{
"epoch": 12.32,
"grad_norm": 0.020965227857232094,
"learning_rate": 4.3157894736842105e-05,
"loss": 0.004,
"step": 160
},
{
"epoch": 13.08,
"grad_norm": 0.017107339575886726,
"learning_rate": 3.2631578947368426e-05,
"loss": 0.004,
"step": 170
},
{
"epoch": 13.88,
"grad_norm": 0.03667628765106201,
"learning_rate": 2.2105263157894736e-05,
"loss": 0.004,
"step": 180
},
{
"epoch": 14.64,
"grad_norm": 0.0402102991938591,
"learning_rate": 1.1578947368421053e-05,
"loss": 0.0038,
"step": 190
},
{
"epoch": 15.4,
"grad_norm": 0.04898475855588913,
"learning_rate": 1.0526315789473685e-06,
"loss": 0.004,
"step": 200
}
],
"logging_steps": 10,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 17,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3105837514930176.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}