File size: 1,696 Bytes
1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc ca8fcf6 1131fcc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"grad_norm": 0.5205761790275574,
"learning_rate": 0.0002,
"loss": 0.8035,
"step": 250
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.3349571228027344,
"learning_rate": 0.0002,
"loss": 0.3891,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 0.42590829730033875,
"learning_rate": 0.0002,
"loss": 0.2422,
"step": 750
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.2039356529712677,
"learning_rate": 0.0002,
"loss": 0.1883,
"step": 1000
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.3233167231082916,
"learning_rate": 0.0002,
"loss": 0.158,
"step": 1250
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.279433935880661,
"learning_rate": 0.0002,
"loss": 0.1395,
"step": 1500
}
],
"logging_steps": 250,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.0351908666638336e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|