{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 58,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 2108.397216796875,
      "learning_rate": 2.5454545454545454e-08,
      "loss": 1035.4573,
      "step": 5
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 2107.33642578125,
      "learning_rate": 5.727272727272728e-08,
      "loss": 1065.6187,
      "step": 10
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 2219.214111328125,
      "learning_rate": 8.90909090909091e-08,
      "loss": 1036.3791,
      "step": 15
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 1936.712890625,
      "learning_rate": 1.2090909090909092e-07,
      "loss": 1035.5998,
      "step": 20
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 2196.76318359375,
      "learning_rate": 1.5272727272727273e-07,
      "loss": 1064.9285,
      "step": 25
    },
    {
      "epoch": 1.0344827586206897,
      "grad_norm": 2548.19189453125,
      "learning_rate": 1.8454545454545454e-07,
      "loss": 1048.6884,
      "step": 30
    },
    {
      "epoch": 1.206896551724138,
      "grad_norm": 2420.173828125,
      "learning_rate": 2.1636363636363637e-07,
      "loss": 1050.2486,
      "step": 35
    },
    {
      "epoch": 1.3793103448275863,
      "grad_norm": 2168.558349609375,
      "learning_rate": 2.481818181818182e-07,
      "loss": 1027.9692,
      "step": 40
    },
    {
      "epoch": 1.5517241379310345,
      "grad_norm": 2378.92431640625,
      "learning_rate": 2.8e-07,
      "loss": 1043.8671,
      "step": 45
    },
    {
      "epoch": 1.7241379310344827,
      "grad_norm": 2213.712158203125,
      "learning_rate": 3.1181818181818186e-07,
      "loss": 1042.2683,
      "step": 50
    },
    {
      "epoch": 1.896551724137931,
      "grad_norm": 2194.255615234375,
      "learning_rate": 3.436363636363636e-07,
      "loss": 1032.4518,
      "step": 55
    }
  ],
  "logging_steps": 5,
  "max_steps": 58,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7211980486851994e+17,
  "train_batch_size": 14,
  "trial_name": null,
  "trial_params": null
}