|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9988571428571429, |
|
"eval_steps": 500, |
|
"global_step": 437, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 1.8699, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 2.8546, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 1.8072, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 2.4662, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 2.9095, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 3.8613, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1535, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.2857142857142858e-05, |
|
"loss": 3.8349, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.5714285714285718e-05, |
|
"loss": 1.5684, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.8571428571428574e-05, |
|
"loss": 3.1305, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.142857142857143e-05, |
|
"loss": 1.5548, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.4285714285714284e-05, |
|
"loss": 1.4553, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.714285714285715e-05, |
|
"loss": 1.1268, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4e-05, |
|
"loss": 1.1313, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.9999448409199496e-05, |
|
"loss": 1.3773, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.999779366722321e-05, |
|
"loss": 1.587, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.9995035865345184e-05, |
|
"loss": 0.868, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.999117515568324e-05, |
|
"loss": 1.1585, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.998621175119058e-05, |
|
"loss": 4.0398, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.998014592564402e-05, |
|
"loss": 0.7112, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.997297801362891e-05, |
|
"loss": 0.724, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.996470841052069e-05, |
|
"loss": 1.1229, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.995533757246307e-05, |
|
"loss": 0.9368, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 3.9944866016342835e-05, |
|
"loss": 0.7026, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.9933294319761405e-05, |
|
"loss": 0.6165, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.9920623121002914e-05, |
|
"loss": 0.5194, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.9906853118999016e-05, |
|
"loss": 0.5856, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.9891985073290375e-05, |
|
"loss": 0.8067, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.98760198039847e-05, |
|
"loss": 0.4607, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.9858958191711566e-05, |
|
"loss": 0.6095, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.98408011775738e-05, |
|
"loss": 0.6495, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.9821549763095606e-05, |
|
"loss": 0.589, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.98012050101673e-05, |
|
"loss": 0.5097, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9779768040986726e-05, |
|
"loss": 0.4351, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.975724003799739e-05, |
|
"loss": 0.6051, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.973362224382322e-05, |
|
"loss": 0.9956, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.97089159612e-05, |
|
"loss": 0.4976, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.968312255290356e-05, |
|
"loss": 0.5614, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.9656243441674576e-05, |
|
"loss": 0.8428, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.962828011014009e-05, |
|
"loss": 0.7532, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 3.959923410073174e-05, |
|
"loss": 0.4086, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.956910701560069e-05, |
|
"loss": 0.4337, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.953790051652925e-05, |
|
"loss": 0.369, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.950561632483918e-05, |
|
"loss": 0.3884, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3.9472256221296815e-05, |
|
"loss": 0.4261, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.9437822046014764e-05, |
|
"loss": 0.4273, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.940231569835046e-05, |
|
"loss": 0.3396, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.9365739136801366e-05, |
|
"loss": 0.4417, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.932809437889699e-05, |
|
"loss": 0.62, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 3.928938350108754e-05, |
|
"loss": 0.5719, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.924960863862941e-05, |
|
"loss": 0.6614, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.9208771985467435e-05, |
|
"loss": 0.3308, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.916687579411383e-05, |
|
"loss": 0.3355, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.912392237552397e-05, |
|
"loss": 0.3821, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.907991409896892e-05, |
|
"loss": 0.3476, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.9034853391904715e-05, |
|
"loss": 0.3094, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.8988742739838505e-05, |
|
"loss": 0.3785, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.8941584686191436e-05, |
|
"loss": 0.4117, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.889338183215838e-05, |
|
"loss": 0.3387, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.884413683656441e-05, |
|
"loss": 0.3845, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.879385241571817e-05, |
|
"loss": 0.3232, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.8742531343262076e-05, |
|
"loss": 0.3627, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.869017645001925e-05, |
|
"loss": 0.3797, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.863679062383746e-05, |
|
"loss": 0.2881, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.858237680942975e-05, |
|
"loss": 0.8201, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.852693800821208e-05, |
|
"loss": 0.2627, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.847047727813771e-05, |
|
"loss": 0.3228, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.841299773352858e-05, |
|
"loss": 0.2972, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.8354502544903484e-05, |
|
"loss": 0.5059, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.829499493880322e-05, |
|
"loss": 0.3438, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.823447819761259e-05, |
|
"loss": 0.9574, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.8172955659379376e-05, |
|
"loss": 0.3248, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.811043071763018e-05, |
|
"loss": 0.3328, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.804690682118328e-05, |
|
"loss": 0.5611, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.798238747395835e-05, |
|
"loss": 0.355, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.791687623478324e-05, |
|
"loss": 0.6809, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.785037671719763e-05, |
|
"loss": 0.3312, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.7782892589253735e-05, |
|
"loss": 0.36, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.771442757331397e-05, |
|
"loss": 0.3337, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.764498544584564e-05, |
|
"loss": 0.3287, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.75745700372126e-05, |
|
"loss": 0.3119, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.7503185231464014e-05, |
|
"loss": 0.2839, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.74308349661201e-05, |
|
"loss": 0.9241, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.735752323195494e-05, |
|
"loss": 0.3167, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.7283254072776336e-05, |
|
"loss": 0.3737, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.720803158520279e-05, |
|
"loss": 0.2678, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.713185991843752e-05, |
|
"loss": 0.2083, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.705474327403958e-05, |
|
"loss": 0.7429, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.697668590569215e-05, |
|
"loss": 0.4499, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.689769211896784e-05, |
|
"loss": 0.4594, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.681776627109127e-05, |
|
"loss": 0.379, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.673691277069867e-05, |
|
"loss": 0.4662, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.665513607759474e-05, |
|
"loss": 0.357, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.657244070250665e-05, |
|
"loss": 0.7296, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.648883120683521e-05, |
|
"loss": 0.3254, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.640431220240329e-05, |
|
"loss": 0.3623, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.631888835120141e-05, |
|
"loss": 0.6485, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.6232564365130626e-05, |
|
"loss": 0.3066, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.614534500574259e-05, |
|
"loss": 0.3152, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.605723508397694e-05, |
|
"loss": 0.5848, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.596823945989589e-05, |
|
"loss": 0.3347, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.58783630424162e-05, |
|
"loss": 0.3636, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.5787610789038373e-05, |
|
"loss": 0.2241, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.5695987705573223e-05, |
|
"loss": 0.4166, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.560349884586574e-05, |
|
"loss": 0.6224, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.551014931151635e-05, |
|
"loss": 0.3595, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.5415944251599476e-05, |
|
"loss": 0.2988, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.532088886237956e-05, |
|
"loss": 0.3415, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.522498838702444e-05, |
|
"loss": 0.3732, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.5128248115316114e-05, |
|
"loss": 0.2805, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.5030673383358956e-05, |
|
"loss": 0.5045, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.493226957328542e-05, |
|
"loss": 0.3575, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.483304211295915e-05, |
|
"loss": 0.3387, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.473299647567558e-05, |
|
"loss": 0.5167, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.4632138179860013e-05, |
|
"loss": 0.252, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.453047278876327e-05, |
|
"loss": 0.299, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.442800591015479e-05, |
|
"loss": 0.4623, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.4324743196013335e-05, |
|
"loss": 0.3507, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.422069034221522e-05, |
|
"loss": 0.3827, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.411585308822014e-05, |
|
"loss": 0.3607, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.4010237216754575e-05, |
|
"loss": 0.3312, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.390384855349285e-05, |
|
"loss": 0.3545, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.379669296673574e-05, |
|
"loss": 0.3093, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.3688776367086837e-05, |
|
"loss": 0.5048, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.358010470712651e-05, |
|
"loss": 0.2603, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.347068398108353e-05, |
|
"loss": 0.2749, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.3360520224504505e-05, |
|
"loss": 0.3122, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.324961951392089e-05, |
|
"loss": 0.3832, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.313798796651386e-05, |
|
"loss": 0.2636, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.302563173977688e-05, |
|
"loss": 0.3716, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.291255703117605e-05, |
|
"loss": 0.7091, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.279877007780828e-05, |
|
"loss": 0.491, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.268427715605723e-05, |
|
"loss": 0.4181, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.2569084581247137e-05, |
|
"loss": 0.3262, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.245319870729446e-05, |
|
"loss": 0.3126, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.23366259263574e-05, |
|
"loss": 0.2856, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.221937266848331e-05, |
|
"loss": 0.7066, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.210144540125402e-05, |
|
"loss": 0.2677, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.198285062942912e-05, |
|
"loss": 0.2525, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.1863594894587106e-05, |
|
"loss": 0.3914, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.174368477476461e-05, |
|
"loss": 0.3101, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.1623126884093536e-05, |
|
"loss": 0.3466, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.150192787243621e-05, |
|
"loss": 0.2589, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.1380094425018634e-05, |
|
"loss": 0.2615, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.1257633262061674e-05, |
|
"loss": 0.4989, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.113455113841043e-05, |
|
"loss": 0.3507, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.101085484316162e-05, |
|
"loss": 0.7505, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.0886551199289074e-05, |
|
"loss": 0.304, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.0761647063267455e-05, |
|
"loss": 0.2395, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.063614932469399e-05, |
|
"loss": 0.2855, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.0510064905908492e-05, |
|
"loss": 0.3145, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.038340076161151e-05, |
|
"loss": 0.6382, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.0256163878480715e-05, |
|
"loss": 0.2493, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.012836127478553e-05, |
|
"loss": 0.2916, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 0.2677, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.9871087134413968e-05, |
|
"loss": 0.346, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.974162978874249e-05, |
|
"loss": 0.2209, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.9611635103733677e-05, |
|
"loss": 0.4053, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.9481110249774755e-05, |
|
"loss": 0.6514, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.935006242649659e-05, |
|
"loss": 0.3108, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.921849886237656e-05, |
|
"loss": 0.7936, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.9086426814339838e-05, |
|
"loss": 0.3377, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.895385356735908e-05, |
|
"loss": 0.3292, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8820786434052634e-05, |
|
"loss": 0.6782, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8687232754281162e-05, |
|
"loss": 0.3089, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8553199894742776e-05, |
|
"loss": 0.3611, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8418695248566703e-05, |
|
"loss": 0.3621, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.8283726234905492e-05, |
|
"loss": 0.2988, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.8148300298525764e-05, |
|
"loss": 0.3555, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.8012424909397597e-05, |
|
"loss": 0.2199, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.7876107562282447e-05, |
|
"loss": 0.2688, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.7739355776319775e-05, |
|
"loss": 0.2502, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7602177094612298e-05, |
|
"loss": 0.5406, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7464579083809896e-05, |
|
"loss": 0.4893, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.732656933369227e-05, |
|
"loss": 0.2604, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7188155456750257e-05, |
|
"loss": 0.2974, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.7049345087765995e-05, |
|
"loss": 0.3004, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.6910145883391726e-05, |
|
"loss": 0.3131, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.6770565521727516e-05, |
|
"loss": 0.305, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.6630611701897696e-05, |
|
"loss": 0.2949, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.6490292143626228e-05, |
|
"loss": 0.9866, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.6349614586810854e-05, |
|
"loss": 0.2713, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.6208586791096196e-05, |
|
"loss": 0.4035, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.606721653544572e-05, |
|
"loss": 1.2587, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5925511617712683e-05, |
|
"loss": 0.2273, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5783479854209983e-05, |
|
"loss": 0.2003, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5641129079279025e-05, |
|
"loss": 0.8117, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.549846714485761e-05, |
|
"loss": 0.7241, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5355501920046792e-05, |
|
"loss": 0.854, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5212241290676857e-05, |
|
"loss": 0.2085, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5068693158872315e-05, |
|
"loss": 0.3107, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.492486544261608e-05, |
|
"loss": 0.4577, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.4780766075312652e-05, |
|
"loss": 0.4021, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.463640300535057e-05, |
|
"loss": 0.7204, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.4491784195663965e-05, |
|
"loss": 0.2075, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.434691762329334e-05, |
|
"loss": 0.2459, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.420181127894556e-05, |
|
"loss": 0.3556, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.4056473166553087e-05, |
|
"loss": 0.4762, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.391091130283249e-05, |
|
"loss": 0.2244, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3765133716842273e-05, |
|
"loss": 0.7107, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3619148449539965e-05, |
|
"loss": 0.4242, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3472963553338614e-05, |
|
"loss": 0.3044, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.3326587091662605e-05, |
|
"loss": 0.2674, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.3180027138502913e-05, |
|
"loss": 0.2977, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.303329177797172e-05, |
|
"loss": 0.3201, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2886389103856534e-05, |
|
"loss": 0.2589, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.2739327219173707e-05, |
|
"loss": 0.2949, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.259211423572152e-05, |
|
"loss": 0.2762, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2444758273632693e-05, |
|
"loss": 0.3394, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2297267460926548e-05, |
|
"loss": 0.2609, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.2149649933060625e-05, |
|
"loss": 0.295, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.200191383248197e-05, |
|
"loss": 0.3794, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1854067308177967e-05, |
|
"loss": 0.4467, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1706118515226894e-05, |
|
"loss": 0.2828, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1558075614348065e-05, |
|
"loss": 0.3457, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.1409946771451705e-05, |
|
"loss": 0.2655, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.1261740157188498e-05, |
|
"loss": 0.3977, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.111346394649897e-05, |
|
"loss": 0.274, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.0965126318162476e-05, |
|
"loss": 0.4635, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.0816735454346134e-05, |
|
"loss": 0.3107, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.0668299540153494e-05, |
|
"loss": 0.4861, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.051982676317302e-05, |
|
"loss": 0.2846, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.0371325313026502e-05, |
|
"loss": 0.3598, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.022280338091731e-05, |
|
"loss": 0.2647, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.0074269159178606e-05, |
|
"loss": 0.4459, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9925730840821404e-05, |
|
"loss": 0.5016, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9777196619082693e-05, |
|
"loss": 0.3645, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.9628674686973508e-05, |
|
"loss": 0.5755, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.948017323682699e-05, |
|
"loss": 0.3132, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.9331700459846516e-05, |
|
"loss": 0.5478, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.918326454565387e-05, |
|
"loss": 0.274, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.9034873681837534e-05, |
|
"loss": 0.3535, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.8886536053501042e-05, |
|
"loss": 0.2747, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.873825984281151e-05, |
|
"loss": 0.2108, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8590053228548305e-05, |
|
"loss": 0.3552, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.844192438565194e-05, |
|
"loss": 0.3144, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.829388148477311e-05, |
|
"loss": 0.2116, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.814593269182204e-05, |
|
"loss": 0.3492, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7998086167518033e-05, |
|
"loss": 0.2632, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.785035006693938e-05, |
|
"loss": 0.4334, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.770273253907346e-05, |
|
"loss": 0.2417, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 1.7555241726367317e-05, |
|
"loss": 0.2955, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7407885764278488e-05, |
|
"loss": 0.538, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7260672780826296e-05, |
|
"loss": 0.2689, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.7113610896143473e-05, |
|
"loss": 0.2849, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.6966708222028284e-05, |
|
"loss": 0.5197, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1.681997286149709e-05, |
|
"loss": 0.4282, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6673412908337402e-05, |
|
"loss": 0.4306, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.6527036446661396e-05, |
|
"loss": 0.2655, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.638085155046004e-05, |
|
"loss": 0.2738, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 1.623486628315773e-05, |
|
"loss": 0.4238, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.608908869716751e-05, |
|
"loss": 0.2554, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.5943526833446917e-05, |
|
"loss": 0.295, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.579818872105444e-05, |
|
"loss": 0.3214, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.565308237670666e-05, |
|
"loss": 0.2114, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.550821580433604e-05, |
|
"loss": 0.3037, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.5363596994649433e-05, |
|
"loss": 0.3683, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.5219233924687351e-05, |
|
"loss": 0.4236, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.5075134557383931e-05, |
|
"loss": 0.2787, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.4931306841127691e-05, |
|
"loss": 0.588, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4787758709323155e-05, |
|
"loss": 0.2892, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4644498079953215e-05, |
|
"loss": 0.5691, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.45015328551424e-05, |
|
"loss": 0.2227, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.4358870920720982e-05, |
|
"loss": 0.5576, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.4216520145790027e-05, |
|
"loss": 0.6571, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.4074488382287324e-05, |
|
"loss": 0.4547, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3932783464554286e-05, |
|
"loss": 0.3705, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.379141320890381e-05, |
|
"loss": 0.4904, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.3650385413189151e-05, |
|
"loss": 0.4299, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3509707856373779e-05, |
|
"loss": 0.4775, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3369388298102312e-05, |
|
"loss": 0.2769, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3229434478272492e-05, |
|
"loss": 0.2478, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.3089854116608279e-05, |
|
"loss": 0.3056, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.295065491223401e-05, |
|
"loss": 0.4498, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.2811844543249748e-05, |
|
"loss": 0.5963, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.2673430666307738e-05, |
|
"loss": 0.4305, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.2535420916190106e-05, |
|
"loss": 0.3375, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.2397822905387707e-05, |
|
"loss": 0.3299, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.2260644223680228e-05, |
|
"loss": 0.537, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.212389243771756e-05, |
|
"loss": 0.2732, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.1987575090602408e-05, |
|
"loss": 0.325, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.185169970147424e-05, |
|
"loss": 1.0755, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.1716273765094517e-05, |
|
"loss": 0.6557, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.1581304751433305e-05, |
|
"loss": 0.4481, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.1446800105257232e-05, |
|
"loss": 0.3972, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.1312767245718836e-05, |
|
"loss": 0.3317, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.1179213565947366e-05, |
|
"loss": 0.2758, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.1046146432640923e-05, |
|
"loss": 0.2696, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.0913573185660167e-05, |
|
"loss": 0.3465, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.078150113762344e-05, |
|
"loss": 0.3807, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0649937573503419e-05, |
|
"loss": 0.3017, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.051888975022525e-05, |
|
"loss": 0.3269, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0388364896266326e-05, |
|
"loss": 0.1849, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0258370211257511e-05, |
|
"loss": 0.4315, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.0128912865586038e-05, |
|
"loss": 0.2808, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.0000000000000006e-05, |
|
"loss": 0.292, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.871638725214481e-06, |
|
"loss": 0.2757, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.743836121519297e-06, |
|
"loss": 0.2608, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.616599238388501e-06, |
|
"loss": 0.2605, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.48993509409151e-06, |
|
"loss": 0.2572, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.363850675306013e-06, |
|
"loss": 0.2759, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.238352936732549e-06, |
|
"loss": 0.2789, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.113448800710929e-06, |
|
"loss": 0.2701, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 8.989145156838387e-06, |
|
"loss": 0.2623, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.865448861589572e-06, |
|
"loss": 0.2679, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.74236673793833e-06, |
|
"loss": 0.4513, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.619905574981378e-06, |
|
"loss": 0.2597, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.498072127563793e-06, |
|
"loss": 0.2465, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.37687311590647e-06, |
|
"loss": 0.2899, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.256315225235392e-06, |
|
"loss": 0.321, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.136405105412897e-06, |
|
"loss": 0.5139, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.017149370570884e-06, |
|
"loss": 0.4021, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.89855459874598e-06, |
|
"loss": 0.4034, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.780627331516697e-06, |
|
"loss": 0.3049, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.6633740736426e-06, |
|
"loss": 0.2607, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.546801292705539e-06, |
|
"loss": 0.4599, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.430915418752867e-06, |
|
"loss": 0.3117, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.3157228439427765e-06, |
|
"loss": 0.4483, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.201229922191726e-06, |
|
"loss": 0.2225, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 7.087442968823952e-06, |
|
"loss": 0.467, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 6.974368260223123e-06, |
|
"loss": 0.4134, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.862012033486145e-06, |
|
"loss": 0.3155, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.7503804860791115e-06, |
|
"loss": 0.4041, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.6394797754955055e-06, |
|
"loss": 0.2644, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 6.529316018916478e-06, |
|
"loss": 0.269, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.419895292873504e-06, |
|
"loss": 0.2354, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.311223632913173e-06, |
|
"loss": 0.2503, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.203307033264272e-06, |
|
"loss": 0.2419, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6.096151446507155e-06, |
|
"loss": 0.3827, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.989762783245423e-06, |
|
"loss": 0.2289, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.884146911779865e-06, |
|
"loss": 0.3037, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.779309657784786e-06, |
|
"loss": 0.3167, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.6752568039866754e-06, |
|
"loss": 0.5754, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.5719940898452205e-06, |
|
"loss": 0.2943, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.46952721123674e-06, |
|
"loss": 0.376, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.367861820139995e-06, |
|
"loss": 0.4093, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.267003524324423e-06, |
|
"loss": 0.4033, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.166957887040849e-06, |
|
"loss": 0.3671, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 5.067730426714583e-06, |
|
"loss": 0.2423, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.969326616641052e-06, |
|
"loss": 0.2651, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.871751884683895e-06, |
|
"loss": 0.5764, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.775011612975562e-06, |
|
"loss": 0.3681, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.679111137620442e-06, |
|
"loss": 0.3841, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.5840557484005355e-06, |
|
"loss": 0.2937, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.4898506884836565e-06, |
|
"loss": 0.2957, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.3965011541342606e-06, |
|
"loss": 0.2435, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.304012294426781e-06, |
|
"loss": 0.3995, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.212389210961629e-06, |
|
"loss": 0.2983, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.121636957583805e-06, |
|
"loss": 0.4387, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.031760540104115e-06, |
|
"loss": 0.3402, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.942764916023067e-06, |
|
"loss": 0.3425, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.854654994257412e-06, |
|
"loss": 0.34, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7674356348693764e-06, |
|
"loss": 0.3359, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.681111648798592e-06, |
|
"loss": 0.393, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.5956877975967163e-06, |
|
"loss": 0.4085, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.5111687931647984e-06, |
|
"loss": 0.2618, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.427559297493359e-06, |
|
"loss": 0.2674, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.3448639224052703e-06, |
|
"loss": 0.251, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.2630872293013403e-06, |
|
"loss": 0.2529, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.182233728908741e-06, |
|
"loss": 0.1951, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.102307881032165e-06, |
|
"loss": 0.3467, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.023314094307859e-06, |
|
"loss": 0.3827, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.9452567259604215e-06, |
|
"loss": 0.2283, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.868140081562487e-06, |
|
"loss": 0.3542, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.791968414797217e-06, |
|
"loss": 0.2871, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.7167459272236718e-06, |
|
"loss": 0.323, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.6424767680450657e-06, |
|
"loss": 0.9199, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.5691650338799012e-06, |
|
"loss": 0.3337, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.496814768535989e-06, |
|
"loss": 0.4007, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.4254299627874045e-06, |
|
"loss": 0.3352, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.3550145541543666e-06, |
|
"loss": 0.3592, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.2855724266860314e-06, |
|
"loss": 0.2621, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.217107410746271e-06, |
|
"loss": 0.2718, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.149623282802378e-06, |
|
"loss": 0.1917, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.0831237652167656e-06, |
|
"loss": 0.2845, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.0176125260416544e-06, |
|
"loss": 0.2991, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.9530931788167274e-06, |
|
"loss": 0.3188, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.889569282369823e-06, |
|
"loss": 0.2927, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.8270443406206273e-06, |
|
"loss": 0.2112, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7655218023874131e-06, |
|
"loss": 0.2621, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.7050050611967872e-06, |
|
"loss": 0.2712, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 1.6454974550965185e-06, |
|
"loss": 0.527, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.5870022664714225e-06, |
|
"loss": 0.3912, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.529522721862291e-06, |
|
"loss": 0.4273, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.473061991787923e-06, |
|
"loss": 0.2741, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4176231905702476e-06, |
|
"loss": 0.4739, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.363209376162542e-06, |
|
"loss": 0.7205, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.309823549980751e-06, |
|
"loss": 0.3439, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.2574686567379324e-06, |
|
"loss": 0.3784, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.2061475842818337e-06, |
|
"loss": 0.3971, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.1558631634356e-06, |
|
"loss": 0.2784, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.1066181678416266e-06, |
|
"loss": 0.3908, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.058415313808565e-06, |
|
"loss": 0.2788, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.0112572601615022e-06, |
|
"loss": 0.2195, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.65146608095293e-07, |
|
"loss": 0.2877, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.200859010310847e-07, |
|
"loss": 0.1981, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.760776244760283e-07, |
|
"loss": 0.2398, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 8.33124205886171e-07, |
|
"loss": 0.217, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.912280145325702e-07, |
|
"loss": 0.3294, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 7.503913613705971e-07, |
|
"loss": 0.3187, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 7.106164989124708e-07, |
|
"loss": 0.2297, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.719056211030128e-07, |
|
"loss": 0.4698, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 6.342608631986346e-07, |
|
"loss": 0.2209, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 5.976843016495482e-07, |
|
"loss": 0.3496, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.621779539852435e-07, |
|
"loss": 0.3261, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5.277437787031892e-07, |
|
"loss": 0.2379, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.943836751608211e-07, |
|
"loss": 0.347, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.6209948347075483e-07, |
|
"loss": 0.3664, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.308929843993115e-07, |
|
"loss": 0.2325, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.0076589926826503e-07, |
|
"loss": 0.3251, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.7171988985991835e-07, |
|
"loss": 0.3222, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4375655832542763e-07, |
|
"loss": 0.3088, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1687744709644197e-07, |
|
"loss": 0.3003, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9108403880000247e-07, |
|
"loss": 0.359, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.663777561767855e-07, |
|
"loss": 0.4534, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.4275996200261e-07, |
|
"loss": 0.3747, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.2023195901327731e-07, |
|
"loss": 0.2838, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.9879498983270685e-07, |
|
"loss": 0.2783, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.7845023690439944e-07, |
|
"loss": 0.4112, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.591988224262053e-07, |
|
"loss": 0.3825, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.4104180828844237e-07, |
|
"loss": 0.496, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.239801960153053e-07, |
|
"loss": 0.4617, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0801492670962976e-07, |
|
"loss": 0.2298, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.314688100098502e-08, |
|
"loss": 0.2499, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.9376878997095e-08, |
|
"loss": 0.5606, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.670568023859902e-08, |
|
"loss": 0.2629, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.5133983657167376e-08, |
|
"loss": 0.4196, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.4662427536936727e-08, |
|
"loss": 0.2699, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.529158947930933e-08, |
|
"loss": 0.3988, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.70219863710941e-08, |
|
"loss": 0.2276, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.9854074355987186e-08, |
|
"loss": 0.243, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.37882488094232e-08, |
|
"loss": 0.3782, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.82484431675712e-09, |
|
"loss": 0.2823, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.9641346548190415e-09, |
|
"loss": 0.3616, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.206332776797382e-09, |
|
"loss": 0.359, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.51590800510482e-10, |
|
"loss": 0.5555, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.5426, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 437, |
|
"total_flos": 4.122039303326254e+20, |
|
"train_loss": 0.46482172025286633, |
|
"train_runtime": 2900.7464, |
|
"train_samples_per_second": 2.413, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 437, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 4.122039303326254e+20, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|