|
{ |
|
"best_global_step": 648, |
|
"best_metric": 0.00023728572705294937, |
|
"best_model_checkpoint": "Qwen2.5_3B_VL_flip_detection/checkpoint-648", |
|
"epoch": 1.0, |
|
"eval_steps": 72, |
|
"global_step": 725, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001380738695201933, |
|
"grad_norm": 318.0, |
|
"learning_rate": 0.0, |
|
"loss": 6.0874, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002761477390403866, |
|
"grad_norm": 326.0, |
|
"learning_rate": 9.090909090909091e-07, |
|
"loss": 6.1564, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004142216085605799, |
|
"grad_norm": 312.0, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 6.0274, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.005522954780807732, |
|
"grad_norm": 338.0, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 5.9388, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0069036934760096655, |
|
"grad_norm": 282.0, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 5.6881, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008284432171211598, |
|
"grad_norm": 290.0, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 5.5135, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.009665170866413532, |
|
"grad_norm": 219.0, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 4.9453, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.011045909561615464, |
|
"grad_norm": 140.0, |
|
"learning_rate": 6.363636363636364e-06, |
|
"loss": 4.438, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.012426648256817397, |
|
"grad_norm": 125.0, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 4.0221, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.013807386952019331, |
|
"grad_norm": 152.0, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 3.5748, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.015188125647221263, |
|
"grad_norm": 84.0, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 3.0537, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.016568864342423197, |
|
"grad_norm": 72.0, |
|
"learning_rate": 1e-05, |
|
"loss": 2.5683, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01794960303762513, |
|
"grad_norm": 62.0, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 2.1136, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.019330341732827064, |
|
"grad_norm": 63.75, |
|
"learning_rate": 1.181818181818182e-05, |
|
"loss": 1.8072, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.020711080428028994, |
|
"grad_norm": 66.0, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 1.4696, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.022091819123230928, |
|
"grad_norm": 49.25, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 1.1919, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.02347255781843286, |
|
"grad_norm": 53.75, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.9495, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.024853296513634795, |
|
"grad_norm": 46.5, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 0.8079, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.02623403520883673, |
|
"grad_norm": 58.75, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.6889, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.027614773904038662, |
|
"grad_norm": 31.625, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 0.5888, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.028995512599240592, |
|
"grad_norm": 61.75, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.5087, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.030376251294442526, |
|
"grad_norm": 21.625, |
|
"learning_rate": 1.9090909090909094e-05, |
|
"loss": 0.4641, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.03175698998964446, |
|
"grad_norm": 37.25, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4248, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.03313772868484639, |
|
"grad_norm": 19.25, |
|
"learning_rate": 1.997155049786629e-05, |
|
"loss": 0.3976, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.03451846738004832, |
|
"grad_norm": 54.25, |
|
"learning_rate": 1.9943100995732575e-05, |
|
"loss": 0.3755, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03589920607525026, |
|
"grad_norm": 37.75, |
|
"learning_rate": 1.9914651493598865e-05, |
|
"loss": 0.3674, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03727994477045219, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 1.9886201991465152e-05, |
|
"loss": 0.3343, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.03866068346565413, |
|
"grad_norm": 23.875, |
|
"learning_rate": 1.985775248933144e-05, |
|
"loss": 0.3108, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.04004142216085606, |
|
"grad_norm": 16.0, |
|
"learning_rate": 1.9829302987197725e-05, |
|
"loss": 0.3134, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.04142216085605799, |
|
"grad_norm": 140.0, |
|
"learning_rate": 1.9800853485064012e-05, |
|
"loss": 0.2831, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.042802899551259925, |
|
"grad_norm": 12.875, |
|
"learning_rate": 1.97724039829303e-05, |
|
"loss": 0.2804, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.044183638246461855, |
|
"grad_norm": 70.0, |
|
"learning_rate": 1.974395448079659e-05, |
|
"loss": 0.2625, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.04556437694166379, |
|
"grad_norm": 20.625, |
|
"learning_rate": 1.9715504978662876e-05, |
|
"loss": 0.2517, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04694511563686572, |
|
"grad_norm": 33.25, |
|
"learning_rate": 1.9687055476529162e-05, |
|
"loss": 0.2638, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04832585433206766, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 1.965860597439545e-05, |
|
"loss": 0.2471, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.04970659302726959, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 1.9630156472261736e-05, |
|
"loss": 0.228, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.05108733172247152, |
|
"grad_norm": 3.828125, |
|
"learning_rate": 1.9601706970128026e-05, |
|
"loss": 0.2205, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.05246807041767346, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 1.957325746799431e-05, |
|
"loss": 0.221, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.05384880911287539, |
|
"grad_norm": 22.25, |
|
"learning_rate": 1.95448079658606e-05, |
|
"loss": 0.2166, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.055229547808077324, |
|
"grad_norm": 3.203125, |
|
"learning_rate": 1.9516358463726886e-05, |
|
"loss": 0.206, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.056610286503279254, |
|
"grad_norm": 10.5, |
|
"learning_rate": 1.9487908961593173e-05, |
|
"loss": 0.2014, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.057991025198481184, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 1.9459459459459463e-05, |
|
"loss": 0.1947, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.05937176389368312, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.943100995732575e-05, |
|
"loss": 0.1937, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.06075250258888505, |
|
"grad_norm": 4.375, |
|
"learning_rate": 1.9402560455192037e-05, |
|
"loss": 0.1874, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.06213324128408699, |
|
"grad_norm": 3.625, |
|
"learning_rate": 1.9374110953058323e-05, |
|
"loss": 0.1797, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.06351397997928893, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 1.934566145092461e-05, |
|
"loss": 0.177, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.06489471867449086, |
|
"grad_norm": 4.0, |
|
"learning_rate": 1.9317211948790897e-05, |
|
"loss": 0.1697, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.06627545736969279, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 1.9288762446657187e-05, |
|
"loss": 0.163, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.06765619606489472, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 1.926031294452347e-05, |
|
"loss": 0.1563, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.06903693476009665, |
|
"grad_norm": 3.8125, |
|
"learning_rate": 1.923186344238976e-05, |
|
"loss": 0.1531, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.07041767345529859, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 1.9203413940256047e-05, |
|
"loss": 0.1461, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.07179841215050052, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 1.9174964438122334e-05, |
|
"loss": 0.1408, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.07317915084570245, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 1.9146514935988624e-05, |
|
"loss": 0.1403, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.07455988954090438, |
|
"grad_norm": 3.15625, |
|
"learning_rate": 1.9118065433854907e-05, |
|
"loss": 0.1285, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.07594062823610631, |
|
"grad_norm": 3.125, |
|
"learning_rate": 1.9089615931721197e-05, |
|
"loss": 0.1205, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.07732136693130826, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 1.9061166429587484e-05, |
|
"loss": 0.1147, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.07870210562651019, |
|
"grad_norm": 3.0625, |
|
"learning_rate": 1.903271692745377e-05, |
|
"loss": 0.1054, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.08008284432171212, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 1.9004267425320058e-05, |
|
"loss": 0.1014, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.08146358301691405, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 1.8975817923186348e-05, |
|
"loss": 0.0919, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.08284432171211598, |
|
"grad_norm": 3.078125, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 0.0857, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.08422506040731792, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 1.891891891891892e-05, |
|
"loss": 0.0781, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.08560579910251985, |
|
"grad_norm": 2.890625, |
|
"learning_rate": 1.8890469416785208e-05, |
|
"loss": 0.0702, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.08698653779772178, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 1.8862019914651495e-05, |
|
"loss": 0.0619, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.08836727649292371, |
|
"grad_norm": 2.53125, |
|
"learning_rate": 1.8833570412517785e-05, |
|
"loss": 0.055, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.08974801518812564, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 1.8805120910384068e-05, |
|
"loss": 0.049, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.09112875388332758, |
|
"grad_norm": 2.53125, |
|
"learning_rate": 1.8776671408250358e-05, |
|
"loss": 0.0431, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.09250949257852951, |
|
"grad_norm": 2.734375, |
|
"learning_rate": 1.8748221906116645e-05, |
|
"loss": 0.0366, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.09389023127373144, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 1.871977240398293e-05, |
|
"loss": 0.0433, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.09527096996893337, |
|
"grad_norm": 1.6875, |
|
"learning_rate": 1.869132290184922e-05, |
|
"loss": 0.0275, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.09665170866413532, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 1.8662873399715505e-05, |
|
"loss": 0.0239, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09803244735933725, |
|
"grad_norm": 1.53125, |
|
"learning_rate": 1.8634423897581795e-05, |
|
"loss": 0.0238, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.09941318605453918, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 1.8605974395448082e-05, |
|
"loss": 0.019, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.09941318605453918, |
|
"eval_loss": 0.016371596604585648, |
|
"eval_runtime": 592.2167, |
|
"eval_samples_per_second": 2.175, |
|
"eval_steps_per_second": 2.175, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.10079392474974111, |
|
"grad_norm": 1.515625, |
|
"learning_rate": 1.857752489331437e-05, |
|
"loss": 0.0154, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.10217466344494304, |
|
"grad_norm": 0.9296875, |
|
"learning_rate": 1.8549075391180655e-05, |
|
"loss": 0.013, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.10355540214014498, |
|
"grad_norm": 0.91796875, |
|
"learning_rate": 1.8520625889046942e-05, |
|
"loss": 0.0154, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.10493614083534691, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 1.849217638691323e-05, |
|
"loss": 0.0101, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.10631687953054884, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 1.846372688477952e-05, |
|
"loss": 0.0098, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.10769761822575077, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 1.8435277382645806e-05, |
|
"loss": 0.0114, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1090783569209527, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 1.8406827880512092e-05, |
|
"loss": 0.0065, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.11045909561615465, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 1.8378378378378383e-05, |
|
"loss": 0.0067, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.11183983431135658, |
|
"grad_norm": 1.0859375, |
|
"learning_rate": 1.8349928876244666e-05, |
|
"loss": 0.0109, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.11322057300655851, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.8321479374110956e-05, |
|
"loss": 0.0045, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.11460131170176044, |
|
"grad_norm": 0.6875, |
|
"learning_rate": 1.8293029871977243e-05, |
|
"loss": 0.0065, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.11598205039696237, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 1.826458036984353e-05, |
|
"loss": 0.0036, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.11736278909216431, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.8236130867709816e-05, |
|
"loss": 0.0029, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.11874352778736624, |
|
"grad_norm": 0.22265625, |
|
"learning_rate": 1.8207681365576103e-05, |
|
"loss": 0.0025, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.12012426648256817, |
|
"grad_norm": 0.208984375, |
|
"learning_rate": 1.817923186344239e-05, |
|
"loss": 0.0023, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.1215050051777701, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 1.815078236130868e-05, |
|
"loss": 0.0021, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.12288574387297203, |
|
"grad_norm": 0.1748046875, |
|
"learning_rate": 1.8122332859174966e-05, |
|
"loss": 0.0019, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.12426648256817398, |
|
"grad_norm": 0.150390625, |
|
"learning_rate": 1.8093883357041253e-05, |
|
"loss": 0.0016, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1256472212633759, |
|
"grad_norm": 0.134765625, |
|
"learning_rate": 1.806543385490754e-05, |
|
"loss": 0.0014, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.12702795995857785, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 1.8036984352773827e-05, |
|
"loss": 0.0027, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.12840869865377977, |
|
"grad_norm": 0.1162109375, |
|
"learning_rate": 1.8008534850640117e-05, |
|
"loss": 0.0012, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1297894373489817, |
|
"grad_norm": 0.107421875, |
|
"learning_rate": 1.7980085348506404e-05, |
|
"loss": 0.0011, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.13117017604418363, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 1.795163584637269e-05, |
|
"loss": 0.0025, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.13255091473938557, |
|
"grad_norm": 0.0869140625, |
|
"learning_rate": 1.7923186344238977e-05, |
|
"loss": 0.0009, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.13393165343458752, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 1.7894736842105264e-05, |
|
"loss": 0.001, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.13531239212978943, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 1.7866287339971554e-05, |
|
"loss": 0.0008, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.13669313082499138, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 1.783783783783784e-05, |
|
"loss": 0.0015, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.1380738695201933, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 1.7809388335704127e-05, |
|
"loss": 0.001, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.13945460821539524, |
|
"grad_norm": 0.06640625, |
|
"learning_rate": 1.7780938833570414e-05, |
|
"loss": 0.0007, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.14083534691059718, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 1.77524893314367e-05, |
|
"loss": 0.0006, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.1422160856057991, |
|
"grad_norm": 0.058837890625, |
|
"learning_rate": 1.7724039829302988e-05, |
|
"loss": 0.0006, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.14359682430100104, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 1.7695590327169278e-05, |
|
"loss": 0.0005, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.14497756299620296, |
|
"grad_norm": 0.054931640625, |
|
"learning_rate": 1.766714082503556e-05, |
|
"loss": 0.0005, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.1463583016914049, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 1.763869132290185e-05, |
|
"loss": 0.0005, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.14773904038660685, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 1.7610241820768138e-05, |
|
"loss": 0.0005, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.14911977908180876, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.7581792318634425e-05, |
|
"loss": 0.001, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.1505005177770107, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 1.7553342816500715e-05, |
|
"loss": 0.0005, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.15188125647221262, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 1.7524893314367e-05, |
|
"loss": 0.0005, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.15326199516741457, |
|
"grad_norm": 0.26953125, |
|
"learning_rate": 1.7496443812233288e-05, |
|
"loss": 0.0005, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.1546427338626165, |
|
"grad_norm": 2.5625, |
|
"learning_rate": 1.7467994310099575e-05, |
|
"loss": 0.0025, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.15602347255781843, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 1.743954480796586e-05, |
|
"loss": 0.0029, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.15740421125302037, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 1.741109530583215e-05, |
|
"loss": 0.0007, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1587849499482223, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 1.738264580369844e-05, |
|
"loss": 0.0004, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.16016568864342423, |
|
"grad_norm": 0.04443359375, |
|
"learning_rate": 1.7354196301564722e-05, |
|
"loss": 0.0004, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.16154642733862618, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 1.7325746799431012e-05, |
|
"loss": 0.0004, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.1629271660338281, |
|
"grad_norm": 0.09033203125, |
|
"learning_rate": 1.72972972972973e-05, |
|
"loss": 0.0005, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.16430790472903004, |
|
"grad_norm": 0.046875, |
|
"learning_rate": 1.7268847795163585e-05, |
|
"loss": 0.0004, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.16568864342423195, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 1.7240398293029875e-05, |
|
"loss": 0.0004, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1670693821194339, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 1.721194879089616e-05, |
|
"loss": 0.0056, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.16845012081463584, |
|
"grad_norm": 0.07275390625, |
|
"learning_rate": 1.718349928876245e-05, |
|
"loss": 0.0005, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.16983085950983776, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 1.7155049786628736e-05, |
|
"loss": 0.0004, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.1712115982050397, |
|
"grad_norm": 0.10107421875, |
|
"learning_rate": 1.7126600284495022e-05, |
|
"loss": 0.0005, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.17259233690024162, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 1.709815078236131e-05, |
|
"loss": 0.0004, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.17397307559544356, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 1.7069701280227596e-05, |
|
"loss": 0.0034, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.1753538142906455, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 1.7041251778093886e-05, |
|
"loss": 0.0008, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.17673455298584742, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 1.7012802275960173e-05, |
|
"loss": 0.0019, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.17811529168104936, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.698435277382646e-05, |
|
"loss": 0.0018, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.17949603037625128, |
|
"grad_norm": 0.10498046875, |
|
"learning_rate": 1.6955903271692746e-05, |
|
"loss": 0.0006, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.18087676907145323, |
|
"grad_norm": 0.1474609375, |
|
"learning_rate": 1.6927453769559036e-05, |
|
"loss": 0.001, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.18225750776665517, |
|
"grad_norm": 0.12060546875, |
|
"learning_rate": 1.689900426742532e-05, |
|
"loss": 0.0011, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.18363824646185709, |
|
"grad_norm": 0.034912109375, |
|
"learning_rate": 1.687055476529161e-05, |
|
"loss": 0.0003, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.18501898515705903, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 1.6842105263157896e-05, |
|
"loss": 0.0003, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.18639972385226097, |
|
"grad_norm": 0.08544921875, |
|
"learning_rate": 1.6813655761024183e-05, |
|
"loss": 0.0007, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1877804625474629, |
|
"grad_norm": 0.03466796875, |
|
"learning_rate": 1.6785206258890473e-05, |
|
"loss": 0.0003, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.18916120124266483, |
|
"grad_norm": 0.033203125, |
|
"learning_rate": 1.6756756756756757e-05, |
|
"loss": 0.0003, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.19054193993786675, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 1.6728307254623047e-05, |
|
"loss": 0.0004, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1919226786330687, |
|
"grad_norm": 0.0703125, |
|
"learning_rate": 1.6699857752489334e-05, |
|
"loss": 0.0004, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.19330341732827064, |
|
"grad_norm": 0.03955078125, |
|
"learning_rate": 1.667140825035562e-05, |
|
"loss": 0.0004, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.19468415602347255, |
|
"grad_norm": 0.138671875, |
|
"learning_rate": 1.6642958748221907e-05, |
|
"loss": 0.0004, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1960648947186745, |
|
"grad_norm": 1.015625, |
|
"learning_rate": 1.6614509246088194e-05, |
|
"loss": 0.0013, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.19744563341387641, |
|
"grad_norm": 0.03271484375, |
|
"learning_rate": 1.658605974395448e-05, |
|
"loss": 0.0003, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.19882637210907836, |
|
"grad_norm": 0.03369140625, |
|
"learning_rate": 1.655761024182077e-05, |
|
"loss": 0.0003, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.19882637210907836, |
|
"eval_loss": 0.0005994443781673908, |
|
"eval_runtime": 594.9971, |
|
"eval_samples_per_second": 2.165, |
|
"eval_steps_per_second": 2.165, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2002071108042803, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.6529160739687057e-05, |
|
"loss": 0.0005, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.20158784949948222, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 1.6500711237553344e-05, |
|
"loss": 0.0005, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.20296858819468416, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 1.647226173541963e-05, |
|
"loss": 0.0038, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.20434932688988608, |
|
"grad_norm": 0.09228515625, |
|
"learning_rate": 1.6443812233285917e-05, |
|
"loss": 0.0005, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.20573006558508802, |
|
"grad_norm": 0.0380859375, |
|
"learning_rate": 1.6415362731152208e-05, |
|
"loss": 0.0003, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.20711080428028997, |
|
"grad_norm": 0.044189453125, |
|
"learning_rate": 1.6386913229018494e-05, |
|
"loss": 0.0005, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.20849154297549188, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 1.635846372688478e-05, |
|
"loss": 0.0018, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.20987228167069383, |
|
"grad_norm": 0.2275390625, |
|
"learning_rate": 1.6330014224751068e-05, |
|
"loss": 0.0006, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.21125302036589574, |
|
"grad_norm": 0.08349609375, |
|
"learning_rate": 1.6301564722617355e-05, |
|
"loss": 0.0005, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.2126337590610977, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 1.627311522048364e-05, |
|
"loss": 0.0007, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.21401449775629963, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 1.624466571834993e-05, |
|
"loss": 0.0041, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.21539523645150155, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.6216216216216218e-05, |
|
"loss": 0.0024, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.2167759751467035, |
|
"grad_norm": 2.28125, |
|
"learning_rate": 1.6187766714082505e-05, |
|
"loss": 0.0014, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.2181567138419054, |
|
"grad_norm": 0.09716796875, |
|
"learning_rate": 1.615931721194879e-05, |
|
"loss": 0.0005, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.21953745253710735, |
|
"grad_norm": 0.2431640625, |
|
"learning_rate": 1.6130867709815078e-05, |
|
"loss": 0.001, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.2209181912323093, |
|
"grad_norm": 0.047119140625, |
|
"learning_rate": 1.610241820768137e-05, |
|
"loss": 0.0004, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2222989299275112, |
|
"grad_norm": 0.0303955078125, |
|
"learning_rate": 1.6073968705547652e-05, |
|
"loss": 0.0003, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.22367966862271316, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 1.6045519203413942e-05, |
|
"loss": 0.0004, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.22506040731791507, |
|
"grad_norm": 0.042724609375, |
|
"learning_rate": 1.601706970128023e-05, |
|
"loss": 0.0003, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.22644114601311702, |
|
"grad_norm": 0.02880859375, |
|
"learning_rate": 1.5988620199146515e-05, |
|
"loss": 0.0003, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.22782188470831896, |
|
"grad_norm": 0.029541015625, |
|
"learning_rate": 1.5960170697012805e-05, |
|
"loss": 0.0003, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.22920262340352088, |
|
"grad_norm": 0.028076171875, |
|
"learning_rate": 1.5931721194879092e-05, |
|
"loss": 0.0003, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.23058336209872282, |
|
"grad_norm": 0.0291748046875, |
|
"learning_rate": 1.590327169274538e-05, |
|
"loss": 0.0003, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.23196410079392474, |
|
"grad_norm": 0.0284423828125, |
|
"learning_rate": 1.5874822190611666e-05, |
|
"loss": 0.0003, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.23334483948912668, |
|
"grad_norm": 0.0279541015625, |
|
"learning_rate": 1.5846372688477952e-05, |
|
"loss": 0.0003, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.23472557818432863, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 1.581792318634424e-05, |
|
"loss": 0.0003, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.23610631687953054, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 1.578947368421053e-05, |
|
"loss": 0.0036, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.2374870555747325, |
|
"grad_norm": 0.0274658203125, |
|
"learning_rate": 1.5761024182076813e-05, |
|
"loss": 0.0003, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2388677942699344, |
|
"grad_norm": 1.0625, |
|
"learning_rate": 1.5732574679943103e-05, |
|
"loss": 0.0027, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.24024853296513635, |
|
"grad_norm": 0.0274658203125, |
|
"learning_rate": 1.570412517780939e-05, |
|
"loss": 0.0003, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.2416292716603383, |
|
"grad_norm": 0.0277099609375, |
|
"learning_rate": 1.5675675675675676e-05, |
|
"loss": 0.0003, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2430100103555402, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 1.5647226173541966e-05, |
|
"loss": 0.0003, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.24439074905074215, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 1.561877667140825e-05, |
|
"loss": 0.0003, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.24577148774594407, |
|
"grad_norm": 0.0274658203125, |
|
"learning_rate": 1.559032716927454e-05, |
|
"loss": 0.0003, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.247152226441146, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 1.5561877667140826e-05, |
|
"loss": 0.001, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.24853296513634796, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 1.5533428165007113e-05, |
|
"loss": 0.0003, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.24991370383154987, |
|
"grad_norm": 0.038330078125, |
|
"learning_rate": 1.55049786628734e-05, |
|
"loss": 0.0003, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.2512944425267518, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 1.547652916073969e-05, |
|
"loss": 0.0003, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.25267518122195376, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.5448079658605977e-05, |
|
"loss": 0.0017, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.2540559199171557, |
|
"grad_norm": 0.025634765625, |
|
"learning_rate": 1.5419630156472263e-05, |
|
"loss": 0.0002, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.2554366586123576, |
|
"grad_norm": 0.02685546875, |
|
"learning_rate": 1.539118065433855e-05, |
|
"loss": 0.0003, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.25681739730755954, |
|
"grad_norm": 3.921875, |
|
"learning_rate": 1.5362731152204837e-05, |
|
"loss": 0.0042, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2581981360027615, |
|
"grad_norm": 0.81640625, |
|
"learning_rate": 1.5334281650071127e-05, |
|
"loss": 0.0012, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.2595788746979634, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 1.530583214793741e-05, |
|
"loss": 0.0004, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.26095961339316537, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 1.52773826458037e-05, |
|
"loss": 0.0003, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.26234035208836726, |
|
"grad_norm": 0.0250244140625, |
|
"learning_rate": 1.5248933143669986e-05, |
|
"loss": 0.0002, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2637210907835692, |
|
"grad_norm": 0.1689453125, |
|
"learning_rate": 1.5220483641536274e-05, |
|
"loss": 0.0004, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.26510182947877114, |
|
"grad_norm": 0.0294189453125, |
|
"learning_rate": 1.5192034139402562e-05, |
|
"loss": 0.0003, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.2664825681739731, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 1.5163584637268849e-05, |
|
"loss": 0.0013, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.26786330686917503, |
|
"grad_norm": 0.0260009765625, |
|
"learning_rate": 1.5135135135135138e-05, |
|
"loss": 0.0002, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.2692440455643769, |
|
"grad_norm": 0.02685546875, |
|
"learning_rate": 1.5106685633001423e-05, |
|
"loss": 0.0003, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.27062478425957887, |
|
"grad_norm": 0.025634765625, |
|
"learning_rate": 1.5078236130867711e-05, |
|
"loss": 0.0002, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.2720055229547808, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 1.5049786628733998e-05, |
|
"loss": 0.0002, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.27338626164998275, |
|
"grad_norm": 0.029296875, |
|
"learning_rate": 1.5021337126600286e-05, |
|
"loss": 0.0003, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.2747670003451847, |
|
"grad_norm": 0.02880859375, |
|
"learning_rate": 1.4992887624466573e-05, |
|
"loss": 0.0003, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.2761477390403866, |
|
"grad_norm": 0.024658203125, |
|
"learning_rate": 1.4964438122332861e-05, |
|
"loss": 0.0002, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.27752847773558853, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 1.4935988620199146e-05, |
|
"loss": 0.0004, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.2789092164307905, |
|
"grad_norm": 0.03466796875, |
|
"learning_rate": 1.4907539118065435e-05, |
|
"loss": 0.0003, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.2802899551259924, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 1.4879089615931723e-05, |
|
"loss": 0.0002, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.28167069382119436, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 1.485064011379801e-05, |
|
"loss": 0.0002, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.28305143251639625, |
|
"grad_norm": 0.0263671875, |
|
"learning_rate": 1.4822190611664298e-05, |
|
"loss": 0.0002, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.2844321712115982, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 1.4793741109530583e-05, |
|
"loss": 0.0004, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.28581290990680014, |
|
"grad_norm": 0.0252685546875, |
|
"learning_rate": 1.4765291607396872e-05, |
|
"loss": 0.0002, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.2871936486020021, |
|
"grad_norm": 0.703125, |
|
"learning_rate": 1.4736842105263159e-05, |
|
"loss": 0.0015, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.288574387297204, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 1.4708392603129447e-05, |
|
"loss": 0.0002, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.2899551259924059, |
|
"grad_norm": 0.0255126953125, |
|
"learning_rate": 1.4679943100995732e-05, |
|
"loss": 0.0003, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.29133586468760786, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 1.465149359886202e-05, |
|
"loss": 0.0002, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.2927166033828098, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 1.4623044096728309e-05, |
|
"loss": 0.0002, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.29409734207801175, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 1.4594594594594596e-05, |
|
"loss": 0.0004, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.2954780807732137, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 1.4566145092460884e-05, |
|
"loss": 0.0003, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.2968588194684156, |
|
"grad_norm": 0.02392578125, |
|
"learning_rate": 1.453769559032717e-05, |
|
"loss": 0.0002, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.2982395581636175, |
|
"grad_norm": 0.0234375, |
|
"learning_rate": 1.4509246088193457e-05, |
|
"loss": 0.0002, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.2982395581636175, |
|
"eval_loss": 0.000599406601395458, |
|
"eval_runtime": 586.817, |
|
"eval_samples_per_second": 2.195, |
|
"eval_steps_per_second": 2.195, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.29962029685881947, |
|
"grad_norm": 0.024658203125, |
|
"learning_rate": 1.4480796586059744e-05, |
|
"loss": 0.0002, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.3010010355540214, |
|
"grad_norm": 0.16796875, |
|
"learning_rate": 1.4452347083926033e-05, |
|
"loss": 0.0028, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.30238177424922336, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 1.442389758179232e-05, |
|
"loss": 0.0002, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.30376251294442524, |
|
"grad_norm": 0.024658203125, |
|
"learning_rate": 1.4395448079658608e-05, |
|
"loss": 0.0002, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3051432516396272, |
|
"grad_norm": 0.0238037109375, |
|
"learning_rate": 1.4366998577524896e-05, |
|
"loss": 0.0002, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.30652399033482913, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 1.4338549075391181e-05, |
|
"loss": 0.0009, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.3079047290300311, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 1.431009957325747e-05, |
|
"loss": 0.0002, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.309285467725233, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 1.4281650071123756e-05, |
|
"loss": 0.0002, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.3106662064204349, |
|
"grad_norm": 0.0223388671875, |
|
"learning_rate": 1.4253200568990045e-05, |
|
"loss": 0.0002, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.31204694511563685, |
|
"grad_norm": 0.022216796875, |
|
"learning_rate": 1.422475106685633e-05, |
|
"loss": 0.0002, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.3134276838108388, |
|
"grad_norm": 0.02294921875, |
|
"learning_rate": 1.4196301564722618e-05, |
|
"loss": 0.0002, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.31480842250604074, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 1.4167852062588905e-05, |
|
"loss": 0.0002, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.3161891612012427, |
|
"grad_norm": 0.0238037109375, |
|
"learning_rate": 1.4139402560455193e-05, |
|
"loss": 0.0002, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.3175698998964446, |
|
"grad_norm": 1.25, |
|
"learning_rate": 1.4110953058321482e-05, |
|
"loss": 0.012, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.3189506385916465, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 1.4082503556187767e-05, |
|
"loss": 0.0002, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.32033137728684846, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 1.4054054054054055e-05, |
|
"loss": 0.0002, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.3217121159820504, |
|
"grad_norm": 0.06005859375, |
|
"learning_rate": 1.4025604551920342e-05, |
|
"loss": 0.0004, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.32309285467725235, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 1.399715504978663e-05, |
|
"loss": 0.0004, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.32447359337245424, |
|
"grad_norm": 0.031494140625, |
|
"learning_rate": 1.3968705547652917e-05, |
|
"loss": 0.0003, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.3258543320676562, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 1.3940256045519206e-05, |
|
"loss": 0.0003, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.3272350707628581, |
|
"grad_norm": 0.1318359375, |
|
"learning_rate": 1.391180654338549e-05, |
|
"loss": 0.0007, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.32861580945806007, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 1.3883357041251779e-05, |
|
"loss": 0.0003, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.329996548153262, |
|
"grad_norm": 0.036865234375, |
|
"learning_rate": 1.3854907539118068e-05, |
|
"loss": 0.0003, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.3313772868484639, |
|
"grad_norm": 0.039794921875, |
|
"learning_rate": 1.3826458036984354e-05, |
|
"loss": 0.0003, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.33275802554366585, |
|
"grad_norm": 0.0390625, |
|
"learning_rate": 1.3798008534850643e-05, |
|
"loss": 0.0003, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.3341387642388678, |
|
"grad_norm": 0.03759765625, |
|
"learning_rate": 1.3769559032716928e-05, |
|
"loss": 0.0003, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.33551950293406974, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 1.3741109530583216e-05, |
|
"loss": 0.0004, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.3369002416292717, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 1.3712660028449503e-05, |
|
"loss": 0.0004, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.33828098032447357, |
|
"grad_norm": 0.0260009765625, |
|
"learning_rate": 1.3684210526315791e-05, |
|
"loss": 0.0003, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.3396617190196755, |
|
"grad_norm": 0.051513671875, |
|
"learning_rate": 1.3655761024182076e-05, |
|
"loss": 0.0004, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.34104245771487746, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 1.3627311522048365e-05, |
|
"loss": 0.0002, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.3424231964100794, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 1.3598862019914653e-05, |
|
"loss": 0.0003, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.34380393510528134, |
|
"grad_norm": 0.0250244140625, |
|
"learning_rate": 1.357041251778094e-05, |
|
"loss": 0.0003, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.34518467380048323, |
|
"grad_norm": 0.0235595703125, |
|
"learning_rate": 1.3541963015647228e-05, |
|
"loss": 0.0002, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.3465654124956852, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 1.3513513513513515e-05, |
|
"loss": 0.0002, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.3479461511908871, |
|
"grad_norm": 1.6015625, |
|
"learning_rate": 1.3485064011379802e-05, |
|
"loss": 0.0012, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.34932688988608906, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 1.3456614509246089e-05, |
|
"loss": 0.0002, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.350707628581291, |
|
"grad_norm": 0.034912109375, |
|
"learning_rate": 1.3428165007112377e-05, |
|
"loss": 0.0002, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.3520883672764929, |
|
"grad_norm": 0.034423828125, |
|
"learning_rate": 1.3399715504978664e-05, |
|
"loss": 0.0003, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.35346910597169484, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 1.3371266002844952e-05, |
|
"loss": 0.0002, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.3548498446668968, |
|
"grad_norm": 0.0247802734375, |
|
"learning_rate": 1.3342816500711237e-05, |
|
"loss": 0.0002, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.35623058336209873, |
|
"grad_norm": 0.023193359375, |
|
"learning_rate": 1.3314366998577526e-05, |
|
"loss": 0.0002, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.3576113220573007, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 1.3285917496443814e-05, |
|
"loss": 0.0002, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.35899206075250256, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 1.32574679943101e-05, |
|
"loss": 0.0002, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3603727994477045, |
|
"grad_norm": 0.70703125, |
|
"learning_rate": 1.322901849217639e-05, |
|
"loss": 0.0005, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.36175353814290645, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.3200568990042674e-05, |
|
"loss": 0.0002, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.3631342768381084, |
|
"grad_norm": 0.021728515625, |
|
"learning_rate": 1.3172119487908963e-05, |
|
"loss": 0.0002, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.36451501553331034, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 1.314366998577525e-05, |
|
"loss": 0.0002, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.3658957542285122, |
|
"grad_norm": 0.03564453125, |
|
"learning_rate": 1.3115220483641538e-05, |
|
"loss": 0.0003, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.36727649292371417, |
|
"grad_norm": 0.020263671875, |
|
"learning_rate": 1.3086770981507825e-05, |
|
"loss": 0.0002, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.3686572316189161, |
|
"grad_norm": 0.0201416015625, |
|
"learning_rate": 1.3058321479374111e-05, |
|
"loss": 0.0002, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.37003797031411806, |
|
"grad_norm": 0.1396484375, |
|
"learning_rate": 1.30298719772404e-05, |
|
"loss": 0.0002, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.37141870900932, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 1.3001422475106686e-05, |
|
"loss": 0.0006, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.37279944770452195, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.2972972972972975e-05, |
|
"loss": 0.0002, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.37418018639972384, |
|
"grad_norm": 0.0264892578125, |
|
"learning_rate": 1.2944523470839262e-05, |
|
"loss": 0.0002, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.3755609250949258, |
|
"grad_norm": 0.019775390625, |
|
"learning_rate": 1.291607396870555e-05, |
|
"loss": 0.0002, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.3769416637901277, |
|
"grad_norm": 0.019775390625, |
|
"learning_rate": 1.2887624466571835e-05, |
|
"loss": 0.0002, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.37832240248532967, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 1.2859174964438123e-05, |
|
"loss": 0.0002, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.3797031411805316, |
|
"grad_norm": 0.02978515625, |
|
"learning_rate": 1.283072546230441e-05, |
|
"loss": 0.0002, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.3810838798757335, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 1.2802275960170699e-05, |
|
"loss": 0.0002, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.38246461857093544, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 1.2773826458036987e-05, |
|
"loss": 0.0008, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.3838453572661374, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 1.2745376955903272e-05, |
|
"loss": 0.0002, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.38522609596133933, |
|
"grad_norm": 0.019775390625, |
|
"learning_rate": 1.271692745376956e-05, |
|
"loss": 0.0002, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.3866068346565413, |
|
"grad_norm": 0.267578125, |
|
"learning_rate": 1.2688477951635847e-05, |
|
"loss": 0.0031, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.38798757335174316, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 1.2660028449502136e-05, |
|
"loss": 0.0002, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.3893683120469451, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 0.0002, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.39074905074214705, |
|
"grad_norm": 0.041259765625, |
|
"learning_rate": 1.2603129445234709e-05, |
|
"loss": 0.0002, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.392129789437349, |
|
"grad_norm": 0.0245361328125, |
|
"learning_rate": 1.2574679943100996e-05, |
|
"loss": 0.0003, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.39351052813255094, |
|
"grad_norm": 0.0186767578125, |
|
"learning_rate": 1.2546230440967284e-05, |
|
"loss": 0.0002, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.39489126682775283, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 1.2517780938833573e-05, |
|
"loss": 0.0002, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.3962720055229548, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 1.248933143669986e-05, |
|
"loss": 0.0002, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.3976527442181567, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.2460881934566146e-05, |
|
"loss": 0.0002, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.3976527442181567, |
|
"eval_loss": 0.00039300136268138885, |
|
"eval_runtime": 581.6065, |
|
"eval_samples_per_second": 2.215, |
|
"eval_steps_per_second": 2.215, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.39903348291335866, |
|
"grad_norm": 0.2041015625, |
|
"learning_rate": 1.2432432432432433e-05, |
|
"loss": 0.0005, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.4004142216085606, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 1.2403982930298721e-05, |
|
"loss": 0.0002, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4017949603037625, |
|
"grad_norm": 0.09619140625, |
|
"learning_rate": 1.2375533428165008e-05, |
|
"loss": 0.0002, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.40317569899896444, |
|
"grad_norm": 0.15625, |
|
"learning_rate": 1.2347083926031296e-05, |
|
"loss": 0.0021, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.4045564376941664, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 1.2318634423897581e-05, |
|
"loss": 0.0002, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.4059371763893683, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 1.229018492176387e-05, |
|
"loss": 0.0002, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.40731791508457027, |
|
"grad_norm": 0.018310546875, |
|
"learning_rate": 1.2261735419630158e-05, |
|
"loss": 0.0002, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.40869865377977216, |
|
"grad_norm": 0.263671875, |
|
"learning_rate": 1.2233285917496445e-05, |
|
"loss": 0.0033, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.4100793924749741, |
|
"grad_norm": 0.068359375, |
|
"learning_rate": 1.2204836415362733e-05, |
|
"loss": 0.0003, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.41146013117017605, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 1.2176386913229019e-05, |
|
"loss": 0.0002, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.412840869865378, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 1.2147937411095307e-05, |
|
"loss": 0.0002, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.41422160856057993, |
|
"grad_norm": 0.0223388671875, |
|
"learning_rate": 1.2119487908961594e-05, |
|
"loss": 0.0002, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4156023472557818, |
|
"grad_norm": 0.0186767578125, |
|
"learning_rate": 1.2091038406827882e-05, |
|
"loss": 0.0002, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.41698308595098377, |
|
"grad_norm": 0.66796875, |
|
"learning_rate": 1.2062588904694169e-05, |
|
"loss": 0.0017, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.4183638246461857, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 1.2034139402560456e-05, |
|
"loss": 0.0002, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.41974456334138766, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 1.2005689900426742e-05, |
|
"loss": 0.0002, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.4211253020365896, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 1.197724039829303e-05, |
|
"loss": 0.0002, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.4225060407317915, |
|
"grad_norm": 0.03662109375, |
|
"learning_rate": 1.1948790896159319e-05, |
|
"loss": 0.0003, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.42388677942699343, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 1.1920341394025606e-05, |
|
"loss": 0.0002, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.4252675181221954, |
|
"grad_norm": 0.036376953125, |
|
"learning_rate": 1.1891891891891894e-05, |
|
"loss": 0.0002, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.4266482568173973, |
|
"grad_norm": 0.197265625, |
|
"learning_rate": 1.186344238975818e-05, |
|
"loss": 0.0002, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.42802899551259926, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 1.1834992887624468e-05, |
|
"loss": 0.0002, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.42940973420780115, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 1.1806543385490754e-05, |
|
"loss": 0.0002, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.4307904729030031, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.1778093883357043e-05, |
|
"loss": 0.0002, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.43217121159820504, |
|
"grad_norm": 0.02197265625, |
|
"learning_rate": 1.1749644381223328e-05, |
|
"loss": 0.0002, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.433551950293407, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 1.1721194879089616e-05, |
|
"loss": 0.0002, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.43493268898860893, |
|
"grad_norm": 0.05322265625, |
|
"learning_rate": 1.1692745376955905e-05, |
|
"loss": 0.0005, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.4363134276838108, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 1.1664295874822192e-05, |
|
"loss": 0.0002, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.43769416637901276, |
|
"grad_norm": 0.01904296875, |
|
"learning_rate": 1.163584637268848e-05, |
|
"loss": 0.0002, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.4390749050742147, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 1.1607396870554765e-05, |
|
"loss": 0.0002, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.44045564376941665, |
|
"grad_norm": 0.03125, |
|
"learning_rate": 1.1578947368421053e-05, |
|
"loss": 0.0003, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.4418363824646186, |
|
"grad_norm": 0.031005859375, |
|
"learning_rate": 1.155049786628734e-05, |
|
"loss": 0.0002, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4432171211598205, |
|
"grad_norm": 0.0712890625, |
|
"learning_rate": 1.1522048364153629e-05, |
|
"loss": 0.0003, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.4445978598550224, |
|
"grad_norm": 0.024169921875, |
|
"learning_rate": 1.1493598862019915e-05, |
|
"loss": 0.0002, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.44597859855022437, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 1.1465149359886204e-05, |
|
"loss": 0.0002, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.4473593372454263, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 1.143669985775249e-05, |
|
"loss": 0.0003, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.44874007594062826, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 1.1408250355618777e-05, |
|
"loss": 0.0002, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.45012081463583015, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 1.1379800853485066e-05, |
|
"loss": 0.0002, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.4515015533310321, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 1.1351351351351352e-05, |
|
"loss": 0.0002, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.45288229202623403, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 1.132290184921764e-05, |
|
"loss": 0.0007, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.454263030721436, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 1.1294452347083926e-05, |
|
"loss": 0.0002, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.4556437694166379, |
|
"grad_norm": 0.07421875, |
|
"learning_rate": 1.1266002844950214e-05, |
|
"loss": 0.0003, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4570245081118398, |
|
"grad_norm": 0.0177001953125, |
|
"learning_rate": 1.1237553342816501e-05, |
|
"loss": 0.0002, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.45840524680704176, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 1.120910384068279e-05, |
|
"loss": 0.0002, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.4597859855022437, |
|
"grad_norm": 0.032958984375, |
|
"learning_rate": 1.1180654338549078e-05, |
|
"loss": 0.0002, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.46116672419744564, |
|
"grad_norm": 0.2109375, |
|
"learning_rate": 1.1152204836415363e-05, |
|
"loss": 0.0002, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.4625474628926476, |
|
"grad_norm": 0.0181884765625, |
|
"learning_rate": 1.1123755334281651e-05, |
|
"loss": 0.0002, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.4639282015878495, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 1.1095305832147938e-05, |
|
"loss": 0.0002, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.4653089402830514, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 1.1066856330014226e-05, |
|
"loss": 0.0002, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.46668967897825336, |
|
"grad_norm": 0.0255126953125, |
|
"learning_rate": 1.1038406827880513e-05, |
|
"loss": 0.0002, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.4680704176734553, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 1.10099573257468e-05, |
|
"loss": 0.0002, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.46945115636865725, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 1.0981507823613087e-05, |
|
"loss": 0.0002, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.47083189506385914, |
|
"grad_norm": 0.0172119140625, |
|
"learning_rate": 1.0953058321479375e-05, |
|
"loss": 0.0002, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.4722126337590611, |
|
"grad_norm": 0.04541015625, |
|
"learning_rate": 1.0924608819345663e-05, |
|
"loss": 0.0004, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.47359337245426303, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 1.089615931721195e-05, |
|
"loss": 0.0003, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.474974111149465, |
|
"grad_norm": 0.059326171875, |
|
"learning_rate": 1.0867709815078239e-05, |
|
"loss": 0.0003, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.4763548498446669, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 1.0839260312944524e-05, |
|
"loss": 0.0002, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.4777355885398688, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.0810810810810812e-05, |
|
"loss": 0.0002, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.47911632723507075, |
|
"grad_norm": 0.0419921875, |
|
"learning_rate": 1.0782361308677099e-05, |
|
"loss": 0.0002, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.4804970659302727, |
|
"grad_norm": 0.021240234375, |
|
"learning_rate": 1.0753911806543387e-05, |
|
"loss": 0.0002, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.48187780462547464, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 1.0725462304409672e-05, |
|
"loss": 0.0004, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.4832585433206766, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 1.069701280227596e-05, |
|
"loss": 0.0002, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.48463928201587847, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 1.0668563300142247e-05, |
|
"loss": 0.0002, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.4860200207110804, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.0640113798008536e-05, |
|
"loss": 0.0002, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.48740075940628236, |
|
"grad_norm": 0.02001953125, |
|
"learning_rate": 1.0611664295874824e-05, |
|
"loss": 0.0002, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.4887814981014843, |
|
"grad_norm": 0.0172119140625, |
|
"learning_rate": 1.058321479374111e-05, |
|
"loss": 0.0002, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.49016223679668625, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 1.0554765291607398e-05, |
|
"loss": 0.0002, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.49154297549188813, |
|
"grad_norm": 0.5, |
|
"learning_rate": 1.0526315789473684e-05, |
|
"loss": 0.0021, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.4929237141870901, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 1.0497866287339973e-05, |
|
"loss": 0.0002, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.494304452882292, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.046941678520626e-05, |
|
"loss": 0.0002, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.49568519157749397, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 1.0440967283072548e-05, |
|
"loss": 0.0002, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.4970659302726959, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 1.0412517780938833e-05, |
|
"loss": 0.0002, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4970659302726959, |
|
"eval_loss": 0.00029756984440609813, |
|
"eval_runtime": 582.415, |
|
"eval_samples_per_second": 2.211, |
|
"eval_steps_per_second": 2.211, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.4984466689678978, |
|
"grad_norm": 0.0169677734375, |
|
"learning_rate": 1.0384068278805121e-05, |
|
"loss": 0.0002, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.49982740766309974, |
|
"grad_norm": 0.020263671875, |
|
"learning_rate": 1.035561877667141e-05, |
|
"loss": 0.0002, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.5012081463583017, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 1.0327169274537697e-05, |
|
"loss": 0.0002, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.5025888850535036, |
|
"grad_norm": 0.01953125, |
|
"learning_rate": 1.0298719772403985e-05, |
|
"loss": 0.0002, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.5039696237487056, |
|
"grad_norm": 0.0159912109375, |
|
"learning_rate": 1.027027027027027e-05, |
|
"loss": 0.0002, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.5053503624439075, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 1.0241820768136559e-05, |
|
"loss": 0.0002, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.5067311011391095, |
|
"grad_norm": 0.04296875, |
|
"learning_rate": 1.0213371266002845e-05, |
|
"loss": 0.0002, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.5081118398343114, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 1.0184921763869134e-05, |
|
"loss": 0.0003, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.5094925785295132, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 1.0156472261735419e-05, |
|
"loss": 0.0002, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.5108733172247152, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 1.0128022759601707e-05, |
|
"loss": 0.0002, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5122540559199171, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 1.0099573257467996e-05, |
|
"loss": 0.0002, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.5136347946151191, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 1.0071123755334282e-05, |
|
"loss": 0.0002, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.515015533310321, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 1.004267425320057e-05, |
|
"loss": 0.0014, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.516396272005523, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 1.0014224751066857e-05, |
|
"loss": 0.0002, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.5177770107007249, |
|
"grad_norm": 0.1337890625, |
|
"learning_rate": 9.985775248933144e-06, |
|
"loss": 0.0014, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5191577493959268, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 9.957325746799433e-06, |
|
"loss": 0.0004, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.5205384880911288, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 9.92887624466572e-06, |
|
"loss": 0.0003, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.5219192267863307, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 9.900426742532006e-06, |
|
"loss": 0.0002, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.5232999654815326, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 9.871977240398294e-06, |
|
"loss": 0.0008, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.5246807041767345, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 9.843527738264581e-06, |
|
"loss": 0.0002, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5260614428719365, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 9.815078236130868e-06, |
|
"loss": 0.0002, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.5274421815671384, |
|
"grad_norm": 0.06591796875, |
|
"learning_rate": 9.786628733997155e-06, |
|
"loss": 0.0004, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.5288229202623403, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 9.758179231863443e-06, |
|
"loss": 0.0001, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.5302036589575423, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 9.729729729729732e-06, |
|
"loss": 0.0002, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.5315843976527442, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 9.701280227596018e-06, |
|
"loss": 0.0002, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.5329651363479462, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 9.672830725462305e-06, |
|
"loss": 0.0002, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.5343458750431481, |
|
"grad_norm": 0.0223388671875, |
|
"learning_rate": 9.644381223328593e-06, |
|
"loss": 0.0002, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.5357266137383501, |
|
"grad_norm": 0.0299072265625, |
|
"learning_rate": 9.61593172119488e-06, |
|
"loss": 0.0002, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.5371073524335519, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 9.587482219061167e-06, |
|
"loss": 0.0002, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.5384880911287538, |
|
"grad_norm": 0.016845703125, |
|
"learning_rate": 9.559032716927454e-06, |
|
"loss": 0.0002, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5398688298239558, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 9.530583214793742e-06, |
|
"loss": 0.002, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.5412495685191577, |
|
"grad_norm": 0.0225830078125, |
|
"learning_rate": 9.502133712660029e-06, |
|
"loss": 0.0002, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.5426303072143597, |
|
"grad_norm": 0.0296630859375, |
|
"learning_rate": 9.473684210526315e-06, |
|
"loss": 0.0002, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.5440110459095616, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 9.445234708392604e-06, |
|
"loss": 0.0011, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.5453917846047636, |
|
"grad_norm": 0.0673828125, |
|
"learning_rate": 9.416785206258892e-06, |
|
"loss": 0.0005, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.5467725232999655, |
|
"grad_norm": 0.0213623046875, |
|
"learning_rate": 9.388335704125179e-06, |
|
"loss": 0.0002, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.5481532619951675, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 9.359886201991466e-06, |
|
"loss": 0.0002, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.5495340006903694, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 9.331436699857753e-06, |
|
"loss": 0.0002, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.5509147393855712, |
|
"grad_norm": 0.01519775390625, |
|
"learning_rate": 9.302987197724041e-06, |
|
"loss": 0.0001, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.5522954780807732, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 9.274537695590328e-06, |
|
"loss": 0.0002, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5536762167759751, |
|
"grad_norm": 0.033935546875, |
|
"learning_rate": 9.246088193456614e-06, |
|
"loss": 0.0002, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.5550569554711771, |
|
"grad_norm": 0.019287109375, |
|
"learning_rate": 9.217638691322903e-06, |
|
"loss": 0.0002, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.556437694166379, |
|
"grad_norm": 0.11962890625, |
|
"learning_rate": 9.189189189189191e-06, |
|
"loss": 0.0007, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.557818432861581, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 9.160739687055478e-06, |
|
"loss": 0.0001, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.5591991715567829, |
|
"grad_norm": 0.0400390625, |
|
"learning_rate": 9.132290184921765e-06, |
|
"loss": 0.0002, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.5605799102519848, |
|
"grad_norm": 0.61328125, |
|
"learning_rate": 9.103840682788051e-06, |
|
"loss": 0.0011, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.5619606489471868, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 9.07539118065434e-06, |
|
"loss": 0.0002, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.5633413876423887, |
|
"grad_norm": 0.061767578125, |
|
"learning_rate": 9.046941678520627e-06, |
|
"loss": 0.0003, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.5647221263375906, |
|
"grad_norm": 0.01708984375, |
|
"learning_rate": 9.018492176386913e-06, |
|
"loss": 0.0002, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.5661028650327925, |
|
"grad_norm": 0.1650390625, |
|
"learning_rate": 8.990042674253202e-06, |
|
"loss": 0.0011, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.5674836037279944, |
|
"grad_norm": 0.01519775390625, |
|
"learning_rate": 8.961593172119488e-06, |
|
"loss": 0.0001, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.5688643424231964, |
|
"grad_norm": 0.0194091796875, |
|
"learning_rate": 8.933143669985777e-06, |
|
"loss": 0.0002, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.5702450811183983, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 8.904694167852064e-06, |
|
"loss": 0.0001, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.5716258198136003, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 8.87624466571835e-06, |
|
"loss": 0.0001, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.5730065585088022, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 8.847795163584639e-06, |
|
"loss": 0.0002, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.5743872972040042, |
|
"grad_norm": 0.040771484375, |
|
"learning_rate": 8.819345661450926e-06, |
|
"loss": 0.0002, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.5757680358992061, |
|
"grad_norm": 0.015380859375, |
|
"learning_rate": 8.790896159317212e-06, |
|
"loss": 0.0001, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.577148774594408, |
|
"grad_norm": 0.01495361328125, |
|
"learning_rate": 8.7624466571835e-06, |
|
"loss": 0.0001, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.5785295132896099, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 8.733997155049787e-06, |
|
"loss": 0.0001, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.5799102519848118, |
|
"grad_norm": 0.03515625, |
|
"learning_rate": 8.705547652916074e-06, |
|
"loss": 0.0002, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.5812909906800138, |
|
"grad_norm": 0.0167236328125, |
|
"learning_rate": 8.677098150782361e-06, |
|
"loss": 0.0002, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.5826717293752157, |
|
"grad_norm": 0.0263671875, |
|
"learning_rate": 8.64864864864865e-06, |
|
"loss": 0.0002, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.5840524680704177, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 8.620199146514938e-06, |
|
"loss": 0.0001, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.5854332067656196, |
|
"grad_norm": 0.0198974609375, |
|
"learning_rate": 8.591749644381224e-06, |
|
"loss": 0.0002, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.5868139454608216, |
|
"grad_norm": 0.0225830078125, |
|
"learning_rate": 8.563300142247511e-06, |
|
"loss": 0.0002, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.5881946841560235, |
|
"grad_norm": 0.03125, |
|
"learning_rate": 8.534850640113798e-06, |
|
"loss": 0.0002, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.5895754228512254, |
|
"grad_norm": 0.01470947265625, |
|
"learning_rate": 8.506401137980086e-06, |
|
"loss": 0.0001, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.5909561615464274, |
|
"grad_norm": 0.02392578125, |
|
"learning_rate": 8.477951635846373e-06, |
|
"loss": 0.0002, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.5923369002416292, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 8.44950213371266e-06, |
|
"loss": 0.0001, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.5937176389368312, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 8.421052631578948e-06, |
|
"loss": 0.0001, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.5950983776320331, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 8.392603129445237e-06, |
|
"loss": 0.0002, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.596479116327235, |
|
"grad_norm": 0.017578125, |
|
"learning_rate": 8.364153627311523e-06, |
|
"loss": 0.0001, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.596479116327235, |
|
"eval_loss": 0.0002615667472127825, |
|
"eval_runtime": 581.6024, |
|
"eval_samples_per_second": 2.215, |
|
"eval_steps_per_second": 2.215, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.597859855022437, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 8.33570412517781e-06, |
|
"loss": 0.0001, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.5992405937176389, |
|
"grad_norm": 0.0211181640625, |
|
"learning_rate": 8.307254623044097e-06, |
|
"loss": 0.0002, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.6006213324128409, |
|
"grad_norm": 0.01953125, |
|
"learning_rate": 8.278805120910385e-06, |
|
"loss": 0.0002, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.6020020711080428, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 8.250355618776672e-06, |
|
"loss": 0.0001, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.6033828098032448, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 8.221906116642959e-06, |
|
"loss": 0.0005, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.6047635484984467, |
|
"grad_norm": 0.01470947265625, |
|
"learning_rate": 8.193456614509247e-06, |
|
"loss": 0.0001, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.6061442871936487, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 8.165007112375534e-06, |
|
"loss": 0.0002, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.6075250258888505, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 8.13655761024182e-06, |
|
"loss": 0.0001, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6089057645840524, |
|
"grad_norm": 0.0223388671875, |
|
"learning_rate": 8.108108108108109e-06, |
|
"loss": 0.0002, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.6102865032792544, |
|
"grad_norm": 0.062255859375, |
|
"learning_rate": 8.079658605974396e-06, |
|
"loss": 0.0005, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.6116672419744563, |
|
"grad_norm": 0.020263671875, |
|
"learning_rate": 8.051209103840684e-06, |
|
"loss": 0.0001, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.6130479806696583, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 8.022759601706971e-06, |
|
"loss": 0.0001, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.6144287193648602, |
|
"grad_norm": 0.6953125, |
|
"learning_rate": 7.994310099573258e-06, |
|
"loss": 0.0031, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.6158094580600622, |
|
"grad_norm": 0.0262451171875, |
|
"learning_rate": 7.965860597439546e-06, |
|
"loss": 0.0002, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.6171901967552641, |
|
"grad_norm": 0.060791015625, |
|
"learning_rate": 7.937411095305833e-06, |
|
"loss": 0.0003, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.618570935450466, |
|
"grad_norm": 0.0172119140625, |
|
"learning_rate": 7.90896159317212e-06, |
|
"loss": 0.0002, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.619951674145668, |
|
"grad_norm": 0.0146484375, |
|
"learning_rate": 7.880512091038406e-06, |
|
"loss": 0.0001, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.6213324128408698, |
|
"grad_norm": 0.014892578125, |
|
"learning_rate": 7.852062588904695e-06, |
|
"loss": 0.0001, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6227131515360718, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 7.823613086770983e-06, |
|
"loss": 0.0001, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.6240938902312737, |
|
"grad_norm": 0.014892578125, |
|
"learning_rate": 7.79516358463727e-06, |
|
"loss": 0.0001, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.6254746289264757, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 7.766714082503557e-06, |
|
"loss": 0.0001, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.6268553676216776, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 7.738264580369845e-06, |
|
"loss": 0.002, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.6282361063168795, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 7.709815078236132e-06, |
|
"loss": 0.0019, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.6296168450120815, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 7.681365576102418e-06, |
|
"loss": 0.0001, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.6309975837072834, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 7.652916073968705e-06, |
|
"loss": 0.0001, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.6323783224024854, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 7.624466571834993e-06, |
|
"loss": 0.0001, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.6337590610976873, |
|
"grad_norm": 0.014404296875, |
|
"learning_rate": 7.596017069701281e-06, |
|
"loss": 0.0001, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.6351397997928891, |
|
"grad_norm": 2.171875, |
|
"learning_rate": 7.567567567567569e-06, |
|
"loss": 0.0005, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6365205384880911, |
|
"grad_norm": 0.0191650390625, |
|
"learning_rate": 7.5391180654338555e-06, |
|
"loss": 0.0002, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.637901277183293, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 7.510668563300143e-06, |
|
"loss": 0.0002, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.639282015878495, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 7.482219061166431e-06, |
|
"loss": 0.0017, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.6406627545736969, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 7.453769559032717e-06, |
|
"loss": 0.0001, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.6420434932688989, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 7.425320056899005e-06, |
|
"loss": 0.0001, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.6434242319641008, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 7.396870554765292e-06, |
|
"loss": 0.0001, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.6448049706593028, |
|
"grad_norm": 0.06201171875, |
|
"learning_rate": 7.368421052631579e-06, |
|
"loss": 0.0004, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.6461857093545047, |
|
"grad_norm": 0.0179443359375, |
|
"learning_rate": 7.339971550497866e-06, |
|
"loss": 0.0001, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.6475664480497066, |
|
"grad_norm": 0.0225830078125, |
|
"learning_rate": 7.3115220483641544e-06, |
|
"loss": 0.0002, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.6489471867449085, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 7.283072546230442e-06, |
|
"loss": 0.0002, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6503279254401104, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 7.254623044096729e-06, |
|
"loss": 0.0002, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.6517086641353124, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 7.226173541963016e-06, |
|
"loss": 0.0001, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.6530894028305143, |
|
"grad_norm": 0.039306640625, |
|
"learning_rate": 7.197724039829304e-06, |
|
"loss": 0.0003, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.6544701415257163, |
|
"grad_norm": 0.1201171875, |
|
"learning_rate": 7.169274537695591e-06, |
|
"loss": 0.0002, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.6558508802209182, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 7.140825035561878e-06, |
|
"loss": 0.0001, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.6572316189161201, |
|
"grad_norm": 0.035888671875, |
|
"learning_rate": 7.112375533428165e-06, |
|
"loss": 0.0002, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.6586123576113221, |
|
"grad_norm": 0.08447265625, |
|
"learning_rate": 7.0839260312944525e-06, |
|
"loss": 0.0004, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.659993096306524, |
|
"grad_norm": 0.01556396484375, |
|
"learning_rate": 7.055476529160741e-06, |
|
"loss": 0.0001, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.661373835001726, |
|
"grad_norm": 0.0240478515625, |
|
"learning_rate": 7.027027027027028e-06, |
|
"loss": 0.0002, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.6627545736969278, |
|
"grad_norm": 0.0181884765625, |
|
"learning_rate": 6.998577524893315e-06, |
|
"loss": 0.0001, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.6641353123921298, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 6.970128022759603e-06, |
|
"loss": 0.0001, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.6655160510873317, |
|
"grad_norm": 0.029541015625, |
|
"learning_rate": 6.9416785206258896e-06, |
|
"loss": 0.0002, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.6668967897825336, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 6.913229018492177e-06, |
|
"loss": 0.0003, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.6682775284777356, |
|
"grad_norm": 0.0242919921875, |
|
"learning_rate": 6.884779516358464e-06, |
|
"loss": 0.0002, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.6696582671729375, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 6.8563300142247514e-06, |
|
"loss": 0.0002, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.6710390058681395, |
|
"grad_norm": 0.11474609375, |
|
"learning_rate": 6.827880512091038e-06, |
|
"loss": 0.001, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.6724197445633414, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 6.799431009957327e-06, |
|
"loss": 0.0002, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.6738004832585434, |
|
"grad_norm": 0.11083984375, |
|
"learning_rate": 6.770981507823614e-06, |
|
"loss": 0.0005, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.6751812219537453, |
|
"grad_norm": 0.0184326171875, |
|
"learning_rate": 6.742532005689901e-06, |
|
"loss": 0.0001, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.6765619606489471, |
|
"grad_norm": 0.01409912109375, |
|
"learning_rate": 6.7140825035561885e-06, |
|
"loss": 0.0001, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.6779426993441491, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 6.685633001422476e-06, |
|
"loss": 0.0001, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.679323438039351, |
|
"grad_norm": 0.055908203125, |
|
"learning_rate": 6.657183499288763e-06, |
|
"loss": 0.0004, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.680704176734553, |
|
"grad_norm": 0.0152587890625, |
|
"learning_rate": 6.62873399715505e-06, |
|
"loss": 0.0001, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.6820849154297549, |
|
"grad_norm": 0.01458740234375, |
|
"learning_rate": 6.600284495021337e-06, |
|
"loss": 0.0001, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.6834656541249569, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 6.571834992887625e-06, |
|
"loss": 0.0002, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.6848463928201588, |
|
"grad_norm": 0.1591796875, |
|
"learning_rate": 6.543385490753912e-06, |
|
"loss": 0.0003, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.6862271315153607, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 6.5149359886202e-06, |
|
"loss": 0.0001, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.6876078702105627, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 6.486486486486487e-06, |
|
"loss": 0.0001, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.6889886089057646, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 6.458036984352775e-06, |
|
"loss": 0.0001, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.6903693476009665, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 6.429587482219062e-06, |
|
"loss": 0.0002, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.6917500862961684, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 6.401137980085349e-06, |
|
"loss": 0.0002, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.6931308249913704, |
|
"grad_norm": 0.0206298828125, |
|
"learning_rate": 6.372688477951636e-06, |
|
"loss": 0.0002, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.6945115636865723, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 6.344238975817924e-06, |
|
"loss": 0.0001, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.6958923023817742, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 6.31578947368421e-06, |
|
"loss": 0.0001, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.6958923023817742, |
|
"eval_loss": 0.00025013022241182625, |
|
"eval_runtime": 580.5157, |
|
"eval_samples_per_second": 2.219, |
|
"eval_steps_per_second": 2.219, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.6972730410769762, |
|
"grad_norm": 0.1513671875, |
|
"learning_rate": 6.287339971550498e-06, |
|
"loss": 0.0005, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.6986537797721781, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 6.258890469416786e-06, |
|
"loss": 0.0001, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.7000345184673801, |
|
"grad_norm": 0.0169677734375, |
|
"learning_rate": 6.230440967283073e-06, |
|
"loss": 0.0001, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.701415257162582, |
|
"grad_norm": 0.014892578125, |
|
"learning_rate": 6.201991465149361e-06, |
|
"loss": 0.0001, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.702795995857784, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 6.173541963015648e-06, |
|
"loss": 0.0009, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.7041767345529858, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 6.145092460881935e-06, |
|
"loss": 0.0001, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7055574732481877, |
|
"grad_norm": 0.0164794921875, |
|
"learning_rate": 6.1166429587482225e-06, |
|
"loss": 0.0001, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.7069382119433897, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 6.088193456614509e-06, |
|
"loss": 0.0001, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.7083189506385916, |
|
"grad_norm": 0.0181884765625, |
|
"learning_rate": 6.059743954480797e-06, |
|
"loss": 0.0001, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.7096996893337936, |
|
"grad_norm": 0.0230712890625, |
|
"learning_rate": 6.031294452347084e-06, |
|
"loss": 0.0002, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.7110804280289955, |
|
"grad_norm": 0.0146484375, |
|
"learning_rate": 6.002844950213371e-06, |
|
"loss": 0.0001, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.7124611667241975, |
|
"grad_norm": 0.11328125, |
|
"learning_rate": 5.9743954480796596e-06, |
|
"loss": 0.0007, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.7138419054193994, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 5.945945945945947e-06, |
|
"loss": 0.0001, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.7152226441146013, |
|
"grad_norm": 0.015625, |
|
"learning_rate": 5.917496443812234e-06, |
|
"loss": 0.0001, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.7166033828098033, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 5.8890469416785214e-06, |
|
"loss": 0.0001, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.7179841215050051, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 5.860597439544808e-06, |
|
"loss": 0.0001, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7193648602002071, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 5.832147937411096e-06, |
|
"loss": 0.0001, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.720745598895409, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 5.8036984352773825e-06, |
|
"loss": 0.0002, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.722126337590611, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 5.77524893314367e-06, |
|
"loss": 0.0001, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.7235070762858129, |
|
"grad_norm": 0.01806640625, |
|
"learning_rate": 5.746799431009958e-06, |
|
"loss": 0.0001, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.7248878149810148, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 5.718349928876245e-06, |
|
"loss": 0.0001, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7262685536762168, |
|
"grad_norm": 0.07861328125, |
|
"learning_rate": 5.689900426742533e-06, |
|
"loss": 0.0002, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.7276492923714187, |
|
"grad_norm": 0.12060546875, |
|
"learning_rate": 5.66145092460882e-06, |
|
"loss": 0.0005, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.7290300310666207, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 5.633001422475107e-06, |
|
"loss": 0.0001, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.7304107697618226, |
|
"grad_norm": 0.0162353515625, |
|
"learning_rate": 5.604551920341395e-06, |
|
"loss": 0.0001, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.7317915084570245, |
|
"grad_norm": 0.017333984375, |
|
"learning_rate": 5.576102418207681e-06, |
|
"loss": 0.0002, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7331722471522264, |
|
"grad_norm": 0.0458984375, |
|
"learning_rate": 5.547652916073969e-06, |
|
"loss": 0.0003, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.7345529858474283, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 5.5192034139402566e-06, |
|
"loss": 0.0001, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.7359337245426303, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 5.490753911806543e-06, |
|
"loss": 0.0001, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.7373144632378322, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 5.462304409672832e-06, |
|
"loss": 0.0001, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.7386952019330342, |
|
"grad_norm": 0.0634765625, |
|
"learning_rate": 5.433854907539119e-06, |
|
"loss": 0.0003, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.7400759406282361, |
|
"grad_norm": 0.12353515625, |
|
"learning_rate": 5.405405405405406e-06, |
|
"loss": 0.0012, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.7414566793234381, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 5.376955903271694e-06, |
|
"loss": 0.0001, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.74283741801864, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 5.34850640113798e-06, |
|
"loss": 0.0001, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.744218156713842, |
|
"grad_norm": 0.11376953125, |
|
"learning_rate": 5.320056899004268e-06, |
|
"loss": 0.0003, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.7455988954090439, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 5.291607396870555e-06, |
|
"loss": 0.0001, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7469796341042457, |
|
"grad_norm": 0.08984375, |
|
"learning_rate": 5.263157894736842e-06, |
|
"loss": 0.0003, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.7483603727994477, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 5.23470839260313e-06, |
|
"loss": 0.0001, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.7497411114946496, |
|
"grad_norm": 0.048583984375, |
|
"learning_rate": 5.2062588904694165e-06, |
|
"loss": 0.0004, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.7511218501898516, |
|
"grad_norm": 0.0135498046875, |
|
"learning_rate": 5.177809388335705e-06, |
|
"loss": 0.0001, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.7525025888850535, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 5.1493598862019925e-06, |
|
"loss": 0.0001, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.7538833275802554, |
|
"grad_norm": 0.01446533203125, |
|
"learning_rate": 5.120910384068279e-06, |
|
"loss": 0.0001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.7552640662754574, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 5.092460881934567e-06, |
|
"loss": 0.0002, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.7566448049706593, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 5.0640113798008536e-06, |
|
"loss": 0.0001, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.7580255436658613, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 5.035561877667141e-06, |
|
"loss": 0.0001, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.7594062823610632, |
|
"grad_norm": 0.013916015625, |
|
"learning_rate": 5.007112375533429e-06, |
|
"loss": 0.0001, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.7607870210562651, |
|
"grad_norm": 0.03857421875, |
|
"learning_rate": 4.978662873399716e-06, |
|
"loss": 0.0002, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.762167759751467, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 4.950213371266003e-06, |
|
"loss": 0.0001, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.7635484984466689, |
|
"grad_norm": 0.0205078125, |
|
"learning_rate": 4.921763869132291e-06, |
|
"loss": 0.0001, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.7649292371418709, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 4.893314366998577e-06, |
|
"loss": 0.0001, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.7663099758370728, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 4.864864864864866e-06, |
|
"loss": 0.0001, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.7676907145322748, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 4.8364153627311525e-06, |
|
"loss": 0.0001, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.7690714532274767, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 4.80796586059744e-06, |
|
"loss": 0.0001, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.7704521919226787, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 4.779516358463727e-06, |
|
"loss": 0.0001, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.7718329306178806, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 4.751066856330014e-06, |
|
"loss": 0.0001, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.7732136693130826, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 4.722617354196302e-06, |
|
"loss": 0.0001, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.7745944080082844, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 4.6941678520625895e-06, |
|
"loss": 0.0001, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.7759751467034863, |
|
"grad_norm": 0.119140625, |
|
"learning_rate": 4.665718349928876e-06, |
|
"loss": 0.0007, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.7773558853986883, |
|
"grad_norm": 0.0223388671875, |
|
"learning_rate": 4.637268847795164e-06, |
|
"loss": 0.0002, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.7787366240938902, |
|
"grad_norm": 0.01318359375, |
|
"learning_rate": 4.608819345661451e-06, |
|
"loss": 0.0001, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.7801173627890922, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 4.580369843527739e-06, |
|
"loss": 0.0001, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.7814981014842941, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 4.551920341394026e-06, |
|
"loss": 0.0001, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.782878840179496, |
|
"grad_norm": 0.0174560546875, |
|
"learning_rate": 4.523470839260313e-06, |
|
"loss": 0.0001, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.784259578874698, |
|
"grad_norm": 0.0203857421875, |
|
"learning_rate": 4.495021337126601e-06, |
|
"loss": 0.0001, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.7856403175698999, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 4.4665718349928885e-06, |
|
"loss": 0.0001, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.7870210562651019, |
|
"grad_norm": 0.01318359375, |
|
"learning_rate": 4.438122332859175e-06, |
|
"loss": 0.0001, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.7884017949603037, |
|
"grad_norm": 0.162109375, |
|
"learning_rate": 4.409672830725463e-06, |
|
"loss": 0.0005, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.7897825336555057, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 4.38122332859175e-06, |
|
"loss": 0.0001, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.7911632723507076, |
|
"grad_norm": 0.013427734375, |
|
"learning_rate": 4.352773826458037e-06, |
|
"loss": 0.0001, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.7925440110459095, |
|
"grad_norm": 0.053466796875, |
|
"learning_rate": 4.324324324324325e-06, |
|
"loss": 0.0003, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.7939247497411115, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 4.295874822190612e-06, |
|
"loss": 0.0001, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.7953054884363134, |
|
"grad_norm": 0.0224609375, |
|
"learning_rate": 4.267425320056899e-06, |
|
"loss": 0.0002, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.7953054884363134, |
|
"eval_loss": 0.0002442169061396271, |
|
"eval_runtime": 582.9379, |
|
"eval_samples_per_second": 2.209, |
|
"eval_steps_per_second": 2.209, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.7966862271315154, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 4.2389758179231865e-06, |
|
"loss": 0.0001, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.7980669658267173, |
|
"grad_norm": 0.01953125, |
|
"learning_rate": 4.210526315789474e-06, |
|
"loss": 0.0002, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.7994477045219193, |
|
"grad_norm": 0.0201416015625, |
|
"learning_rate": 4.182076813655762e-06, |
|
"loss": 0.0002, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.8008284432171212, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 4.1536273115220484e-06, |
|
"loss": 0.0001, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.802209181912323, |
|
"grad_norm": 0.0130615234375, |
|
"learning_rate": 4.125177809388336e-06, |
|
"loss": 0.0001, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.803589920607525, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 4.096728307254624e-06, |
|
"loss": 0.0001, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.8049706593027269, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 4.06827880512091e-06, |
|
"loss": 0.0001, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.8063513979979289, |
|
"grad_norm": 0.03173828125, |
|
"learning_rate": 4.039829302987198e-06, |
|
"loss": 0.0002, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.8077321366931308, |
|
"grad_norm": 0.212890625, |
|
"learning_rate": 4.0113798008534855e-06, |
|
"loss": 0.001, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.8091128753883328, |
|
"grad_norm": 0.0439453125, |
|
"learning_rate": 3.982930298719773e-06, |
|
"loss": 0.0004, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.8104936140835347, |
|
"grad_norm": 0.04443359375, |
|
"learning_rate": 3.95448079658606e-06, |
|
"loss": 0.0003, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.8118743527787367, |
|
"grad_norm": 0.078125, |
|
"learning_rate": 3.926031294452347e-06, |
|
"loss": 0.0004, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.8132550914739386, |
|
"grad_norm": 0.0233154296875, |
|
"learning_rate": 3.897581792318635e-06, |
|
"loss": 0.0002, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.8146358301691405, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 3.8691322901849225e-06, |
|
"loss": 0.0001, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8160165688643424, |
|
"grad_norm": 0.044677734375, |
|
"learning_rate": 3.840682788051209e-06, |
|
"loss": 0.0002, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.8173973075595443, |
|
"grad_norm": 0.17578125, |
|
"learning_rate": 3.8122332859174964e-06, |
|
"loss": 0.0004, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.8187780462547463, |
|
"grad_norm": 0.255859375, |
|
"learning_rate": 3.7837837837837844e-06, |
|
"loss": 0.0003, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.8201587849499482, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 3.7553342816500715e-06, |
|
"loss": 0.0001, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.8215395236451501, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 3.7268847795163587e-06, |
|
"loss": 0.0001, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.8229202623403521, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 3.698435277382646e-06, |
|
"loss": 0.0001, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.824301001035554, |
|
"grad_norm": 0.01385498046875, |
|
"learning_rate": 3.669985775248933e-06, |
|
"loss": 0.0001, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.825681739730756, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 3.641536273115221e-06, |
|
"loss": 0.0001, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.8270624784259579, |
|
"grad_norm": 0.04248046875, |
|
"learning_rate": 3.613086770981508e-06, |
|
"loss": 0.0003, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.8284432171211599, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 3.5846372688477953e-06, |
|
"loss": 0.0013, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8298239558163617, |
|
"grad_norm": 0.0145263671875, |
|
"learning_rate": 3.5561877667140825e-06, |
|
"loss": 0.0001, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.8312046945115636, |
|
"grad_norm": 0.043212890625, |
|
"learning_rate": 3.5277382645803705e-06, |
|
"loss": 0.0004, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.8325854332067656, |
|
"grad_norm": 0.02099609375, |
|
"learning_rate": 3.4992887624466576e-06, |
|
"loss": 0.0001, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.8339661719019675, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 3.4708392603129448e-06, |
|
"loss": 0.0004, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.8353469105971695, |
|
"grad_norm": 0.045166015625, |
|
"learning_rate": 3.442389758179232e-06, |
|
"loss": 0.0004, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.8367276492923714, |
|
"grad_norm": 0.080078125, |
|
"learning_rate": 3.413940256045519e-06, |
|
"loss": 0.0002, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.8381083879875734, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 3.385490753911807e-06, |
|
"loss": 0.0001, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.8394891266827753, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 3.3570412517780942e-06, |
|
"loss": 0.0001, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.8408698653779773, |
|
"grad_norm": 0.048828125, |
|
"learning_rate": 3.3285917496443814e-06, |
|
"loss": 0.0002, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.8422506040731792, |
|
"grad_norm": 0.016357421875, |
|
"learning_rate": 3.3001422475106685e-06, |
|
"loss": 0.0001, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.843631342768381, |
|
"grad_norm": 0.146484375, |
|
"learning_rate": 3.271692745376956e-06, |
|
"loss": 0.0008, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.845012081463583, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 3.2432432432432437e-06, |
|
"loss": 0.0001, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.8463928201587849, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 3.214793741109531e-06, |
|
"loss": 0.0001, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.8477735588539869, |
|
"grad_norm": 0.072265625, |
|
"learning_rate": 3.186344238975818e-06, |
|
"loss": 0.0002, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.8491542975491888, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 3.157894736842105e-06, |
|
"loss": 0.0001, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.8505350362443908, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 3.129445234708393e-06, |
|
"loss": 0.0001, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.8519157749395927, |
|
"grad_norm": 0.05615234375, |
|
"learning_rate": 3.1009957325746803e-06, |
|
"loss": 0.0002, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.8532965136347946, |
|
"grad_norm": 0.0257568359375, |
|
"learning_rate": 3.0725462304409675e-06, |
|
"loss": 0.0001, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.8546772523299966, |
|
"grad_norm": 0.21875, |
|
"learning_rate": 3.0440967283072546e-06, |
|
"loss": 0.0014, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.8560579910251985, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 3.015647226173542e-06, |
|
"loss": 0.0001, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.8574387297204004, |
|
"grad_norm": 0.08203125, |
|
"learning_rate": 2.9871977240398298e-06, |
|
"loss": 0.0003, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.8588194684156023, |
|
"grad_norm": 0.0135498046875, |
|
"learning_rate": 2.958748221906117e-06, |
|
"loss": 0.0001, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.8602002071108042, |
|
"grad_norm": 0.053955078125, |
|
"learning_rate": 2.930298719772404e-06, |
|
"loss": 0.0003, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.8615809458060062, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 2.9018492176386912e-06, |
|
"loss": 0.0001, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.8629616845012081, |
|
"grad_norm": 0.01385498046875, |
|
"learning_rate": 2.873399715504979e-06, |
|
"loss": 0.0001, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.8643424231964101, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 2.8449502133712664e-06, |
|
"loss": 0.0001, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.865723161891612, |
|
"grad_norm": 0.01409912109375, |
|
"learning_rate": 2.8165007112375536e-06, |
|
"loss": 0.0001, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.867103900586814, |
|
"grad_norm": 0.018798828125, |
|
"learning_rate": 2.7880512091038407e-06, |
|
"loss": 0.0001, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.8684846392820159, |
|
"grad_norm": 0.01397705078125, |
|
"learning_rate": 2.7596017069701283e-06, |
|
"loss": 0.0001, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.8698653779772179, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 2.731152204836416e-06, |
|
"loss": 0.0001, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.8712461166724197, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.0001, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.8726268553676216, |
|
"grad_norm": 0.012939453125, |
|
"learning_rate": 2.67425320056899e-06, |
|
"loss": 0.0001, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.8740075940628236, |
|
"grad_norm": 0.0135498046875, |
|
"learning_rate": 2.6458036984352773e-06, |
|
"loss": 0.0001, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.8753883327580255, |
|
"grad_norm": 0.01300048828125, |
|
"learning_rate": 2.617354196301565e-06, |
|
"loss": 0.0001, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.8767690714532275, |
|
"grad_norm": 0.16015625, |
|
"learning_rate": 2.5889046941678525e-06, |
|
"loss": 0.0004, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.8781498101484294, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 2.5604551920341396e-06, |
|
"loss": 0.0001, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.8795305488436314, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 2.5320056899004268e-06, |
|
"loss": 0.0006, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.8809112875388333, |
|
"grad_norm": 0.0189208984375, |
|
"learning_rate": 2.5035561877667144e-06, |
|
"loss": 0.0002, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.8822920262340352, |
|
"grad_norm": 0.06494140625, |
|
"learning_rate": 2.4751066856330015e-06, |
|
"loss": 0.0003, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.8836727649292372, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 2.4466571834992887e-06, |
|
"loss": 0.0001, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.885053503624439, |
|
"grad_norm": 0.19921875, |
|
"learning_rate": 2.4182076813655762e-06, |
|
"loss": 0.0013, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.886434242319641, |
|
"grad_norm": 0.027099609375, |
|
"learning_rate": 2.3897581792318634e-06, |
|
"loss": 0.0002, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.8878149810148429, |
|
"grad_norm": 0.1884765625, |
|
"learning_rate": 2.361308677098151e-06, |
|
"loss": 0.0003, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.8891957197100449, |
|
"grad_norm": 0.279296875, |
|
"learning_rate": 2.332859174964438e-06, |
|
"loss": 0.0014, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.8905764584052468, |
|
"grad_norm": 0.0732421875, |
|
"learning_rate": 2.3044096728307257e-06, |
|
"loss": 0.0002, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.8919571971004487, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 2.275960170697013e-06, |
|
"loss": 0.0001, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.8933379357956507, |
|
"grad_norm": 0.01318359375, |
|
"learning_rate": 2.2475106685633004e-06, |
|
"loss": 0.0001, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.8947186744908526, |
|
"grad_norm": 0.10302734375, |
|
"learning_rate": 2.2190611664295876e-06, |
|
"loss": 0.0005, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.8947186744908526, |
|
"eval_loss": 0.00023728572705294937, |
|
"eval_runtime": 582.6042, |
|
"eval_samples_per_second": 2.211, |
|
"eval_steps_per_second": 2.211, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.8960994131860546, |
|
"grad_norm": 0.0244140625, |
|
"learning_rate": 2.190611664295875e-06, |
|
"loss": 0.0002, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.8974801518812565, |
|
"grad_norm": 0.0218505859375, |
|
"learning_rate": 2.1621621621621623e-06, |
|
"loss": 0.0002, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.8988608905764585, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 2.1337126600284495e-06, |
|
"loss": 0.0004, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.9002416292716603, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 2.105263157894737e-06, |
|
"loss": 0.0001, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.9016223679668622, |
|
"grad_norm": 0.0196533203125, |
|
"learning_rate": 2.0768136557610242e-06, |
|
"loss": 0.0002, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.9030031066620642, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 2.048364153627312e-06, |
|
"loss": 0.0001, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.9043838453572661, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 2.019914651493599e-06, |
|
"loss": 0.0001, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.9057645840524681, |
|
"grad_norm": 0.1435546875, |
|
"learning_rate": 1.9914651493598865e-06, |
|
"loss": 0.0007, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.90714532274767, |
|
"grad_norm": 0.01531982421875, |
|
"learning_rate": 1.9630156472261737e-06, |
|
"loss": 0.0001, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.908526061442872, |
|
"grad_norm": 0.07470703125, |
|
"learning_rate": 1.9345661450924613e-06, |
|
"loss": 0.0003, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.9099068001380739, |
|
"grad_norm": 0.023681640625, |
|
"learning_rate": 1.9061166429587482e-06, |
|
"loss": 0.0002, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.9112875388332758, |
|
"grad_norm": 0.052001953125, |
|
"learning_rate": 1.8776671408250358e-06, |
|
"loss": 0.0002, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9126682775284778, |
|
"grad_norm": 0.045654296875, |
|
"learning_rate": 1.849217638691323e-06, |
|
"loss": 0.0003, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.9140490162236796, |
|
"grad_norm": 0.1142578125, |
|
"learning_rate": 1.8207681365576105e-06, |
|
"loss": 0.0004, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.9154297549188816, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 1.7923186344238977e-06, |
|
"loss": 0.0001, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.9168104936140835, |
|
"grad_norm": 0.01348876953125, |
|
"learning_rate": 1.7638691322901852e-06, |
|
"loss": 0.0001, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.9181912323092855, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 1.7354196301564724e-06, |
|
"loss": 0.0001, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.9195719710044874, |
|
"grad_norm": 0.01544189453125, |
|
"learning_rate": 1.7069701280227595e-06, |
|
"loss": 0.0001, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.9209527096996893, |
|
"grad_norm": 0.01507568359375, |
|
"learning_rate": 1.6785206258890471e-06, |
|
"loss": 0.0001, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.9223334483948913, |
|
"grad_norm": 0.0135498046875, |
|
"learning_rate": 1.6500711237553343e-06, |
|
"loss": 0.0001, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.9237141870900932, |
|
"grad_norm": 0.0142822265625, |
|
"learning_rate": 1.6216216216216219e-06, |
|
"loss": 0.0001, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.9250949257852952, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 1.593172119487909e-06, |
|
"loss": 0.0012, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9264756644804971, |
|
"grad_norm": 0.126953125, |
|
"learning_rate": 1.5647226173541966e-06, |
|
"loss": 0.0003, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.927856403175699, |
|
"grad_norm": 0.01312255859375, |
|
"learning_rate": 1.5362731152204837e-06, |
|
"loss": 0.0001, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.9292371418709009, |
|
"grad_norm": 0.037109375, |
|
"learning_rate": 1.507823613086771e-06, |
|
"loss": 0.0002, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.9306178805661028, |
|
"grad_norm": 0.0225830078125, |
|
"learning_rate": 1.4793741109530585e-06, |
|
"loss": 0.0001, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.9319986192613048, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 1.4509246088193456e-06, |
|
"loss": 0.0001, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9333793579565067, |
|
"grad_norm": 0.01416015625, |
|
"learning_rate": 1.4224751066856332e-06, |
|
"loss": 0.0001, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.9347600966517087, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 1.3940256045519204e-06, |
|
"loss": 0.0001, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.9361408353469106, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 1.365576102418208e-06, |
|
"loss": 0.0012, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.9375215740421126, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 1.337126600284495e-06, |
|
"loss": 0.0001, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.9389023127373145, |
|
"grad_norm": 0.25390625, |
|
"learning_rate": 1.3086770981507825e-06, |
|
"loss": 0.0017, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9402830514325164, |
|
"grad_norm": 0.130859375, |
|
"learning_rate": 1.2802275960170698e-06, |
|
"loss": 0.0004, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.9416637901277183, |
|
"grad_norm": 0.0166015625, |
|
"learning_rate": 1.2517780938833572e-06, |
|
"loss": 0.0001, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.9430445288229202, |
|
"grad_norm": 0.01458740234375, |
|
"learning_rate": 1.2233285917496443e-06, |
|
"loss": 0.0001, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.9444252675181222, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 1.1948790896159317e-06, |
|
"loss": 0.0001, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.9458060062133241, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 1.166429587482219e-06, |
|
"loss": 0.0001, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.9471867449085261, |
|
"grad_norm": 0.0213623046875, |
|
"learning_rate": 1.1379800853485064e-06, |
|
"loss": 0.0001, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.948567483603728, |
|
"grad_norm": 0.038818359375, |
|
"learning_rate": 1.1095305832147938e-06, |
|
"loss": 0.0002, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.94994822229893, |
|
"grad_norm": 0.0157470703125, |
|
"learning_rate": 1.0810810810810812e-06, |
|
"loss": 0.0001, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.9513289609941319, |
|
"grad_norm": 0.01373291015625, |
|
"learning_rate": 1.0526315789473685e-06, |
|
"loss": 0.0001, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.9527096996893338, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 1.024182076813656e-06, |
|
"loss": 0.0001, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.9540904383845358, |
|
"grad_norm": 0.05078125, |
|
"learning_rate": 9.957325746799433e-07, |
|
"loss": 0.0002, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.9554711770797376, |
|
"grad_norm": 0.015869140625, |
|
"learning_rate": 9.672830725462306e-07, |
|
"loss": 0.0002, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.9568519157749396, |
|
"grad_norm": 0.042236328125, |
|
"learning_rate": 9.388335704125179e-07, |
|
"loss": 0.0002, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.9582326544701415, |
|
"grad_norm": 0.01287841796875, |
|
"learning_rate": 9.103840682788053e-07, |
|
"loss": 0.0001, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.9596133931653434, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 8.819345661450926e-07, |
|
"loss": 0.0001, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.9609941318605454, |
|
"grad_norm": 0.01513671875, |
|
"learning_rate": 8.534850640113798e-07, |
|
"loss": 0.0001, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.9623748705557473, |
|
"grad_norm": 0.0771484375, |
|
"learning_rate": 8.250355618776671e-07, |
|
"loss": 0.0003, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.9637556092509493, |
|
"grad_norm": 0.01300048828125, |
|
"learning_rate": 7.965860597439545e-07, |
|
"loss": 0.0001, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.9651363479461512, |
|
"grad_norm": 0.01324462890625, |
|
"learning_rate": 7.681365576102419e-07, |
|
"loss": 0.0001, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.9665170866413532, |
|
"grad_norm": 0.01336669921875, |
|
"learning_rate": 7.396870554765292e-07, |
|
"loss": 0.0001, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.9678978253365551, |
|
"grad_norm": 0.04736328125, |
|
"learning_rate": 7.112375533428166e-07, |
|
"loss": 0.0002, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.9692785640317569, |
|
"grad_norm": 0.020751953125, |
|
"learning_rate": 6.82788051209104e-07, |
|
"loss": 0.0002, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.9706593027269589, |
|
"grad_norm": 0.037841796875, |
|
"learning_rate": 6.543385490753912e-07, |
|
"loss": 0.0001, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.9720400414221608, |
|
"grad_norm": 0.01483154296875, |
|
"learning_rate": 6.258890469416786e-07, |
|
"loss": 0.0001, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.9734207801173628, |
|
"grad_norm": 0.0208740234375, |
|
"learning_rate": 5.974395448079659e-07, |
|
"loss": 0.0001, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.9748015188125647, |
|
"grad_norm": 0.01300048828125, |
|
"learning_rate": 5.689900426742532e-07, |
|
"loss": 0.0001, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.9761822575077667, |
|
"grad_norm": 0.01409912109375, |
|
"learning_rate": 5.405405405405406e-07, |
|
"loss": 0.0001, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.9775629962029686, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 5.12091038406828e-07, |
|
"loss": 0.0001, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.9789437348981705, |
|
"grad_norm": 0.044921875, |
|
"learning_rate": 4.836415362731153e-07, |
|
"loss": 0.0002, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.9803244735933725, |
|
"grad_norm": 0.1357421875, |
|
"learning_rate": 4.551920341394026e-07, |
|
"loss": 0.0003, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.9817052122885744, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 4.267425320056899e-07, |
|
"loss": 0.0001, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.9830859509837763, |
|
"grad_norm": 0.040283203125, |
|
"learning_rate": 3.9829302987197725e-07, |
|
"loss": 0.0003, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.9844666896789782, |
|
"grad_norm": 0.02734375, |
|
"learning_rate": 3.698435277382646e-07, |
|
"loss": 0.0002, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.9858474283741802, |
|
"grad_norm": 0.01422119140625, |
|
"learning_rate": 3.41394025604552e-07, |
|
"loss": 0.0001, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.9872281670693821, |
|
"grad_norm": 0.091796875, |
|
"learning_rate": 3.129445234708393e-07, |
|
"loss": 0.0006, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.988608905764584, |
|
"grad_norm": 0.01611328125, |
|
"learning_rate": 2.844950213371266e-07, |
|
"loss": 0.0001, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.989989644459786, |
|
"grad_norm": 0.054443359375, |
|
"learning_rate": 2.56045519203414e-07, |
|
"loss": 0.0003, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.9913703831549879, |
|
"grad_norm": 0.014892578125, |
|
"learning_rate": 2.275960170697013e-07, |
|
"loss": 0.0001, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.9927511218501899, |
|
"grad_norm": 0.0137939453125, |
|
"learning_rate": 1.9914651493598863e-07, |
|
"loss": 0.0001, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.9941318605453918, |
|
"grad_norm": 0.01361083984375, |
|
"learning_rate": 1.70697012802276e-07, |
|
"loss": 0.0001, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.9941318605453918, |
|
"eval_loss": 0.00024354300694540143, |
|
"eval_runtime": 582.0573, |
|
"eval_samples_per_second": 2.213, |
|
"eval_steps_per_second": 2.213, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.9955125992405938, |
|
"grad_norm": 0.013671875, |
|
"learning_rate": 1.422475106685633e-07, |
|
"loss": 0.0001, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.9968933379357956, |
|
"grad_norm": 0.0133056640625, |
|
"learning_rate": 1.1379800853485066e-07, |
|
"loss": 0.0001, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.9982740766309975, |
|
"grad_norm": 0.01318359375, |
|
"learning_rate": 8.5348506401138e-08, |
|
"loss": 0.0001, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.9996548153261995, |
|
"grad_norm": 0.0211181640625, |
|
"learning_rate": 5.689900426742533e-08, |
|
"loss": 0.0001, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.003204345703125, |
|
"learning_rate": 2.8449502133712664e-08, |
|
"loss": 0.0, |
|
"step": 725 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 725, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 72, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.298593420780503e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|