|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.3704389701796629, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0009999978838190456, |
|
"loss": 2.9794, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0009999915352940948, |
|
"loss": 2.3885, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000999980954478887, |
|
"loss": 2.3057, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000999966141462985, |
|
"loss": 2.2692, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000999947096371777, |
|
"loss": 2.2576, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009999238193664748, |
|
"loss": 2.2388, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009998963106441117, |
|
"loss": 2.2523, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009998645704375414, |
|
"loss": 2.218, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000999828599015436, |
|
"loss": 2.2457, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009997883966822835, |
|
"loss": 2.198, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009997439637783859, |
|
"loss": 2.2013, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000999695300679855, |
|
"loss": 2.1765, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009996424077986109, |
|
"loss": 2.1741, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000999585285582377, |
|
"loss": 2.1898, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009995239345146772, |
|
"loss": 2.1466, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009994583551148314, |
|
"loss": 2.1423, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009993885479379506, |
|
"loss": 2.1451, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000999314513574934, |
|
"loss": 2.202, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009992362526524616, |
|
"loss": 2.1208, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009991537658329906, |
|
"loss": 2.1591, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000999067053814749, |
|
"loss": 2.1788, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009989761173317304, |
|
"loss": 2.147, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000998880957153687, |
|
"loss": 2.1249, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000998781574086123, |
|
"loss": 2.165, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000998677968970289, |
|
"loss": 2.1428, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009985701426831735, |
|
"loss": 2.1384, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009984580961374964, |
|
"loss": 2.1585, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009983418302817008, |
|
"loss": 2.1156, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009982213460999448, |
|
"loss": 2.0811, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000998096644612094, |
|
"loss": 2.1081, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009979677268737118, |
|
"loss": 2.1246, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009978345939760515, |
|
"loss": 2.1229, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997697247046046, |
|
"loss": 2.1033, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009975556872462994, |
|
"loss": 2.0931, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997409915775076, |
|
"loss": 2.1206, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009972599338662915, |
|
"loss": 2.0537, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009971057427895012, |
|
"loss": 2.0762, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009969473438498897, |
|
"loss": 2.0883, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009967847383882604, |
|
"loss": 2.1174, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009966179277810239, |
|
"loss": 2.1111, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009964469134401855, |
|
"loss": 2.1288, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009962716968133346, |
|
"loss": 2.0967, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009960922793836318, |
|
"loss": 2.1216, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009959086626697955, |
|
"loss": 2.0924, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009957208482260908, |
|
"loss": 2.0809, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009955288376423152, |
|
"loss": 2.1082, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009953326325437852, |
|
"loss": 2.0885, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009951322345913224, |
|
"loss": 2.1133, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009949276454812408, |
|
"loss": 2.0844, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00099471886694533, |
|
"loss": 2.0796, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009945059007508434, |
|
"loss": 2.1255, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009942887487004804, |
|
"loss": 2.0913, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009940674126323733, |
|
"loss": 2.1003, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009938418944200709, |
|
"loss": 2.0541, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009936121959725223, |
|
"loss": 2.0523, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009933783192340618, |
|
"loss": 2.1225, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009931402661843911, |
|
"loss": 2.0446, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000992898038838564, |
|
"loss": 2.0921, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009926516392469674, |
|
"loss": 2.1081, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009924010694953064, |
|
"loss": 2.0734, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009921463317045843, |
|
"loss": 2.0652, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009918874280310862, |
|
"loss": 2.0818, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009916243606663605, |
|
"loss": 2.0776, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009913571318371994, |
|
"loss": 2.1025, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009910857438056215, |
|
"loss": 2.066, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009908101988688512, |
|
"loss": 2.0575, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009905304993593008, |
|
"loss": 2.1269, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009902466476445486, |
|
"loss": 2.0518, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009899586461273218, |
|
"loss": 2.0698, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000989666497245473, |
|
"loss": 2.0988, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009893702034719624, |
|
"loss": 2.0986, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009890697673148345, |
|
"loss": 2.0237, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009887651913171986, |
|
"loss": 2.0027, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009884564780572064, |
|
"loss": 2.0563, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009881436301480305, |
|
"loss": 2.0624, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000987826650237842, |
|
"loss": 2.0926, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000987505541009788, |
|
"loss": 2.0585, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009871803051819696, |
|
"loss": 2.0494, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009868509455074183, |
|
"loss": 2.0106, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009865174647740729, |
|
"loss": 2.0861, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009861798658047556, |
|
"loss": 2.0478, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009858381514571484, |
|
"loss": 2.0469, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000985492324623769, |
|
"loss": 2.0671, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009851423882319458, |
|
"loss": 2.0808, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009847883452437937, |
|
"loss": 2.0331, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009844301986561893, |
|
"loss": 2.0295, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000984067951500744, |
|
"loss": 2.0873, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00098370160684378, |
|
"loss": 2.1038, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009833311677863042, |
|
"loss": 2.0337, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009829566374639801, |
|
"loss": 2.0407, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009825780190471042, |
|
"loss": 2.1049, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000982195315740576, |
|
"loss": 2.0475, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009818085307838741, |
|
"loss": 2.0624, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000981417667451026, |
|
"loss": 2.0714, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009810227290505816, |
|
"loss": 2.0947, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009806237189255859, |
|
"loss": 2.0591, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009802206404535489, |
|
"loss": 2.0301, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000979813497046419, |
|
"loss": 2.0556, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009794022921505523, |
|
"loss": 2.0753, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000978987029246685, |
|
"loss": 2.0898, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009785677118499029, |
|
"loss": 2.0464, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009781443435096116, |
|
"loss": 2.0828, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009777169278095074, |
|
"loss": 2.1137, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009772854683675462, |
|
"loss": 2.0167, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000976849968835913, |
|
"loss": 2.07, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009764104329009909, |
|
"loss": 2.0409, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009759668642833304, |
|
"loss": 2.015, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009755192667376173, |
|
"loss": 2.0175, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009750676440526411, |
|
"loss": 2.0773, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009746120000512632, |
|
"loss": 2.0245, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009741523385903841, |
|
"loss": 2.094, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009736886635609112, |
|
"loss": 2.0506, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009732209788877258, |
|
"loss": 2.0287, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009727492885296489, |
|
"loss": 2.1162, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009722735964794099, |
|
"loss": 2.1096, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009717939067636099, |
|
"loss": 2.0621, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009713102234426903, |
|
"loss": 2.0796, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009708225506108965, |
|
"loss": 2.0565, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009703308923962447, |
|
"loss": 2.0669, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009698352529604857, |
|
"loss": 2.0638, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009693356364990705, |
|
"loss": 2.0358, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009688320472411143, |
|
"loss": 2.0859, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009683244894493613, |
|
"loss": 2.0932, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009678129674201479, |
|
"loss": 2.0129, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009672974854833669, |
|
"loss": 2.055, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009667780480024304, |
|
"loss": 2.0665, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009662546593742334, |
|
"loss": 2.0488, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009657273240291159, |
|
"loss": 2.0543, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009651960464308261, |
|
"loss": 2.0418, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009646608310764819, |
|
"loss": 2.033, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009641216824965338, |
|
"loss": 2.1034, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009635786052547253, |
|
"loss": 2.0866, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009630316039480556, |
|
"loss": 2.0607, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009624806832067394, |
|
"loss": 2.0457, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009619258476941686, |
|
"loss": 2.032, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000961367102106873, |
|
"loss": 2.0519, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009608044511744791, |
|
"loss": 2.0449, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009602378996596721, |
|
"loss": 1.9949, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009596674523581539, |
|
"loss": 2.0394, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009590931140986035, |
|
"loss": 2.0386, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009585148897426354, |
|
"loss": 2.0254, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009579327841847593, |
|
"loss": 2.0238, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.000957346802352338, |
|
"loss": 2.0509, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009567569492055456, |
|
"loss": 2.0004, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009561632297373263, |
|
"loss": 2.0203, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009555656489733513, |
|
"loss": 2.0182, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000954964211971977, |
|
"loss": 1.9754, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009543589238242012, |
|
"loss": 2.0374, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000953749789653621, |
|
"loss": 2.0367, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000953136814616389, |
|
"loss": 2.0866, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009525200039011694, |
|
"loss": 2.0083, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009518993627290948, |
|
"loss": 2.0525, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009512748963537212, |
|
"loss": 2.0636, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000950646610060984, |
|
"loss": 2.0522, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009500145091691532, |
|
"loss": 2.05, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009493785990287882, |
|
"loss": 1.9887, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009487388850226926, |
|
"loss": 2.0309, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000948095372565869, |
|
"loss": 1.9954, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009474480671054726, |
|
"loss": 2.0078, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009467969741207652, |
|
"loss": 2.0395, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009461420991230693, |
|
"loss": 2.0415, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009454834476557207, |
|
"loss": 2.0308, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009448210252940223, |
|
"loss": 2.0826, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009441548376451963, |
|
"loss": 2.0424, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009434848903483373, |
|
"loss": 2.0125, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009428111890743639, |
|
"loss": 2.0139, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009421337395259717, |
|
"loss": 2.0682, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009414525474375837, |
|
"loss": 2.0577, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009407676185753029, |
|
"loss": 2.0262, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009400789587368632, |
|
"loss": 2.0515, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009393865737515794, |
|
"loss": 2.0398, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009386904694802997, |
|
"loss": 2.0146, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009379906518153543, |
|
"loss": 2.0438, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009372871266805063, |
|
"loss": 2.0377, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000936579900030902, |
|
"loss": 2.0789, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009358689778530193, |
|
"loss": 2.0201, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009351543661646185, |
|
"loss": 2.0114, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009344360710146898, |
|
"loss": 2.0242, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009337140984834034, |
|
"loss": 2.0436, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009329884546820572, |
|
"loss": 2.0452, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000932259145753026, |
|
"loss": 2.0254, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009315261778697083, |
|
"loss": 2.0409, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009307895572364746, |
|
"loss": 2.0301, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009300492900886154, |
|
"loss": 2.0078, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009293053826922873, |
|
"loss": 1.9851, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009285578413444613, |
|
"loss": 1.9947, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009278066723728682, |
|
"loss": 2.0331, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009270518821359461, |
|
"loss": 2.0058, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009262934770227858, |
|
"loss": 2.05, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009255314634530771, |
|
"loss": 2.0444, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009247658478770543, |
|
"loss": 2.0045, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000923996636775442, |
|
"loss": 2.0211, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009232238366593997, |
|
"loss": 2.0124, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009224474540704671, |
|
"loss": 2.0067, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009216674955805079, |
|
"loss": 2.0247, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009208839677916557, |
|
"loss": 2.0314, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009200968773362568, |
|
"loss": 2.067, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009193062308768145, |
|
"loss": 2.0168, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009185120351059326, |
|
"loss": 2.0649, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009177142967462591, |
|
"loss": 2.0208, |
|
"step": 2000 |
|
} |
|
], |
|
"max_steps": 10798, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.8983090426321306e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|