|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.1113169105389886, |
|
"global_step": 6000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0009999978838190456, |
|
"loss": 2.9794, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0009999915352940948, |
|
"loss": 2.3885, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000999980954478887, |
|
"loss": 2.3057, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000999966141462985, |
|
"loss": 2.2692, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000999947096371777, |
|
"loss": 2.2576, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009999238193664748, |
|
"loss": 2.2388, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009998963106441117, |
|
"loss": 2.2523, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0009998645704375414, |
|
"loss": 2.218, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000999828599015436, |
|
"loss": 2.2457, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009997883966822835, |
|
"loss": 2.198, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009997439637783859, |
|
"loss": 2.2013, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000999695300679855, |
|
"loss": 2.1765, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0009996424077986109, |
|
"loss": 2.1741, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000999585285582377, |
|
"loss": 2.1898, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009995239345146772, |
|
"loss": 2.1466, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009994583551148314, |
|
"loss": 2.1423, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0009993885479379506, |
|
"loss": 2.1451, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000999314513574934, |
|
"loss": 2.202, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009992362526524616, |
|
"loss": 2.1208, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009991537658329906, |
|
"loss": 2.1591, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000999067053814749, |
|
"loss": 2.1788, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0009989761173317304, |
|
"loss": 2.147, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000998880957153687, |
|
"loss": 2.1249, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000998781574086123, |
|
"loss": 2.165, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000998677968970289, |
|
"loss": 2.1428, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009985701426831735, |
|
"loss": 2.1384, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009984580961374964, |
|
"loss": 2.1585, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009983418302817008, |
|
"loss": 2.1156, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009982213460999448, |
|
"loss": 2.0811, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000998096644612094, |
|
"loss": 2.1081, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009979677268737118, |
|
"loss": 2.1246, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009978345939760515, |
|
"loss": 2.1229, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997697247046046, |
|
"loss": 2.1033, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009975556872462994, |
|
"loss": 2.0931, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000997409915775076, |
|
"loss": 2.1206, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009972599338662915, |
|
"loss": 2.0537, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009971057427895012, |
|
"loss": 2.0762, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009969473438498897, |
|
"loss": 2.0883, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009967847383882604, |
|
"loss": 2.1174, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0009966179277810239, |
|
"loss": 2.1111, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009964469134401855, |
|
"loss": 2.1288, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009962716968133346, |
|
"loss": 2.0967, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009960922793836318, |
|
"loss": 2.1216, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009959086626697955, |
|
"loss": 2.0924, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009957208482260908, |
|
"loss": 2.0809, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009955288376423152, |
|
"loss": 2.1082, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009953326325437852, |
|
"loss": 2.0885, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009951322345913224, |
|
"loss": 2.1133, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009949276454812408, |
|
"loss": 2.0844, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00099471886694533, |
|
"loss": 2.0796, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009945059007508434, |
|
"loss": 2.1255, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009942887487004804, |
|
"loss": 2.0913, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009940674126323733, |
|
"loss": 2.1003, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009938418944200709, |
|
"loss": 2.0541, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009936121959725223, |
|
"loss": 2.0523, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0009933783192340618, |
|
"loss": 2.1225, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009931402661843911, |
|
"loss": 2.0446, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000992898038838564, |
|
"loss": 2.0921, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009926516392469674, |
|
"loss": 2.1081, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009924010694953064, |
|
"loss": 2.0734, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009921463317045843, |
|
"loss": 2.0652, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009918874280310862, |
|
"loss": 2.0818, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009916243606663605, |
|
"loss": 2.0776, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009913571318371994, |
|
"loss": 2.1025, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009910857438056215, |
|
"loss": 2.066, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009908101988688512, |
|
"loss": 2.0575, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009905304993593008, |
|
"loss": 2.1269, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009902466476445486, |
|
"loss": 2.0518, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009899586461273218, |
|
"loss": 2.0698, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000989666497245473, |
|
"loss": 2.0988, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009893702034719624, |
|
"loss": 2.0986, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0009890697673148345, |
|
"loss": 2.0237, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009887651913171986, |
|
"loss": 2.0027, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009884564780572064, |
|
"loss": 2.0563, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009881436301480305, |
|
"loss": 2.0624, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000987826650237842, |
|
"loss": 2.0926, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000987505541009788, |
|
"loss": 2.0585, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009871803051819696, |
|
"loss": 2.0494, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009868509455074183, |
|
"loss": 2.0106, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009865174647740729, |
|
"loss": 2.0861, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009861798658047556, |
|
"loss": 2.0478, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009858381514571484, |
|
"loss": 2.0469, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000985492324623769, |
|
"loss": 2.0671, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009851423882319458, |
|
"loss": 2.0808, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009847883452437937, |
|
"loss": 2.0331, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009844301986561893, |
|
"loss": 2.0295, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000984067951500744, |
|
"loss": 2.0873, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00098370160684378, |
|
"loss": 2.1038, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0009833311677863042, |
|
"loss": 2.0337, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009829566374639801, |
|
"loss": 2.0407, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009825780190471042, |
|
"loss": 2.1049, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000982195315740576, |
|
"loss": 2.0475, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009818085307838741, |
|
"loss": 2.0624, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000981417667451026, |
|
"loss": 2.0714, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009810227290505816, |
|
"loss": 2.0947, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009806237189255859, |
|
"loss": 2.0591, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009802206404535489, |
|
"loss": 2.0301, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.000979813497046419, |
|
"loss": 2.0556, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009794022921505523, |
|
"loss": 2.0753, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000978987029246685, |
|
"loss": 2.0898, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009785677118499029, |
|
"loss": 2.0464, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009781443435096116, |
|
"loss": 2.0828, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009777169278095074, |
|
"loss": 2.1137, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0009772854683675462, |
|
"loss": 2.0167, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.000976849968835913, |
|
"loss": 2.07, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009764104329009909, |
|
"loss": 2.0409, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009759668642833304, |
|
"loss": 2.015, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009755192667376173, |
|
"loss": 2.0175, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009750676440526411, |
|
"loss": 2.0773, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009746120000512632, |
|
"loss": 2.0245, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009741523385903841, |
|
"loss": 2.094, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009736886635609112, |
|
"loss": 2.0506, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009732209788877258, |
|
"loss": 2.0287, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009727492885296489, |
|
"loss": 2.1162, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009722735964794099, |
|
"loss": 2.1096, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009717939067636099, |
|
"loss": 2.0621, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009713102234426903, |
|
"loss": 2.0796, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009708225506108965, |
|
"loss": 2.0565, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009703308923962447, |
|
"loss": 2.0669, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009698352529604857, |
|
"loss": 2.0638, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0009693356364990705, |
|
"loss": 2.0358, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009688320472411143, |
|
"loss": 2.0859, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009683244894493613, |
|
"loss": 2.0932, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009678129674201479, |
|
"loss": 2.0129, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009672974854833669, |
|
"loss": 2.055, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009667780480024304, |
|
"loss": 2.0665, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009662546593742334, |
|
"loss": 2.0488, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009657273240291159, |
|
"loss": 2.0543, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009651960464308261, |
|
"loss": 2.0418, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009646608310764819, |
|
"loss": 2.033, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009641216824965338, |
|
"loss": 2.1034, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009635786052547253, |
|
"loss": 2.0866, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009630316039480556, |
|
"loss": 2.0607, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009624806832067394, |
|
"loss": 2.0457, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009619258476941686, |
|
"loss": 2.032, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000961367102106873, |
|
"loss": 2.0519, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0009608044511744791, |
|
"loss": 2.0449, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009602378996596721, |
|
"loss": 1.9949, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009596674523581539, |
|
"loss": 2.0394, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009590931140986035, |
|
"loss": 2.0386, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009585148897426354, |
|
"loss": 2.0254, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009579327841847593, |
|
"loss": 2.0238, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.000957346802352338, |
|
"loss": 2.0509, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009567569492055456, |
|
"loss": 2.0004, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009561632297373263, |
|
"loss": 2.0203, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009555656489733513, |
|
"loss": 2.0182, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.000954964211971977, |
|
"loss": 1.9754, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009543589238242012, |
|
"loss": 2.0374, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000953749789653621, |
|
"loss": 2.0367, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000953136814616389, |
|
"loss": 2.0866, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009525200039011694, |
|
"loss": 2.0083, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009518993627290948, |
|
"loss": 2.0525, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0009512748963537212, |
|
"loss": 2.0636, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000950646610060984, |
|
"loss": 2.0522, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009500145091691532, |
|
"loss": 2.05, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009493785990287882, |
|
"loss": 1.9887, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009487388850226926, |
|
"loss": 2.0309, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.000948095372565869, |
|
"loss": 1.9954, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009474480671054726, |
|
"loss": 2.0078, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009467969741207652, |
|
"loss": 2.0395, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009461420991230693, |
|
"loss": 2.0415, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009454834476557207, |
|
"loss": 2.0308, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009448210252940223, |
|
"loss": 2.0826, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009441548376451963, |
|
"loss": 2.0424, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009434848903483373, |
|
"loss": 2.0125, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009428111890743639, |
|
"loss": 2.0139, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009421337395259717, |
|
"loss": 2.0682, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009414525474375837, |
|
"loss": 2.0577, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009407676185753029, |
|
"loss": 2.0262, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0009400789587368632, |
|
"loss": 2.0515, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009393865737515794, |
|
"loss": 2.0398, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009386904694802997, |
|
"loss": 2.0146, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009379906518153543, |
|
"loss": 2.0438, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0009372871266805063, |
|
"loss": 2.0377, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000936579900030902, |
|
"loss": 2.0789, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009358689778530193, |
|
"loss": 2.0201, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009351543661646185, |
|
"loss": 2.0114, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009344360710146898, |
|
"loss": 2.0242, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009337140984834034, |
|
"loss": 2.0436, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0009329884546820572, |
|
"loss": 2.0452, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000932259145753026, |
|
"loss": 2.0254, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009315261778697083, |
|
"loss": 2.0409, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009307895572364746, |
|
"loss": 2.0301, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009300492900886154, |
|
"loss": 2.0078, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009293053826922873, |
|
"loss": 1.9851, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0009285578413444613, |
|
"loss": 1.9947, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009278066723728682, |
|
"loss": 2.0331, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009270518821359461, |
|
"loss": 2.0058, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009262934770227858, |
|
"loss": 2.05, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009255314634530771, |
|
"loss": 2.0444, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0009247658478770543, |
|
"loss": 2.0045, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000923996636775442, |
|
"loss": 2.0211, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009232238366593997, |
|
"loss": 2.0124, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009224474540704671, |
|
"loss": 2.0067, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009216674955805079, |
|
"loss": 2.0247, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009208839677916557, |
|
"loss": 2.0314, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0009200968773362568, |
|
"loss": 2.067, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009193062308768145, |
|
"loss": 2.0168, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009185120351059326, |
|
"loss": 2.0649, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009177142967462591, |
|
"loss": 2.0208, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009169130225504289, |
|
"loss": 2.016, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0009161082193010066, |
|
"loss": 2.0331, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009152998938104296, |
|
"loss": 2.0757, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009144880529209498, |
|
"loss": 2.0093, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009136727035045765, |
|
"loss": 1.954, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009128538524630167, |
|
"loss": 2.0062, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0009120315067276187, |
|
"loss": 2.0179, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0009112056732593119, |
|
"loss": 2.0301, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0009103763590485488, |
|
"loss": 2.0223, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0009095435711152449, |
|
"loss": 2.0208, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00090870731650872, |
|
"loss": 2.0492, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0009078676023076385, |
|
"loss": 1.9917, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0009070244356199492, |
|
"loss": 2.0235, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0009061778235828253, |
|
"loss": 2.01, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0009054129328980229, |
|
"loss": 1.9999, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000904559794464287, |
|
"loss": 2.0022, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0009037032315436229, |
|
"loss": 2.0004, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0009028432513865994, |
|
"loss": 1.9936, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.000901979861272711, |
|
"loss": 2.0098, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0009011130685103166, |
|
"loss": 1.9829, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0009002428804365775, |
|
"loss": 2.0135, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0008993693044173954, |
|
"loss": 2.0526, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0008984923478473499, |
|
"loss": 2.0145, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0008976120181496365, |
|
"loss": 2.0364, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0008967283227760028, |
|
"loss": 2.019, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000895841269206686, |
|
"loss": 1.9874, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0008949508649503495, |
|
"loss": 2.0699, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0008940571175440196, |
|
"loss": 2.0407, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0008931600345530213, |
|
"loss": 2.0176, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0008922596235709142, |
|
"loss": 2.0235, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0008913558922194287, |
|
"loss": 2.0268, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000890448848148401, |
|
"loss": 2.0279, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0008895384990357087, |
|
"loss": 2.0073, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0008886248525872056, |
|
"loss": 2.0262, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0008877079165366566, |
|
"loss": 2.0112, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000886787698645672, |
|
"loss": 2.042, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0008858642067036424, |
|
"loss": 2.0129, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0008849374485276715, |
|
"loss": 2.0264, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000884007431962512, |
|
"loss": 2.0211, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0008830741648804967, |
|
"loss": 2.0175, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0008821376551814739, |
|
"loss": 2.0423, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0008811979107927396, |
|
"loss": 2.0112, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0008802549396689705, |
|
"loss": 2.0422, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0008793087497921566, |
|
"loss": 2.0075, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0008784544334882426, |
|
"loss": 1.9958, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000877502150068553, |
|
"loss": 2.0348, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0008765466711974225, |
|
"loss": 2.0232, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0008755880049627157, |
|
"loss": 1.9953, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0008746261594792776, |
|
"loss": 2.0223, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0008736611428888644, |
|
"loss": 2.0332, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0008726929633600753, |
|
"loss": 2.0075, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0008717216290882824, |
|
"loss": 2.002, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0008707471482955622, |
|
"loss": 2.0594, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0008697695292306253, |
|
"loss": 2.0214, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0008687887801687473, |
|
"loss": 1.994, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0008678049094116979, |
|
"loss": 2.0338, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008668179252876714, |
|
"loss": 2.0176, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008658278361512161, |
|
"loss": 2.0013, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008648346503831627, |
|
"loss": 2.0141, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008638383763905546, |
|
"loss": 1.9655, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008628390226065761, |
|
"loss": 2.039, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0008618365974904808, |
|
"loss": 1.9418, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0008608311095275205, |
|
"loss": 2.011, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0008598225672288731, |
|
"loss": 2.0174, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0008588109791315707, |
|
"loss": 1.984, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000857796353798427, |
|
"loss": 2.0437, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0008567786998179654, |
|
"loss": 2.0255, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0008557580258043459, |
|
"loss": 2.0067, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0008547343403972918, |
|
"loss": 2.0201, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0008537076522620175, |
|
"loss": 2.0258, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0008526779700891545, |
|
"loss": 1.9977, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0008516453025946781, |
|
"loss": 1.9835, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008506096585198333, |
|
"loss": 2.0428, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008495710466310613, |
|
"loss": 1.9554, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008485294757199248, |
|
"loss": 2.0497, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008474849546030339, |
|
"loss": 2.001, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008464374921219714, |
|
"loss": 1.9824, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008453870971432179, |
|
"loss": 2.0378, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0008443337785580769, |
|
"loss": 1.9756, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0008432775452825992, |
|
"loss": 2.0563, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0008422184062575077, |
|
"loss": 2.0115, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0008411563704481217, |
|
"loss": 2.0301, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0008400914468442814, |
|
"loss": 1.9912, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0008390236444602706, |
|
"loss": 2.0669, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0008379529723347417, |
|
"loss": 2.0138, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0008368794395306386, |
|
"loss": 2.0486, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0008358030551351199, |
|
"loss": 2.0342, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0008347238282594823, |
|
"loss": 2.0474, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0008336417680390829, |
|
"loss": 2.006, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0008325568836332633, |
|
"loss": 1.99, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00083146918422527, |
|
"loss": 2.0148, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0008303786790221778, |
|
"loss": 2.0381, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0008292853772548126, |
|
"loss": 2.0591, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0008281892881776714, |
|
"loss": 2.0238, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0008270904210688459, |
|
"loss": 1.9939, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0008259887852299427, |
|
"loss": 1.9941, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0008248843899860045, |
|
"loss": 2.0193, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0008237772446854325, |
|
"loss": 2.0166, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0008226673586999058, |
|
"loss": 2.0219, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008215547414243025, |
|
"loss": 2.0078, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008204394022766208, |
|
"loss": 2.0022, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008193213506978983, |
|
"loss": 2.0009, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008182005961521331, |
|
"loss": 2.0197, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008170771481262027, |
|
"loss": 2.0101, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008159510161297846, |
|
"loss": 2.0307, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0008148222096952748, |
|
"loss": 2.0075, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0008136907383777081, |
|
"loss": 2.0415, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000812556611754677, |
|
"loss": 2.0031, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0008114198394262497, |
|
"loss": 2.0025, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0008102804310148905, |
|
"loss": 1.983, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0008091383961653765, |
|
"loss": 2.0056, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0008079937445447175, |
|
"loss": 1.9679, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0008068464858420732, |
|
"loss": 1.9897, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0008056966297686717, |
|
"loss": 1.9833, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0008045441860577273, |
|
"loss": 2.0088, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0008033891644643576, |
|
"loss": 1.9985, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0008022315747655011, |
|
"loss": 2.013, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0008010714267598354, |
|
"loss": 1.9902, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0007999087302676926, |
|
"loss": 2.0133, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0007987434951309774, |
|
"loss": 2.0092, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0007975757312130836, |
|
"loss": 1.9894, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0007964054483988103, |
|
"loss": 1.9742, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.000795232656594278, |
|
"loss": 2.016, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0007940573657268458, |
|
"loss": 2.0386, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0007928795857450259, |
|
"loss": 1.9602, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0007916993266184007, |
|
"loss": 1.9681, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007905165983375379, |
|
"loss": 2.0326, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007893314109139058, |
|
"loss": 1.9958, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007881437743797882, |
|
"loss": 2.0077, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007869536987882008, |
|
"loss": 1.9813, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007857611942128044, |
|
"loss": 1.9538, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0007845662707478214, |
|
"loss": 1.9919, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0007833689385079485, |
|
"loss": 1.9922, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0007821692076282725, |
|
"loss": 1.9973, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0007809670882641842, |
|
"loss": 2.0231, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0007797625905912915, |
|
"loss": 1.9971, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0007785557248053351, |
|
"loss": 1.9934, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0007773465011221002, |
|
"loss": 1.9855, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0007761349297773315, |
|
"loss": 2.0421, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0007749210210266457, |
|
"loss": 1.9795, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0007737047851454451, |
|
"loss": 2.0006, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0007724862324288309, |
|
"loss": 2.011, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0007712653731915147, |
|
"loss": 1.9969, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0007700422177677331, |
|
"loss": 1.9581, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0007688167765111592, |
|
"loss": 1.9858, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0007675890597948141, |
|
"loss": 1.9945, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0007663590780109814, |
|
"loss": 2.0116, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0007651268415711168, |
|
"loss": 1.9831, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007638923609057616, |
|
"loss": 2.0113, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007626556464644538, |
|
"loss": 1.9927, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007614167087156393, |
|
"loss": 2.0209, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007601755581465844, |
|
"loss": 1.9941, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007589322052632855, |
|
"loss": 2.0443, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007576866605903812, |
|
"loss": 1.975, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007564389346710638, |
|
"loss": 1.9655, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007551890380669879, |
|
"loss": 2.0034, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007539369813581836, |
|
"loss": 1.953, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.000752682775142965, |
|
"loss": 2.0162, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007514264300378411, |
|
"loss": 2.0096, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0007501679566774267, |
|
"loss": 2.0431, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0007489073657143506, |
|
"loss": 1.961, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0007476446678191676, |
|
"loss": 2.0179, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0007463798736802665, |
|
"loss": 1.9885, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.0007451129940037804, |
|
"loss": 1.9955, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.000743844039513496, |
|
"loss": 1.9671, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0007425730209507622, |
|
"loss": 1.9866, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0007412999490744008, |
|
"loss": 1.9508, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0007400248346606129, |
|
"loss": 2.0261, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0007387476885028902, |
|
"loss": 1.9946, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007374685214119221, |
|
"loss": 1.969, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007361873442155049, |
|
"loss": 1.9802, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007349041677584496, |
|
"loss": 1.9739, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007336190029024907, |
|
"loss": 1.964, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007323318605261933, |
|
"loss": 2.0009, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007310427515248626, |
|
"loss": 1.9825, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0007297516868104501, |
|
"loss": 1.9744, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.000728458677311462, |
|
"loss": 2.0197, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0007271637339728667, |
|
"loss": 2.0016, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0007258668677560019, |
|
"loss": 1.9721, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0007245680896384818, |
|
"loss": 1.9591, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007232674106141048, |
|
"loss": 1.9762, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007219648416927592, |
|
"loss": 2.0026, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007206603939003314, |
|
"loss": 1.9876, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007193540782786117, |
|
"loss": 1.9991, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007180459058852011, |
|
"loss": 1.9897, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007167358877934176, |
|
"loss": 2.0581, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0007154240350922025, |
|
"loss": 1.9588, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0007141103588860265, |
|
"loss": 2.0151, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.000712794870294796, |
|
"loss": 1.9494, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0007114775804537586, |
|
"loss": 1.9936, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0007101585005134088, |
|
"loss": 2.0016, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0007088376416393944, |
|
"loss": 1.9628, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0007075150150124208, |
|
"loss": 1.9955, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0007061906318281568, |
|
"loss": 1.9731, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0007048645032971407, |
|
"loss": 1.9554, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0007035366406446839, |
|
"loss": 1.985, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0007022070551107774, |
|
"loss": 1.9961, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0007008757579499954, |
|
"loss": 2.0112, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0006995427604314004, |
|
"loss": 2.0086, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0006982080738384487, |
|
"loss": 2.019, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0006968717094688933, |
|
"loss": 1.9983, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0006955336786346898, |
|
"loss": 1.981, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0006941939926618997, |
|
"loss": 1.9878, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0006928526628905942, |
|
"loss": 2.0067, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0006915097006747601, |
|
"loss": 2.035, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0006901651173822013, |
|
"loss": 1.9614, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0006888189243944439, |
|
"loss": 1.9655, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0006874711331066401, |
|
"loss": 1.9692, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0006861217549274705, |
|
"loss": 1.9791, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0006847708012790492, |
|
"loss": 1.9804, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0006834182835968253, |
|
"loss": 1.9973, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0006820642133294876, |
|
"loss": 2.0153, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0006807086019388669, |
|
"loss": 1.9639, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0006793514608998393, |
|
"loss": 2.0162, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.000677992801700229, |
|
"loss": 1.994, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0006766326358407104, |
|
"loss": 1.9992, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0006752709748347125, |
|
"loss": 1.976, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.000673907830208319, |
|
"loss": 1.9452, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0006725432135001732, |
|
"loss": 1.9982, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0006711771362613785, |
|
"loss": 1.9766, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0006698096100554014, |
|
"loss": 1.9838, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0006684406464579735, |
|
"loss": 1.9746, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0006670702570569937, |
|
"loss": 1.9838, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0006656984534524296, |
|
"loss": 1.9747, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0006643252472562201, |
|
"loss": 1.9896, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.000662950650092176, |
|
"loss": 1.9607, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0006615746735958832, |
|
"loss": 2.0075, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0006603351250463683, |
|
"loss": 1.9918, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.000658956559916577, |
|
"loss": 1.9669, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0006575766492634068, |
|
"loss": 1.9645, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0006561954047674206, |
|
"loss": 1.9511, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0006548128381204714, |
|
"loss": 1.9934, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0006534289610256039, |
|
"loss": 1.9973, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0006520437851969557, |
|
"loss": 1.9983, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0006506573223596575, |
|
"loss": 1.9906, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0006492695842497347, |
|
"loss": 1.9392, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0006478805826140066, |
|
"loss": 2.0015, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0006464903292099886, |
|
"loss": 2.0168, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0006450988358057919, |
|
"loss": 1.9676, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0006437061141800238, |
|
"loss": 1.9798, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0006423121761216881, |
|
"loss": 1.9753, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0006409170334300858, |
|
"loss": 1.9965, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0006395206979147141, |
|
"loss": 1.9532, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0006381231813951678, |
|
"loss": 1.9962, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0006367244957010379, |
|
"loss": 1.9758, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.000635324652671813, |
|
"loss": 1.9606, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0006339236641567774, |
|
"loss": 1.9785, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0006325215420149119, |
|
"loss": 1.9334, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0006311182981147935, |
|
"loss": 1.9829, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.000629713944334494, |
|
"loss": 1.9671, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0006283084925614805, |
|
"loss": 2.0108, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0006269019546925139, |
|
"loss": 1.9341, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0006254943426335488, |
|
"loss": 1.9378, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0006240856682996326, |
|
"loss": 1.9619, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0006226759436148044, |
|
"loss": 1.9516, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0006212651805119946, |
|
"loss": 1.9882, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.000619853390932923, |
|
"loss": 1.9636, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0006184405868279985, |
|
"loss": 1.9894, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0006170267801562176, |
|
"loss": 1.9365, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0006156119828850633, |
|
"loss": 2.0264, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0006141962069904041, |
|
"loss": 1.9659, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0006127794644563915, |
|
"loss": 1.9368, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0006113617672753598, |
|
"loss": 1.959, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0006099431274477245, |
|
"loss": 1.9832, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0006085235569818795, |
|
"loss": 1.9572, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0006071030678940969, |
|
"loss": 1.99, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0006056816722084248, |
|
"loss": 1.9963, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0006042593819565846, |
|
"loss": 1.9997, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0006028362091778706, |
|
"loss": 1.9221, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0006014121659190476, |
|
"loss": 1.9623, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0005999872642342484, |
|
"loss": 1.9949, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0005985615161848721, |
|
"loss": 1.9688, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.000597134933839482, |
|
"loss": 1.975, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0005957075292737042, |
|
"loss": 2.0048, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0005942793145701235, |
|
"loss": 1.9591, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0005928503018181833, |
|
"loss": 1.9957, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0005914205031140819, |
|
"loss": 1.9823, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0005899899305606703, |
|
"loss": 2.0032, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0005885585962673501, |
|
"loss": 1.9722, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0005871265123499708, |
|
"loss": 1.9903, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0005856936909307273, |
|
"loss": 2.0258, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.000584260144138057, |
|
"loss": 2.0444, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0005828258841065377, |
|
"loss": 1.968, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0005813909229767846, |
|
"loss": 1.9598, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0005799552728953472, |
|
"loss": 2.0047, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.000578518946014607, |
|
"loss": 1.939, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0005770819544926741, |
|
"loss": 1.9835, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0005756443104932852, |
|
"loss": 2.028, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0005742060261856996, |
|
"loss": 1.973, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0005727671137445967, |
|
"loss": 1.9734, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0005713275853499729, |
|
"loss": 1.9921, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0005698874531870386, |
|
"loss": 1.9607, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0005684467294461146, |
|
"loss": 1.9468, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0005670054263225297, |
|
"loss": 1.9524, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0005655635560165166, |
|
"loss": 1.9737, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0005641211307331093, |
|
"loss": 1.9532, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0005626781626820395, |
|
"loss": 2.0048, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0005612346640776331, |
|
"loss": 2.0276, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0005597906471387074, |
|
"loss": 1.9476, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0005583461240884666, |
|
"loss": 1.943, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0005569011071543997, |
|
"loss": 1.9859, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0005554556085681757, |
|
"loss": 1.9607, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0005540096405655409, |
|
"loss": 1.9825, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0005525632153862152, |
|
"loss": 1.9875, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0005511163452737884, |
|
"loss": 2.0051, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0005496690424756164, |
|
"loss": 2.0089, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0005482213192427174, |
|
"loss": 1.9495, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0005467731878296688, |
|
"loss": 1.9701, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0005453246604945034, |
|
"loss": 1.9588, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0005438757494986044, |
|
"loss": 1.9833, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0005424264671066038, |
|
"loss": 1.9974, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0005409768255862766, |
|
"loss": 1.9746, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0005395268372084378, |
|
"loss": 1.9757, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0005380765142468387, |
|
"loss": 1.9908, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0005366258689780625, |
|
"loss": 2.006, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0005351749136814208, |
|
"loss": 1.9957, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0005337236606388495, |
|
"loss": 1.959, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0005322721221348047, |
|
"loss": 2.0027, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.000530820310456159, |
|
"loss": 1.9248, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0005293682378920974, |
|
"loss": 1.9742, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.000527915916734013, |
|
"loss": 1.9844, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005264633592754035, |
|
"loss": 1.9985, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005250105778117661, |
|
"loss": 2.0074, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005235575846404952, |
|
"loss": 1.9901, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005221043920607763, |
|
"loss": 1.9861, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0005206510123734837, |
|
"loss": 1.9576, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0005191974578810746, |
|
"loss": 1.9741, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0005177437408874868, |
|
"loss": 1.9794, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0005162898736980327, |
|
"loss": 1.9883, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0005148358686192967, |
|
"loss": 1.969, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0005133817379590302, |
|
"loss": 2.0018, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0005119274940260476, |
|
"loss": 1.971, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0005104731491301225, |
|
"loss": 1.9564, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0005090187155818823, |
|
"loss": 1.9798, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0005075642056927057, |
|
"loss": 2.0331, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.000506109631774617, |
|
"loss": 1.9505, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0005046550061401825, |
|
"loss": 1.9348, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0005032003411024067, |
|
"loss": 2.0099, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0005017456489746275, |
|
"loss": 1.9756, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0005002909420704116, |
|
"loss": 1.9444, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0004988362327034516, |
|
"loss": 1.976, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0004973815331874603, |
|
"loss": 1.9593, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004959268558360675, |
|
"loss": 1.9251, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004944722129627151, |
|
"loss": 1.972, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004930176168805531, |
|
"loss": 1.9307, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004915630799023359, |
|
"loss": 1.9541, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.0004901086143403172, |
|
"loss": 1.9212, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0004886542325061464, |
|
"loss": 1.965, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0004871999467107639, |
|
"loss": 2.0083, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0004857457692642974, |
|
"loss": 1.9676, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.000484291712475957, |
|
"loss": 1.9384, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00048283778865393243, |
|
"loss": 1.9339, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0004813840101052869, |
|
"loss": 2.0182, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0004799303891358545, |
|
"loss": 1.9262, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0004784769380501352, |
|
"loss": 1.9397, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0004770236691511908, |
|
"loss": 1.9404, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0004755705947405417, |
|
"loss": 1.9415, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00047411772711806105, |
|
"loss": 1.9292, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.000472665078581872, |
|
"loss": 1.9487, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00047121266142824354, |
|
"loss": 1.9435, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0004697604879514853, |
|
"loss": 1.9145, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0004683085704438451, |
|
"loss": 1.9975, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00046685692119540357, |
|
"loss": 1.9696, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00046540555249397056, |
|
"loss": 1.9598, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00046395447662498117, |
|
"loss": 1.9366, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00046250370587139206, |
|
"loss": 1.9748, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00046105325251357695, |
|
"loss": 1.9125, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00045960312882922306, |
|
"loss": 1.9595, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00045815334709322665, |
|
"loss": 1.9369, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.0004567039195775899, |
|
"loss": 1.9564, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.000455254858551316, |
|
"loss": 1.9274, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00045380617628030693, |
|
"loss": 1.9366, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00045235788502725777, |
|
"loss": 1.9446, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00045090999705155383, |
|
"loss": 1.9236, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004494625246091672, |
|
"loss": 1.9397, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00044801547995255194, |
|
"loss": 1.9849, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00044656887533054193, |
|
"loss": 1.9367, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004451227229882455, |
|
"loss": 1.9451, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004436770351669429, |
|
"loss": 1.9578, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004422318241039821, |
|
"loss": 1.9095, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00044078710203267576, |
|
"loss": 1.9337, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0004393428811821971, |
|
"loss": 1.9904, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00043789917377747695, |
|
"loss": 1.9569, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0004364559920390996, |
|
"loss": 1.9423, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00043501334818320004, |
|
"loss": 2.0013, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0004335712544213597, |
|
"loss": 1.9585, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0004321297229605044, |
|
"loss": 1.9148, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00043068876600279964, |
|
"loss": 1.9558, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0004292483957455482, |
|
"loss": 1.9899, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00042780862438108655, |
|
"loss": 1.9597, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00042636946409668123, |
|
"loss": 1.9603, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00042493092707442717, |
|
"loss": 1.9771, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0004234930254911427, |
|
"loss": 1.9416, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00042205577151826747, |
|
"loss": 1.9417, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00042061917732175966, |
|
"loss": 1.9636, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.000419183255061992, |
|
"loss": 1.966, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0004177480168936504, |
|
"loss": 1.9885, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00041631347496562913, |
|
"loss": 1.9525, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0004148796414209297, |
|
"loss": 1.9809, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0004134465283965568, |
|
"loss": 1.8982, |
|
"step": 6000 |
|
} |
|
], |
|
"max_steps": 10798, |
|
"num_train_epochs": 2, |
|
"total_flos": 5.693111667768951e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|