|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.12008838505139782, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999855889116, |
|
"loss": 3.4871, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999999357728057, |
|
"loss": 2.9969, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999998503737748, |
|
"loss": 2.9734, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999997293918308, |
|
"loss": 2.9203, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999995728269912, |
|
"loss": 2.7908, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999993806792782, |
|
"loss": 2.7973, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499999152948719, |
|
"loss": 2.8511, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999988896353463, |
|
"loss": 2.8423, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999985907391973, |
|
"loss": 2.8411, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999982562603146, |
|
"loss": 2.838, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.000499997886198746, |
|
"loss": 2.8751, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999974805545439, |
|
"loss": 2.7868, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999970393277663, |
|
"loss": 2.8003, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999965625184758, |
|
"loss": 2.8343, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999960501267404, |
|
"loss": 2.8053, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999955021526329, |
|
"loss": 2.7828, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999949185962313, |
|
"loss": 2.7822, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999942994576189, |
|
"loss": 2.7426, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999936447368836, |
|
"loss": 2.7808, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0004999929544341185, |
|
"loss": 2.7522, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999922285494221, |
|
"loss": 2.8008, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999914670828975, |
|
"loss": 2.748, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999906700346533, |
|
"loss": 2.783, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999898374048027, |
|
"loss": 2.7879, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999889691934643, |
|
"loss": 2.696, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999880654007619, |
|
"loss": 2.7354, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999871260268238, |
|
"loss": 2.7336, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999861510717839, |
|
"loss": 2.7077, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999851405357809, |
|
"loss": 2.7408, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999840944189586, |
|
"loss": 2.7115, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499983012721466, |
|
"loss": 2.6973, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999818954434569, |
|
"loss": 2.6644, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999807425850907, |
|
"loss": 2.7442, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499979554146531, |
|
"loss": 2.7007, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999783301279471, |
|
"loss": 2.7284, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999770705295135, |
|
"loss": 2.6649, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999757753514091, |
|
"loss": 2.7269, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999744445938185, |
|
"loss": 2.64, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999730782569309, |
|
"loss": 2.6886, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499971676340941, |
|
"loss": 2.6911, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999702388460482, |
|
"loss": 2.6814, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999687657724571, |
|
"loss": 2.7235, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999672571203774, |
|
"loss": 2.6898, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999657128900239, |
|
"loss": 2.6222, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999641330816162, |
|
"loss": 2.6827, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999625176953794, |
|
"loss": 2.6634, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999608667315432, |
|
"loss": 2.6976, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999591801903428, |
|
"loss": 2.6856, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499957458072018, |
|
"loss": 2.624, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.000499955700376814, |
|
"loss": 2.6693, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999539071049812, |
|
"loss": 2.7058, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999520782567746, |
|
"loss": 2.6878, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999502138324544, |
|
"loss": 2.6674, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999483138322863, |
|
"loss": 2.634, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999463782565405, |
|
"loss": 2.6242, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999444071054925, |
|
"loss": 2.636, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999424003794229, |
|
"loss": 2.6926, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999403580786175, |
|
"loss": 2.6564, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999382802033666, |
|
"loss": 2.6474, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999361667539663, |
|
"loss": 2.6462, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999340177307172, |
|
"loss": 2.6069, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0004999318331339252, |
|
"loss": 2.632, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999296129639014, |
|
"loss": 2.6533, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999273572209616, |
|
"loss": 2.6524, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499925065905427, |
|
"loss": 2.6229, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999227390176237, |
|
"loss": 2.6587, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999203765578828, |
|
"loss": 2.6175, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999179785265406, |
|
"loss": 2.6214, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999155449239384, |
|
"loss": 2.6338, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999130757504227, |
|
"loss": 2.6364, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999105710063449, |
|
"loss": 2.6509, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999080306920613, |
|
"loss": 2.6465, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999054548079338, |
|
"loss": 2.6452, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999028433543286, |
|
"loss": 2.6858, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004999001963316179, |
|
"loss": 2.6624, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998975137401781, |
|
"loss": 2.5747, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499894795580391, |
|
"loss": 2.6239, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998920418526438, |
|
"loss": 2.612, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998892525573282, |
|
"loss": 2.6312, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998864276948413, |
|
"loss": 2.6321, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499883567265585, |
|
"loss": 2.6128, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998806712699667, |
|
"loss": 2.6299, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998777397083982, |
|
"loss": 2.6444, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998747725812971, |
|
"loss": 2.6058, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998717698890857, |
|
"loss": 2.5646, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998687316321911, |
|
"loss": 2.6089, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998656578110461, |
|
"loss": 2.6257, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998625484260881, |
|
"loss": 2.6794, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998594034777594, |
|
"loss": 2.6585, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998562229665079, |
|
"loss": 2.6161, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998530068927862, |
|
"loss": 2.5684, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499849755257052, |
|
"loss": 2.6284, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998464680597682, |
|
"loss": 2.5965, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998431453014028, |
|
"loss": 2.5825, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998397869824284, |
|
"loss": 2.6056, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998363931033231, |
|
"loss": 2.5434, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998329636645702, |
|
"loss": 2.6535, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998294986666576, |
|
"loss": 2.7084, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998259981100785, |
|
"loss": 2.6648, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998224619953312, |
|
"loss": 2.6241, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000499818890322919, |
|
"loss": 2.5672, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998152830933501, |
|
"loss": 2.6968, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998116403071381, |
|
"loss": 2.5603, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0004998079619648014, |
|
"loss": 2.6031, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998042480668638, |
|
"loss": 2.5779, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004998004986138534, |
|
"loss": 2.6792, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997967136063043, |
|
"loss": 2.6038, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997928930447551, |
|
"loss": 2.6469, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997890369297496, |
|
"loss": 2.62, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997851452618364, |
|
"loss": 2.6517, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997812180415697, |
|
"loss": 2.6313, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997772552695085, |
|
"loss": 2.62, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997732569462167, |
|
"loss": 2.6205, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997692230722633, |
|
"loss": 2.5727, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997651536482227, |
|
"loss": 2.5626, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997610486746738, |
|
"loss": 2.6196, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997569081522012, |
|
"loss": 2.6219, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997527320813939, |
|
"loss": 2.6443, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997485204628466, |
|
"loss": 2.6027, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997442732971584, |
|
"loss": 2.6495, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997399905849342, |
|
"loss": 2.6099, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997356723267833, |
|
"loss": 2.6074, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997313185233204, |
|
"loss": 2.6089, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997269291751651, |
|
"loss": 2.6539, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997225042829423, |
|
"loss": 2.6044, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997180438472816, |
|
"loss": 2.5768, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997135478688181, |
|
"loss": 2.5801, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997090163481916, |
|
"loss": 2.6145, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004997044492860471, |
|
"loss": 2.6338, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996998466830345, |
|
"loss": 2.6048, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996952085398091, |
|
"loss": 2.6023, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000499690534857031, |
|
"loss": 2.6037, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996858256353653, |
|
"loss": 2.5876, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996810808754824, |
|
"loss": 2.5932, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996763005780576, |
|
"loss": 2.6014, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996714847437712, |
|
"loss": 2.6211, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996666333733088, |
|
"loss": 2.6615, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996617464673609, |
|
"loss": 2.6057, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996568240266228, |
|
"loss": 2.5722, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996518660517954, |
|
"loss": 2.5588, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996468725435843, |
|
"loss": 2.612, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996418435027002, |
|
"loss": 2.5926, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996367789298589, |
|
"loss": 2.5635, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996316788257812, |
|
"loss": 2.5991, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0004996265431911932, |
|
"loss": 2.6328, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004996213720268257, |
|
"loss": 2.5715, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004996161653334147, |
|
"loss": 2.5659, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004996109231117013, |
|
"loss": 2.5627, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004996056453624317, |
|
"loss": 2.5869, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004996003320863571, |
|
"loss": 2.6017, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995949832842337, |
|
"loss": 2.532, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995895989568227, |
|
"loss": 2.6129, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995841791048906, |
|
"loss": 2.6216, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995787237292089, |
|
"loss": 2.5878, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995732328305538, |
|
"loss": 2.5592, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499567706409707, |
|
"loss": 2.637, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995621444674552, |
|
"loss": 2.571, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995565470045898, |
|
"loss": 2.5604, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995509140219076, |
|
"loss": 2.6172, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995452455202105, |
|
"loss": 2.6257, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995395415003051, |
|
"loss": 2.5337, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995338019630033, |
|
"loss": 2.5631, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499528026909122, |
|
"loss": 2.5511, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995222163394834, |
|
"loss": 2.609, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995163702549142, |
|
"loss": 2.5627, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995104886562466, |
|
"loss": 2.5877, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004995045715443178, |
|
"loss": 2.5274, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994986189199701, |
|
"loss": 2.545, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994926307840505, |
|
"loss": 2.5648, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994866071374115, |
|
"loss": 2.5424, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994805479809103, |
|
"loss": 2.5759, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994744533154094, |
|
"loss": 2.6146, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994683231417762, |
|
"loss": 2.589, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994621574608833, |
|
"loss": 2.6288, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994559562736083, |
|
"loss": 2.5964, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994497195808336, |
|
"loss": 2.5697, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994434473834472, |
|
"loss": 2.5815, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994371396823416, |
|
"loss": 2.5621, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994307964784147, |
|
"loss": 2.5517, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994244177725693, |
|
"loss": 2.6157, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994180035657132, |
|
"loss": 2.5651, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994115538587595, |
|
"loss": 2.5868, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004994050686526261, |
|
"loss": 2.6103, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004993985479482362, |
|
"loss": 2.6364, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004993919917465178, |
|
"loss": 2.6651, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0004993854000484038, |
|
"loss": 2.5453, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000499378772854833, |
|
"loss": 2.5657, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499372110166748, |
|
"loss": 2.5375, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993654119850975, |
|
"loss": 2.5654, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993586783108349, |
|
"loss": 2.6298, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993519091449184, |
|
"loss": 2.5405, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993451044883116, |
|
"loss": 2.6205, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993382643419831, |
|
"loss": 2.5851, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993313887069062, |
|
"loss": 2.5528, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993244775840596, |
|
"loss": 2.5741, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993175309744273, |
|
"loss": 2.5482, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993105488789976, |
|
"loss": 2.5643, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004993035312987645, |
|
"loss": 2.5748, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992964782347269, |
|
"loss": 2.5976, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992893896878884, |
|
"loss": 2.5567, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499282265659258, |
|
"loss": 2.516, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00049927510614985, |
|
"loss": 2.5344, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992679111606829, |
|
"loss": 2.6098, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992606806927813, |
|
"loss": 2.567, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992534147471738, |
|
"loss": 2.5139, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992461133248951, |
|
"loss": 2.6271, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992387764269841, |
|
"loss": 2.6127, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992314040544851, |
|
"loss": 2.5665, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992239962084475, |
|
"loss": 2.5802, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992165528899256, |
|
"loss": 2.5765, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499209074099979, |
|
"loss": 2.5625, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004992015598396718, |
|
"loss": 2.5349, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991940101100739, |
|
"loss": 2.5717, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991864249122598, |
|
"loss": 2.5886, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991788042473088, |
|
"loss": 2.5742, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499171148116306, |
|
"loss": 2.5869, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991634565203407, |
|
"loss": 2.5709, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000499155729460508, |
|
"loss": 2.6118, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991479669379076, |
|
"loss": 2.5576, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991401689536442, |
|
"loss": 2.5306, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991323355088279, |
|
"loss": 2.5807, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991244666045735, |
|
"loss": 2.5359, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991165622420011, |
|
"loss": 2.5796, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991086224222358, |
|
"loss": 2.5182, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004991006471464075, |
|
"loss": 2.6564, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004990926364156513, |
|
"loss": 2.638, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004990845902311077, |
|
"loss": 2.5452, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004990765085939216, |
|
"loss": 2.5622, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990683915052436, |
|
"loss": 2.5423, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990602389662287, |
|
"loss": 2.5891, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990520509780374, |
|
"loss": 2.5968, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990438275418351, |
|
"loss": 2.5594, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990355686587923, |
|
"loss": 2.5458, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990272743300843, |
|
"loss": 2.5304, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000499018944556892, |
|
"loss": 2.5505, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990105793404008, |
|
"loss": 2.6207, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004990021786818013, |
|
"loss": 2.6038, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000498993742582289, |
|
"loss": 2.5597, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989852710430651, |
|
"loss": 2.5344, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000498976764065335, |
|
"loss": 2.6021, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989682216503097, |
|
"loss": 2.5924, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989596437992049, |
|
"loss": 2.5742, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989510305132416, |
|
"loss": 2.5709, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989423817936458, |
|
"loss": 2.6258, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989336976416484, |
|
"loss": 2.5664, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989249780584853, |
|
"loss": 2.5551, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989162230453978, |
|
"loss": 2.5816, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004989074326036319, |
|
"loss": 2.5314, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988986067344388, |
|
"loss": 2.6238, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988897454390747, |
|
"loss": 2.6155, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988808487188009, |
|
"loss": 2.6048, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988719165748835, |
|
"loss": 2.5906, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000498862949008594, |
|
"loss": 2.5826, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988539460212088, |
|
"loss": 2.5686, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988449076140091, |
|
"loss": 2.5948, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988358337882816, |
|
"loss": 2.5445, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988267245453175, |
|
"loss": 2.5417, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988175798864137, |
|
"loss": 2.5712, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004988083998128714, |
|
"loss": 2.5913, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987991843259975, |
|
"loss": 2.5529, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987899334271035, |
|
"loss": 2.5581, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987806471175063, |
|
"loss": 2.5334, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987713253985273, |
|
"loss": 2.5911, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987619682714936, |
|
"loss": 2.5962, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987525757377368, |
|
"loss": 2.5038, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987431477985938, |
|
"loss": 2.5732, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987336844554066, |
|
"loss": 2.5667, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987241857095221, |
|
"loss": 2.5632, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987146515622921, |
|
"loss": 2.545, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0004987050820150739, |
|
"loss": 2.5234, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986954770692293, |
|
"loss": 2.5294, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986858367261256, |
|
"loss": 2.6312, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986761609871347, |
|
"loss": 2.5496, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986664498536339, |
|
"loss": 2.5203, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986567033270055, |
|
"loss": 2.6495, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986469214086364, |
|
"loss": 2.592, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986371040999192, |
|
"loss": 2.532, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986272514022512, |
|
"loss": 2.591, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986173633170346, |
|
"loss": 2.5353, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004986074398456769, |
|
"loss": 2.565, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985974809895903, |
|
"loss": 2.6095, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985874867501927, |
|
"loss": 2.5576, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985774571289062, |
|
"loss": 2.5559, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985673921271584, |
|
"loss": 2.5174, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985572917463821, |
|
"loss": 2.5949, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985471559880146, |
|
"loss": 2.5327, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985369848534988, |
|
"loss": 2.6106, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985267783442823, |
|
"loss": 2.6212, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985165364618176, |
|
"loss": 2.6182, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004985062592075627, |
|
"loss": 2.5897, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984959465829804, |
|
"loss": 2.5322, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984855985895382, |
|
"loss": 2.4541, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984752152287093, |
|
"loss": 2.5546, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984647965019714, |
|
"loss": 2.5689, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984543424108075, |
|
"loss": 2.5218, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984438529567054, |
|
"loss": 2.6078, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984333281411582, |
|
"loss": 2.5947, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000498422767965664, |
|
"loss": 2.5687, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984121724317256, |
|
"loss": 2.5415, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004984015415408514, |
|
"loss": 2.5362, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983908752945542, |
|
"loss": 2.521, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983801736943524, |
|
"loss": 2.5902, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000498369436741769, |
|
"loss": 2.565, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983586644383322, |
|
"loss": 2.5585, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983478567855754, |
|
"loss": 2.5787, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983370137850369, |
|
"loss": 2.5266, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983261354382598, |
|
"loss": 2.4995, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983152217467924, |
|
"loss": 2.5792, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004983042727121883, |
|
"loss": 2.5772, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004982932883360058, |
|
"loss": 2.4929, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004982822686198083, |
|
"loss": 2.508, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0004982712135651643, |
|
"loss": 2.5573, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004982601231736472, |
|
"loss": 2.6541, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004982489974468355, |
|
"loss": 2.4863, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000498237836386313, |
|
"loss": 2.5646, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004982266399936678, |
|
"loss": 2.5657, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000498215408270494, |
|
"loss": 2.5708, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00049820414121839, |
|
"loss": 2.5032, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981928388389594, |
|
"loss": 2.5778, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000498181501133811, |
|
"loss": 2.5989, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981701281045585, |
|
"loss": 2.5047, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981587197528205, |
|
"loss": 2.5285, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000498147276080221, |
|
"loss": 2.5799, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981357970883886, |
|
"loss": 2.5478, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981242827789572, |
|
"loss": 2.5617, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981138897052764, |
|
"loss": 2.5479, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004981023082969261, |
|
"loss": 2.5556, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980906915757434, |
|
"loss": 2.5612, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980790395433817, |
|
"loss": 2.4746, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980673522014993, |
|
"loss": 2.5154, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00049805562955176, |
|
"loss": 2.5829, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980438715958318, |
|
"loss": 2.5546, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980320783353888, |
|
"loss": 2.6171, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980202497721091, |
|
"loss": 2.5568, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004980083859076766, |
|
"loss": 2.6162, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979964867437797, |
|
"loss": 2.5808, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979845522821121, |
|
"loss": 2.5665, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979725825243724, |
|
"loss": 2.5723, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979605774722644, |
|
"loss": 2.5039, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979485371274967, |
|
"loss": 2.5566, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979364614917831, |
|
"loss": 2.5481, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979243505668421, |
|
"loss": 2.545, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004979122043543979, |
|
"loss": 2.5013, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000497900022856179, |
|
"loss": 2.5336, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978878060739191, |
|
"loss": 2.5577, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978755540093573, |
|
"loss": 2.5775, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978632666642374, |
|
"loss": 2.5011, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978509440403083, |
|
"loss": 2.5834, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978385861393237, |
|
"loss": 2.5143, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978261929630427, |
|
"loss": 2.574, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978137645132294, |
|
"loss": 2.5587, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004978013007916523, |
|
"loss": 2.5767, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0004977888018000858, |
|
"loss": 2.5945, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977762675403087, |
|
"loss": 2.5834, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977636980141051, |
|
"loss": 2.6018, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977510932232639, |
|
"loss": 2.5104, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977384531695794, |
|
"loss": 2.4966, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977257778548506, |
|
"loss": 2.5948, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977130672808815, |
|
"loss": 2.5277, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004977003214494813, |
|
"loss": 2.5563, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976875403624641, |
|
"loss": 2.5506, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976747240216491, |
|
"loss": 2.5865, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976618724288604, |
|
"loss": 2.583, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976489855859272, |
|
"loss": 2.5752, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976360634946838, |
|
"loss": 2.5702, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976231061569694, |
|
"loss": 2.6195, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004976101135746281, |
|
"loss": 2.5673, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975970857495093, |
|
"loss": 2.5204, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975840226834672, |
|
"loss": 2.5061, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975709243783612, |
|
"loss": 2.538, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975577908360554, |
|
"loss": 2.536, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975446220584195, |
|
"loss": 2.554, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975314180473274, |
|
"loss": 2.5227, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975181788046585, |
|
"loss": 2.5565, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004975049043322976, |
|
"loss": 2.5408, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004974915946321335, |
|
"loss": 2.5228, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000497478249706061, |
|
"loss": 2.6042, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004974648695559794, |
|
"loss": 2.5345, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000497451454183793, |
|
"loss": 2.5326, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004974380035914114, |
|
"loss": 2.5002, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004974245177807489, |
|
"loss": 2.5726, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004974109967537251, |
|
"loss": 2.5553, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973974405122644, |
|
"loss": 2.5504, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973838490582962, |
|
"loss": 2.5844, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973702223937552, |
|
"loss": 2.5305, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973565605205807, |
|
"loss": 2.5156, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973428634407174, |
|
"loss": 2.5365, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973291311561146, |
|
"loss": 2.519, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973153636687271, |
|
"loss": 2.5673, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004973015609805142, |
|
"loss": 2.5224, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004972877230934407, |
|
"loss": 2.4995, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000497273850009476, |
|
"loss": 2.5978, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004972599417305948, |
|
"loss": 2.5886, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004972459982587765, |
|
"loss": 2.6197, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0004972320195960059, |
|
"loss": 2.5968, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004972180057442726, |
|
"loss": 2.5245, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004972039567055711, |
|
"loss": 2.5286, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971898724819011, |
|
"loss": 2.5388, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971757530752671, |
|
"loss": 2.5169, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971615984876789, |
|
"loss": 2.5923, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971474087211511, |
|
"loss": 2.5335, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971331837777033, |
|
"loss": 2.5831, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971189236593603, |
|
"loss": 2.5268, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004971046283681515, |
|
"loss": 2.5427, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004970902979061119, |
|
"loss": 2.5611, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000497075932275281, |
|
"loss": 2.6082, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004970615314777035, |
|
"loss": 2.5046, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004970470955154291, |
|
"loss": 2.5643, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004970326243905126, |
|
"loss": 2.5789, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004970181181050134, |
|
"loss": 2.4757, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004970035766609965, |
|
"loss": 2.4911, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969890000605315, |
|
"loss": 2.5294, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969743883056932, |
|
"loss": 2.4996, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969597413985612, |
|
"loss": 2.5432, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969450593412202, |
|
"loss": 2.5731, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969303421357601, |
|
"loss": 2.6248, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969155897842754, |
|
"loss": 2.5902, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004969008022888659, |
|
"loss": 2.4852, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004968859796516365, |
|
"loss": 2.4911, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004968711218746967, |
|
"loss": 2.5582, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004968562289601613, |
|
"loss": 2.5825, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004968413009101502, |
|
"loss": 2.5981, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000496826337726788, |
|
"loss": 2.5896, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004968113394122044, |
|
"loss": 2.5474, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967963059685341, |
|
"loss": 2.5784, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967812373979169, |
|
"loss": 2.5398, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967661337024976, |
|
"loss": 2.5089, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967509948844258, |
|
"loss": 2.519, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967358209458564, |
|
"loss": 2.5411, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967206118889491, |
|
"loss": 2.5732, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004967053677158686, |
|
"loss": 2.5164, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004966900884287845, |
|
"loss": 2.5663, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004966747740298717, |
|
"loss": 2.5126, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004966594245213098, |
|
"loss": 2.5704, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004966440399052836, |
|
"loss": 2.5488, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004966286201839828, |
|
"loss": 2.5444, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0004966131653596021, |
|
"loss": 2.5005, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004965976754343413, |
|
"loss": 2.5497, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000496582150410405, |
|
"loss": 2.6044, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004965665902900029, |
|
"loss": 2.6103, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004965509950753498, |
|
"loss": 2.4993, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004965353647686653, |
|
"loss": 2.4838, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004965196993721743, |
|
"loss": 2.4869, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000496503998888106, |
|
"loss": 2.5232, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004964882633186955, |
|
"loss": 2.514, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004964724926661825, |
|
"loss": 2.5312, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004964566869328115, |
|
"loss": 2.578, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004964408461208319, |
|
"loss": 2.557, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004964249702324989, |
|
"loss": 2.4945, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004964090592700718, |
|
"loss": 2.5046, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004963931132358153, |
|
"loss": 2.5283, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000496377132131999, |
|
"loss": 2.5303, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004963611159608975, |
|
"loss": 2.4897, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004963450647247906, |
|
"loss": 2.5238, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004963289784259627, |
|
"loss": 2.5362, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004963128570667034, |
|
"loss": 2.5355, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004962967006493074, |
|
"loss": 2.5849, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004962805091760741, |
|
"loss": 2.5662, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004962642826493082, |
|
"loss": 2.4892, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004962480210713193, |
|
"loss": 2.5777, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004962317244444217, |
|
"loss": 2.5342, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004962153927709353, |
|
"loss": 2.5293, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961990260531843, |
|
"loss": 2.5612, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961826242934983, |
|
"loss": 2.4989, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961661874942117, |
|
"loss": 2.557, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961497156576641, |
|
"loss": 2.542, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961332087862, |
|
"loss": 2.5551, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961166668821687, |
|
"loss": 2.5445, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004961000899479247, |
|
"loss": 2.5366, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004960834779858275, |
|
"loss": 2.5445, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004960668309982415, |
|
"loss": 2.5115, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000496050148987536, |
|
"loss": 2.562, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004960334319560854, |
|
"loss": 2.5237, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004960166799062691, |
|
"loss": 2.5659, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004959998928404715, |
|
"loss": 2.5704, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004959830707610819, |
|
"loss": 2.5654, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004959662136704945, |
|
"loss": 2.557, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004959493215711087, |
|
"loss": 2.4897, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004959323944653288, |
|
"loss": 2.572, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000495915432355564, |
|
"loss": 2.5766, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004958984352442286, |
|
"loss": 2.5121, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004958814031337419, |
|
"loss": 2.4934, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004958643360265279, |
|
"loss": 2.6128, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000495847233925016, |
|
"loss": 2.5246, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004958300968316402, |
|
"loss": 2.5577, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004958129247488397, |
|
"loss": 2.484, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004957957176790587, |
|
"loss": 2.5616, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004957784756247463, |
|
"loss": 2.4936, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004957611985883566, |
|
"loss": 2.58, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004957438865723486, |
|
"loss": 2.5083, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004957265395791865, |
|
"loss": 2.4819, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000495709157611339, |
|
"loss": 2.5151, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004956917406712805, |
|
"loss": 2.4644, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004956742887614897, |
|
"loss": 2.5624, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004956568018844507, |
|
"loss": 2.5754, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004956392800426525, |
|
"loss": 2.5833, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004956217232385888, |
|
"loss": 2.5116, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004956041314747587, |
|
"loss": 2.4878, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004955865047536658, |
|
"loss": 2.627, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0004955688430778193, |
|
"loss": 2.4976, |
|
"step": 5000 |
|
} |
|
], |
|
"max_steps": 83272, |
|
"num_train_epochs": 2, |
|
"total_flos": 4.13118422807321e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|