diff --git "a/checkpoint-10000/trainer_state.json" "b/checkpoint-10000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-10000/trainer_state.json" @@ -0,0 +1,6016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.24017677010279564, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0004999999855889116, + "loss": 3.4871, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999999357728057, + "loss": 2.9969, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999998503737748, + "loss": 2.9734, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999997293918308, + "loss": 2.9203, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999995728269912, + "loss": 2.7908, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999993806792782, + "loss": 2.7973, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499999152948719, + "loss": 2.8511, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999988896353463, + "loss": 2.8423, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999985907391973, + "loss": 2.8411, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999982562603146, + "loss": 2.838, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 0.000499997886198746, + "loss": 2.8751, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999974805545439, + "loss": 2.7868, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999970393277663, + "loss": 2.8003, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999965625184758, + "loss": 2.8343, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999960501267404, + "loss": 2.8053, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999955021526329, + "loss": 2.7828, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999949185962313, + "loss": 2.7822, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999942994576189, + "loss": 2.7426, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999936447368836, + "loss": 2.7808, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 0.0004999929544341185, + "loss": 2.7522, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999922285494221, + "loss": 2.8008, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999914670828975, + "loss": 2.748, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999906700346533, + "loss": 2.783, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999898374048027, + "loss": 2.7879, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999889691934643, + "loss": 2.696, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999880654007619, + "loss": 2.7354, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999871260268238, + "loss": 2.7336, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999861510717839, + "loss": 2.7077, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999851405357809, + "loss": 2.7408, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999840944189586, + "loss": 2.7115, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499983012721466, + "loss": 2.6973, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999818954434569, + "loss": 2.6644, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999807425850907, + "loss": 2.7442, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499979554146531, + "loss": 2.7007, + "step": 340 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999783301279471, + "loss": 2.7284, + "step": 350 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999770705295135, + "loss": 2.6649, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999757753514091, + "loss": 2.7269, + "step": 370 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999744445938185, + "loss": 2.64, + "step": 380 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999730782569309, + "loss": 2.6886, + "step": 390 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499971676340941, + "loss": 2.6911, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999702388460482, + "loss": 2.6814, + "step": 410 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999687657724571, + "loss": 2.7235, + "step": 420 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999672571203774, + "loss": 2.6898, + "step": 430 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999657128900239, + "loss": 2.6222, + "step": 440 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999641330816162, + "loss": 2.6827, + "step": 450 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999625176953794, + "loss": 2.6634, + "step": 460 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999608667315432, + "loss": 2.6976, + "step": 470 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999591801903428, + "loss": 2.6856, + "step": 480 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499957458072018, + "loss": 2.624, + "step": 490 + }, + { + "epoch": 0.01, + "learning_rate": 0.000499955700376814, + "loss": 2.6693, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999539071049812, + "loss": 2.7058, + "step": 510 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999520782567746, + "loss": 2.6878, + "step": 520 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999502138324544, + "loss": 2.6674, + "step": 530 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999483138322863, + "loss": 2.634, + "step": 540 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999463782565405, + "loss": 2.6242, + "step": 550 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999444071054925, + "loss": 2.636, + "step": 560 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999424003794229, + "loss": 2.6926, + "step": 570 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999403580786175, + "loss": 2.6564, + "step": 580 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999382802033666, + "loss": 2.6474, + "step": 590 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999361667539663, + "loss": 2.6462, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999340177307172, + "loss": 2.6069, + "step": 610 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004999318331339252, + "loss": 2.632, + "step": 620 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999296129639014, + "loss": 2.6533, + "step": 630 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999273572209616, + "loss": 2.6524, + "step": 640 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499925065905427, + "loss": 2.6229, + "step": 650 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999227390176237, + "loss": 2.6587, + "step": 660 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999203765578828, + "loss": 2.6175, + "step": 670 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999179785265406, + "loss": 2.6214, + "step": 680 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999155449239384, + "loss": 2.6338, + "step": 690 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999130757504227, + "loss": 2.6364, + "step": 700 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999105710063449, + "loss": 2.6509, + "step": 710 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999080306920613, + "loss": 2.6465, + "step": 720 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999054548079338, + "loss": 2.6452, + "step": 730 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999028433543286, + "loss": 2.6858, + "step": 740 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004999001963316179, + "loss": 2.6624, + "step": 750 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998975137401781, + "loss": 2.5747, + "step": 760 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499894795580391, + "loss": 2.6239, + "step": 770 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998920418526438, + "loss": 2.612, + "step": 780 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998892525573282, + "loss": 2.6312, + "step": 790 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998864276948413, + "loss": 2.6321, + "step": 800 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499883567265585, + "loss": 2.6128, + "step": 810 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998806712699667, + "loss": 2.6299, + "step": 820 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998777397083982, + "loss": 2.6444, + "step": 830 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998747725812971, + "loss": 2.6058, + "step": 840 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998717698890857, + "loss": 2.5646, + "step": 850 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998687316321911, + "loss": 2.6089, + "step": 860 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998656578110461, + "loss": 2.6257, + "step": 870 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998625484260881, + "loss": 2.6794, + "step": 880 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998594034777594, + "loss": 2.6585, + "step": 890 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998562229665079, + "loss": 2.6161, + "step": 900 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998530068927862, + "loss": 2.5684, + "step": 910 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499849755257052, + "loss": 2.6284, + "step": 920 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998464680597682, + "loss": 2.5965, + "step": 930 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998431453014028, + "loss": 2.5825, + "step": 940 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998397869824284, + "loss": 2.6056, + "step": 950 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998363931033231, + "loss": 2.5434, + "step": 960 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998329636645702, + "loss": 2.6535, + "step": 970 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998294986666576, + "loss": 2.7084, + "step": 980 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998259981100785, + "loss": 2.6648, + "step": 990 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998224619953312, + "loss": 2.6241, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 0.000499818890322919, + "loss": 2.5672, + "step": 1010 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998152830933501, + "loss": 2.6968, + "step": 1020 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998116403071381, + "loss": 2.5603, + "step": 1030 + }, + { + "epoch": 0.02, + "learning_rate": 0.0004998079619648014, + "loss": 2.6031, + "step": 1040 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998042480668638, + "loss": 2.5779, + "step": 1050 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004998004986138534, + "loss": 2.6792, + "step": 1060 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997967136063043, + "loss": 2.6038, + "step": 1070 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997928930447551, + "loss": 2.6469, + "step": 1080 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997890369297496, + "loss": 2.62, + "step": 1090 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997851452618364, + "loss": 2.6517, + "step": 1100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997812180415697, + "loss": 2.6313, + "step": 1110 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997772552695085, + "loss": 2.62, + "step": 1120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997732569462167, + "loss": 2.6205, + "step": 1130 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997692230722633, + "loss": 2.5727, + "step": 1140 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997651536482227, + "loss": 2.5626, + "step": 1150 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997610486746738, + "loss": 2.6196, + "step": 1160 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997569081522012, + "loss": 2.6219, + "step": 1170 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997527320813939, + "loss": 2.6443, + "step": 1180 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997485204628466, + "loss": 2.6027, + "step": 1190 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997442732971584, + "loss": 2.6495, + "step": 1200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997399905849342, + "loss": 2.6099, + "step": 1210 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997356723267833, + "loss": 2.6074, + "step": 1220 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997313185233204, + "loss": 2.6089, + "step": 1230 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997269291751651, + "loss": 2.6539, + "step": 1240 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997225042829423, + "loss": 2.6044, + "step": 1250 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997180438472816, + "loss": 2.5768, + "step": 1260 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997135478688181, + "loss": 2.5801, + "step": 1270 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997090163481916, + "loss": 2.6145, + "step": 1280 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004997044492860471, + "loss": 2.6338, + "step": 1290 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996998466830345, + "loss": 2.6048, + "step": 1300 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996952085398091, + "loss": 2.6023, + "step": 1310 + }, + { + "epoch": 0.03, + "learning_rate": 0.000499690534857031, + "loss": 2.6037, + "step": 1320 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996858256353653, + "loss": 2.5876, + "step": 1330 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996810808754824, + "loss": 2.5932, + "step": 1340 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996763005780576, + "loss": 2.6014, + "step": 1350 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996714847437712, + "loss": 2.6211, + "step": 1360 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996666333733088, + "loss": 2.6615, + "step": 1370 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996617464673609, + "loss": 2.6057, + "step": 1380 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996568240266228, + "loss": 2.5722, + "step": 1390 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996518660517954, + "loss": 2.5588, + "step": 1400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996468725435843, + "loss": 2.612, + "step": 1410 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996418435027002, + "loss": 2.5926, + "step": 1420 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996367789298589, + "loss": 2.5635, + "step": 1430 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996316788257812, + "loss": 2.5991, + "step": 1440 + }, + { + "epoch": 0.03, + "learning_rate": 0.0004996265431911932, + "loss": 2.6328, + "step": 1450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004996213720268257, + "loss": 2.5715, + "step": 1460 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004996161653334147, + "loss": 2.5659, + "step": 1470 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004996109231117013, + "loss": 2.5627, + "step": 1480 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004996056453624317, + "loss": 2.5869, + "step": 1490 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004996003320863571, + "loss": 2.6017, + "step": 1500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995949832842337, + "loss": 2.532, + "step": 1510 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995895989568227, + "loss": 2.6129, + "step": 1520 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995841791048906, + "loss": 2.6216, + "step": 1530 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995787237292089, + "loss": 2.5878, + "step": 1540 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995732328305538, + "loss": 2.5592, + "step": 1550 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499567706409707, + "loss": 2.637, + "step": 1560 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995621444674552, + "loss": 2.571, + "step": 1570 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995565470045898, + "loss": 2.5604, + "step": 1580 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995509140219076, + "loss": 2.6172, + "step": 1590 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995452455202105, + "loss": 2.6257, + "step": 1600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995395415003051, + "loss": 2.5337, + "step": 1610 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995338019630033, + "loss": 2.5631, + "step": 1620 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499528026909122, + "loss": 2.5511, + "step": 1630 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995222163394834, + "loss": 2.609, + "step": 1640 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995163702549142, + "loss": 2.5627, + "step": 1650 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995104886562466, + "loss": 2.5877, + "step": 1660 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004995045715443178, + "loss": 2.5274, + "step": 1670 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994986189199701, + "loss": 2.545, + "step": 1680 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994926307840505, + "loss": 2.5648, + "step": 1690 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994866071374115, + "loss": 2.5424, + "step": 1700 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994805479809103, + "loss": 2.5759, + "step": 1710 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994744533154094, + "loss": 2.6146, + "step": 1720 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994683231417762, + "loss": 2.589, + "step": 1730 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994621574608833, + "loss": 2.6288, + "step": 1740 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994559562736083, + "loss": 2.5964, + "step": 1750 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994497195808336, + "loss": 2.5697, + "step": 1760 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994434473834472, + "loss": 2.5815, + "step": 1770 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994371396823416, + "loss": 2.5621, + "step": 1780 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994307964784147, + "loss": 2.5517, + "step": 1790 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994244177725693, + "loss": 2.6157, + "step": 1800 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994180035657132, + "loss": 2.5651, + "step": 1810 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994115538587595, + "loss": 2.5868, + "step": 1820 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004994050686526261, + "loss": 2.6103, + "step": 1830 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004993985479482362, + "loss": 2.6364, + "step": 1840 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004993919917465178, + "loss": 2.6651, + "step": 1850 + }, + { + "epoch": 0.04, + "learning_rate": 0.0004993854000484038, + "loss": 2.5453, + "step": 1860 + }, + { + "epoch": 0.04, + "learning_rate": 0.000499378772854833, + "loss": 2.5657, + "step": 1870 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499372110166748, + "loss": 2.5375, + "step": 1880 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993654119850975, + "loss": 2.5654, + "step": 1890 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993586783108349, + "loss": 2.6298, + "step": 1900 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993519091449184, + "loss": 2.5405, + "step": 1910 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993451044883116, + "loss": 2.6205, + "step": 1920 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993382643419831, + "loss": 2.5851, + "step": 1930 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993313887069062, + "loss": 2.5528, + "step": 1940 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993244775840596, + "loss": 2.5741, + "step": 1950 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993175309744273, + "loss": 2.5482, + "step": 1960 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993105488789976, + "loss": 2.5643, + "step": 1970 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004993035312987645, + "loss": 2.5748, + "step": 1980 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992964782347269, + "loss": 2.5976, + "step": 1990 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992893896878884, + "loss": 2.5567, + "step": 2000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499282265659258, + "loss": 2.516, + "step": 2010 + }, + { + "epoch": 0.05, + "learning_rate": 0.00049927510614985, + "loss": 2.5344, + "step": 2020 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992679111606829, + "loss": 2.6098, + "step": 2030 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992606806927813, + "loss": 2.567, + "step": 2040 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992534147471738, + "loss": 2.5139, + "step": 2050 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992461133248951, + "loss": 2.6271, + "step": 2060 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992387764269841, + "loss": 2.6127, + "step": 2070 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992314040544851, + "loss": 2.5665, + "step": 2080 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992239962084475, + "loss": 2.5802, + "step": 2090 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992165528899256, + "loss": 2.5765, + "step": 2100 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499209074099979, + "loss": 2.5625, + "step": 2110 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004992015598396718, + "loss": 2.5349, + "step": 2120 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991940101100739, + "loss": 2.5717, + "step": 2130 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991864249122598, + "loss": 2.5886, + "step": 2140 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991788042473088, + "loss": 2.5742, + "step": 2150 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499171148116306, + "loss": 2.5869, + "step": 2160 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991634565203407, + "loss": 2.5709, + "step": 2170 + }, + { + "epoch": 0.05, + "learning_rate": 0.000499155729460508, + "loss": 2.6118, + "step": 2180 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991479669379076, + "loss": 2.5576, + "step": 2190 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991401689536442, + "loss": 2.5306, + "step": 2200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991323355088279, + "loss": 2.5807, + "step": 2210 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991244666045735, + "loss": 2.5359, + "step": 2220 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991165622420011, + "loss": 2.5796, + "step": 2230 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991086224222358, + "loss": 2.5182, + "step": 2240 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004991006471464075, + "loss": 2.6564, + "step": 2250 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004990926364156513, + "loss": 2.638, + "step": 2260 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004990845902311077, + "loss": 2.5452, + "step": 2270 + }, + { + "epoch": 0.05, + "learning_rate": 0.0004990765085939216, + "loss": 2.5622, + "step": 2280 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990683915052436, + "loss": 2.5423, + "step": 2290 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990602389662287, + "loss": 2.5891, + "step": 2300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990520509780374, + "loss": 2.5968, + "step": 2310 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990438275418351, + "loss": 2.5594, + "step": 2320 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990355686587923, + "loss": 2.5458, + "step": 2330 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990272743300843, + "loss": 2.5304, + "step": 2340 + }, + { + "epoch": 0.06, + "learning_rate": 0.000499018944556892, + "loss": 2.5505, + "step": 2350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990105793404008, + "loss": 2.6207, + "step": 2360 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004990021786818013, + "loss": 2.6038, + "step": 2370 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498993742582289, + "loss": 2.5597, + "step": 2380 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989852710430651, + "loss": 2.5344, + "step": 2390 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498976764065335, + "loss": 2.6021, + "step": 2400 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989682216503097, + "loss": 2.5924, + "step": 2410 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989596437992049, + "loss": 2.5742, + "step": 2420 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989510305132416, + "loss": 2.5709, + "step": 2430 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989423817936458, + "loss": 2.6258, + "step": 2440 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989336976416484, + "loss": 2.5664, + "step": 2450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989249780584853, + "loss": 2.5551, + "step": 2460 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989162230453978, + "loss": 2.5816, + "step": 2470 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004989074326036319, + "loss": 2.5314, + "step": 2480 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988986067344388, + "loss": 2.6238, + "step": 2490 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988897454390747, + "loss": 2.6155, + "step": 2500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988808487188009, + "loss": 2.6048, + "step": 2510 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988719165748835, + "loss": 2.5906, + "step": 2520 + }, + { + "epoch": 0.06, + "learning_rate": 0.000498862949008594, + "loss": 2.5826, + "step": 2530 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988539460212088, + "loss": 2.5686, + "step": 2540 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988449076140091, + "loss": 2.5948, + "step": 2550 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988358337882816, + "loss": 2.5445, + "step": 2560 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988267245453175, + "loss": 2.5417, + "step": 2570 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988175798864137, + "loss": 2.5712, + "step": 2580 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004988083998128714, + "loss": 2.5913, + "step": 2590 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987991843259975, + "loss": 2.5529, + "step": 2600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987899334271035, + "loss": 2.5581, + "step": 2610 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987806471175063, + "loss": 2.5334, + "step": 2620 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987713253985273, + "loss": 2.5911, + "step": 2630 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987619682714936, + "loss": 2.5962, + "step": 2640 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987525757377368, + "loss": 2.5038, + "step": 2650 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987431477985938, + "loss": 2.5732, + "step": 2660 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987336844554066, + "loss": 2.5667, + "step": 2670 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987241857095221, + "loss": 2.5632, + "step": 2680 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987146515622921, + "loss": 2.545, + "step": 2690 + }, + { + "epoch": 0.06, + "learning_rate": 0.0004987050820150739, + "loss": 2.5234, + "step": 2700 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986954770692293, + "loss": 2.5294, + "step": 2710 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986858367261256, + "loss": 2.6312, + "step": 2720 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986761609871347, + "loss": 2.5496, + "step": 2730 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986664498536339, + "loss": 2.5203, + "step": 2740 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986567033270055, + "loss": 2.6495, + "step": 2750 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986469214086364, + "loss": 2.592, + "step": 2760 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986371040999192, + "loss": 2.532, + "step": 2770 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986272514022512, + "loss": 2.591, + "step": 2780 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986173633170346, + "loss": 2.5353, + "step": 2790 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004986074398456769, + "loss": 2.565, + "step": 2800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985974809895903, + "loss": 2.6095, + "step": 2810 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985874867501927, + "loss": 2.5576, + "step": 2820 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985774571289062, + "loss": 2.5559, + "step": 2830 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985673921271584, + "loss": 2.5174, + "step": 2840 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985572917463821, + "loss": 2.5949, + "step": 2850 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985471559880146, + "loss": 2.5327, + "step": 2860 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985369848534988, + "loss": 2.6106, + "step": 2870 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985267783442823, + "loss": 2.6212, + "step": 2880 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985165364618176, + "loss": 2.6182, + "step": 2890 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004985062592075627, + "loss": 2.5897, + "step": 2900 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984959465829804, + "loss": 2.5322, + "step": 2910 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984855985895382, + "loss": 2.4541, + "step": 2920 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984752152287093, + "loss": 2.5546, + "step": 2930 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984647965019714, + "loss": 2.5689, + "step": 2940 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984543424108075, + "loss": 2.5218, + "step": 2950 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984438529567054, + "loss": 2.6078, + "step": 2960 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984333281411582, + "loss": 2.5947, + "step": 2970 + }, + { + "epoch": 0.07, + "learning_rate": 0.000498422767965664, + "loss": 2.5687, + "step": 2980 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984121724317256, + "loss": 2.5415, + "step": 2990 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004984015415408514, + "loss": 2.5362, + "step": 3000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983908752945542, + "loss": 2.521, + "step": 3010 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983801736943524, + "loss": 2.5902, + "step": 3020 + }, + { + "epoch": 0.07, + "learning_rate": 0.000498369436741769, + "loss": 2.565, + "step": 3030 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983586644383322, + "loss": 2.5585, + "step": 3040 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983478567855754, + "loss": 2.5787, + "step": 3050 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983370137850369, + "loss": 2.5266, + "step": 3060 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983261354382598, + "loss": 2.4995, + "step": 3070 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983152217467924, + "loss": 2.5792, + "step": 3080 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004983042727121883, + "loss": 2.5772, + "step": 3090 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982932883360058, + "loss": 2.4929, + "step": 3100 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982822686198083, + "loss": 2.508, + "step": 3110 + }, + { + "epoch": 0.07, + "learning_rate": 0.0004982712135651643, + "loss": 2.5573, + "step": 3120 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004982601231736472, + "loss": 2.6541, + "step": 3130 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004982489974468355, + "loss": 2.4863, + "step": 3140 + }, + { + "epoch": 0.08, + "learning_rate": 0.000498237836386313, + "loss": 2.5646, + "step": 3150 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004982266399936678, + "loss": 2.5657, + "step": 3160 + }, + { + "epoch": 0.08, + "learning_rate": 0.000498215408270494, + "loss": 2.5708, + "step": 3170 + }, + { + "epoch": 0.08, + "learning_rate": 0.00049820414121839, + "loss": 2.5032, + "step": 3180 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981928388389594, + "loss": 2.5778, + "step": 3190 + }, + { + "epoch": 0.08, + "learning_rate": 0.000498181501133811, + "loss": 2.5989, + "step": 3200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981701281045585, + "loss": 2.5047, + "step": 3210 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981587197528205, + "loss": 2.5285, + "step": 3220 + }, + { + "epoch": 0.08, + "learning_rate": 0.000498147276080221, + "loss": 2.5799, + "step": 3230 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981357970883886, + "loss": 2.5478, + "step": 3240 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981242827789572, + "loss": 2.5617, + "step": 3250 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981138897052764, + "loss": 2.5479, + "step": 3260 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004981023082969261, + "loss": 2.5556, + "step": 3270 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980906915757434, + "loss": 2.5612, + "step": 3280 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980790395433817, + "loss": 2.4746, + "step": 3290 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980673522014993, + "loss": 2.5154, + "step": 3300 + }, + { + "epoch": 0.08, + "learning_rate": 0.00049805562955176, + "loss": 2.5829, + "step": 3310 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980438715958318, + "loss": 2.5546, + "step": 3320 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980320783353888, + "loss": 2.6171, + "step": 3330 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980202497721091, + "loss": 2.5568, + "step": 3340 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004980083859076766, + "loss": 2.6162, + "step": 3350 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979964867437797, + "loss": 2.5808, + "step": 3360 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979845522821121, + "loss": 2.5665, + "step": 3370 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979725825243724, + "loss": 2.5723, + "step": 3380 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979605774722644, + "loss": 2.5039, + "step": 3390 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979485371274967, + "loss": 2.5566, + "step": 3400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979364614917831, + "loss": 2.5481, + "step": 3410 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979243505668421, + "loss": 2.545, + "step": 3420 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004979122043543979, + "loss": 2.5013, + "step": 3430 + }, + { + "epoch": 0.08, + "learning_rate": 0.000497900022856179, + "loss": 2.5336, + "step": 3440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978878060739191, + "loss": 2.5577, + "step": 3450 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978755540093573, + "loss": 2.5775, + "step": 3460 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978632666642374, + "loss": 2.5011, + "step": 3470 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978509440403083, + "loss": 2.5834, + "step": 3480 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978385861393237, + "loss": 2.5143, + "step": 3490 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978261929630427, + "loss": 2.574, + "step": 3500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978137645132294, + "loss": 2.5587, + "step": 3510 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004978013007916523, + "loss": 2.5767, + "step": 3520 + }, + { + "epoch": 0.08, + "learning_rate": 0.0004977888018000858, + "loss": 2.5945, + "step": 3530 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977762675403087, + "loss": 2.5834, + "step": 3540 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977636980141051, + "loss": 2.6018, + "step": 3550 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977510932232639, + "loss": 2.5104, + "step": 3560 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977384531695794, + "loss": 2.4966, + "step": 3570 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977257778548506, + "loss": 2.5948, + "step": 3580 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977130672808815, + "loss": 2.5277, + "step": 3590 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004977003214494813, + "loss": 2.5563, + "step": 3600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976875403624641, + "loss": 2.5506, + "step": 3610 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976747240216491, + "loss": 2.5865, + "step": 3620 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976618724288604, + "loss": 2.583, + "step": 3630 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976489855859272, + "loss": 2.5752, + "step": 3640 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976360634946838, + "loss": 2.5702, + "step": 3650 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976231061569694, + "loss": 2.6195, + "step": 3660 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004976101135746281, + "loss": 2.5673, + "step": 3670 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975970857495093, + "loss": 2.5204, + "step": 3680 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975840226834672, + "loss": 2.5061, + "step": 3690 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975709243783612, + "loss": 2.538, + "step": 3700 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975577908360554, + "loss": 2.536, + "step": 3710 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975446220584195, + "loss": 2.554, + "step": 3720 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975314180473274, + "loss": 2.5227, + "step": 3730 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975181788046585, + "loss": 2.5565, + "step": 3740 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004975049043322976, + "loss": 2.5408, + "step": 3750 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004974915946321335, + "loss": 2.5228, + "step": 3760 + }, + { + "epoch": 0.09, + "learning_rate": 0.000497478249706061, + "loss": 2.6042, + "step": 3770 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004974648695559794, + "loss": 2.5345, + "step": 3780 + }, + { + "epoch": 0.09, + "learning_rate": 0.000497451454183793, + "loss": 2.5326, + "step": 3790 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004974380035914114, + "loss": 2.5002, + "step": 3800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004974245177807489, + "loss": 2.5726, + "step": 3810 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004974109967537251, + "loss": 2.5553, + "step": 3820 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973974405122644, + "loss": 2.5504, + "step": 3830 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973838490582962, + "loss": 2.5844, + "step": 3840 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973702223937552, + "loss": 2.5305, + "step": 3850 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973565605205807, + "loss": 2.5156, + "step": 3860 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973428634407174, + "loss": 2.5365, + "step": 3870 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973291311561146, + "loss": 2.519, + "step": 3880 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973153636687271, + "loss": 2.5673, + "step": 3890 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004973015609805142, + "loss": 2.5224, + "step": 3900 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004972877230934407, + "loss": 2.4995, + "step": 3910 + }, + { + "epoch": 0.09, + "learning_rate": 0.000497273850009476, + "loss": 2.5978, + "step": 3920 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004972599417305948, + "loss": 2.5886, + "step": 3930 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004972459982587765, + "loss": 2.6197, + "step": 3940 + }, + { + "epoch": 0.09, + "learning_rate": 0.0004972320195960059, + "loss": 2.5968, + "step": 3950 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004972180057442726, + "loss": 2.5245, + "step": 3960 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004972039567055711, + "loss": 2.5286, + "step": 3970 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971898724819011, + "loss": 2.5388, + "step": 3980 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971757530752671, + "loss": 2.5169, + "step": 3990 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971615984876789, + "loss": 2.5923, + "step": 4000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971474087211511, + "loss": 2.5335, + "step": 4010 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971331837777033, + "loss": 2.5831, + "step": 4020 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971189236593603, + "loss": 2.5268, + "step": 4030 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004971046283681515, + "loss": 2.5427, + "step": 4040 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004970902979061119, + "loss": 2.5611, + "step": 4050 + }, + { + "epoch": 0.1, + "learning_rate": 0.000497075932275281, + "loss": 2.6082, + "step": 4060 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004970615314777035, + "loss": 2.5046, + "step": 4070 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004970470955154291, + "loss": 2.5643, + "step": 4080 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004970326243905126, + "loss": 2.5789, + "step": 4090 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004970181181050134, + "loss": 2.4757, + "step": 4100 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004970035766609965, + "loss": 2.4911, + "step": 4110 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969890000605315, + "loss": 2.5294, + "step": 4120 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969743883056932, + "loss": 2.4996, + "step": 4130 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969597413985612, + "loss": 2.5432, + "step": 4140 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969450593412202, + "loss": 2.5731, + "step": 4150 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969303421357601, + "loss": 2.6248, + "step": 4160 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969155897842754, + "loss": 2.5902, + "step": 4170 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004969008022888659, + "loss": 2.4852, + "step": 4180 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004968859796516365, + "loss": 2.4911, + "step": 4190 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004968711218746967, + "loss": 2.5582, + "step": 4200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004968562289601613, + "loss": 2.5825, + "step": 4210 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004968413009101502, + "loss": 2.5981, + "step": 4220 + }, + { + "epoch": 0.1, + "learning_rate": 0.000496826337726788, + "loss": 2.5896, + "step": 4230 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004968113394122044, + "loss": 2.5474, + "step": 4240 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967963059685341, + "loss": 2.5784, + "step": 4250 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967812373979169, + "loss": 2.5398, + "step": 4260 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967661337024976, + "loss": 2.5089, + "step": 4270 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967509948844258, + "loss": 2.519, + "step": 4280 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967358209458564, + "loss": 2.5411, + "step": 4290 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967206118889491, + "loss": 2.5732, + "step": 4300 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004967053677158686, + "loss": 2.5164, + "step": 4310 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004966900884287845, + "loss": 2.5663, + "step": 4320 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004966747740298717, + "loss": 2.5126, + "step": 4330 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004966594245213098, + "loss": 2.5704, + "step": 4340 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004966440399052836, + "loss": 2.5488, + "step": 4350 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004966286201839828, + "loss": 2.5444, + "step": 4360 + }, + { + "epoch": 0.1, + "learning_rate": 0.0004966131653596021, + "loss": 2.5005, + "step": 4370 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004965976754343413, + "loss": 2.5497, + "step": 4380 + }, + { + "epoch": 0.11, + "learning_rate": 0.000496582150410405, + "loss": 2.6044, + "step": 4390 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004965665902900029, + "loss": 2.6103, + "step": 4400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004965509950753498, + "loss": 2.4993, + "step": 4410 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004965353647686653, + "loss": 2.4838, + "step": 4420 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004965196993721743, + "loss": 2.4869, + "step": 4430 + }, + { + "epoch": 0.11, + "learning_rate": 0.000496503998888106, + "loss": 2.5232, + "step": 4440 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004964882633186955, + "loss": 2.514, + "step": 4450 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004964724926661825, + "loss": 2.5312, + "step": 4460 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004964566869328115, + "loss": 2.578, + "step": 4470 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004964408461208319, + "loss": 2.557, + "step": 4480 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004964249702324989, + "loss": 2.4945, + "step": 4490 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004964090592700718, + "loss": 2.5046, + "step": 4500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004963931132358153, + "loss": 2.5283, + "step": 4510 + }, + { + "epoch": 0.11, + "learning_rate": 0.000496377132131999, + "loss": 2.5303, + "step": 4520 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004963611159608975, + "loss": 2.4897, + "step": 4530 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004963450647247906, + "loss": 2.5238, + "step": 4540 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004963289784259627, + "loss": 2.5362, + "step": 4550 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004963128570667034, + "loss": 2.5355, + "step": 4560 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004962967006493074, + "loss": 2.5849, + "step": 4570 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004962805091760741, + "loss": 2.5662, + "step": 4580 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004962642826493082, + "loss": 2.4892, + "step": 4590 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004962480210713193, + "loss": 2.5777, + "step": 4600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004962317244444217, + "loss": 2.5342, + "step": 4610 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004962153927709353, + "loss": 2.5293, + "step": 4620 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961990260531843, + "loss": 2.5612, + "step": 4630 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961826242934983, + "loss": 2.4989, + "step": 4640 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961661874942117, + "loss": 2.557, + "step": 4650 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961497156576641, + "loss": 2.542, + "step": 4660 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961332087862, + "loss": 2.5551, + "step": 4670 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961166668821687, + "loss": 2.5445, + "step": 4680 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004961000899479247, + "loss": 2.5366, + "step": 4690 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004960834779858275, + "loss": 2.5445, + "step": 4700 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004960668309982415, + "loss": 2.5115, + "step": 4710 + }, + { + "epoch": 0.11, + "learning_rate": 0.000496050148987536, + "loss": 2.562, + "step": 4720 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004960334319560854, + "loss": 2.5237, + "step": 4730 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004960166799062691, + "loss": 2.5659, + "step": 4740 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004959998928404715, + "loss": 2.5704, + "step": 4750 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004959830707610819, + "loss": 2.5654, + "step": 4760 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004959662136704945, + "loss": 2.557, + "step": 4770 + }, + { + "epoch": 0.11, + "learning_rate": 0.0004959493215711087, + "loss": 2.4897, + "step": 4780 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004959323944653288, + "loss": 2.572, + "step": 4790 + }, + { + "epoch": 0.12, + "learning_rate": 0.000495915432355564, + "loss": 2.5766, + "step": 4800 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004958984352442286, + "loss": 2.5121, + "step": 4810 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004958814031337419, + "loss": 2.4934, + "step": 4820 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004958643360265279, + "loss": 2.6128, + "step": 4830 + }, + { + "epoch": 0.12, + "learning_rate": 0.000495847233925016, + "loss": 2.5246, + "step": 4840 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004958300968316402, + "loss": 2.5577, + "step": 4850 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004958129247488397, + "loss": 2.484, + "step": 4860 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004957957176790587, + "loss": 2.5616, + "step": 4870 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004957784756247463, + "loss": 2.4936, + "step": 4880 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004957611985883566, + "loss": 2.58, + "step": 4890 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004957438865723486, + "loss": 2.5083, + "step": 4900 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004957265395791865, + "loss": 2.4819, + "step": 4910 + }, + { + "epoch": 0.12, + "learning_rate": 0.000495709157611339, + "loss": 2.5151, + "step": 4920 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004956917406712805, + "loss": 2.4644, + "step": 4930 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004956742887614897, + "loss": 2.5624, + "step": 4940 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004956568018844507, + "loss": 2.5754, + "step": 4950 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004956392800426525, + "loss": 2.5833, + "step": 4960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004956217232385888, + "loss": 2.5116, + "step": 4970 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004956041314747587, + "loss": 2.4878, + "step": 4980 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004955865047536658, + "loss": 2.627, + "step": 4990 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004955688430778193, + "loss": 2.4976, + "step": 5000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004955511464497329, + "loss": 2.5709, + "step": 5010 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004955334148719252, + "loss": 2.5895, + "step": 5020 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004955156483469201, + "loss": 2.5231, + "step": 5030 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004954978468772463, + "loss": 2.5312, + "step": 5040 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004954800104654375, + "loss": 2.5598, + "step": 5050 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004954621391140326, + "loss": 2.5421, + "step": 5060 + }, + { + "epoch": 0.12, + "learning_rate": 0.000495444232825575, + "loss": 2.5199, + "step": 5070 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004954262916026135, + "loss": 2.4944, + "step": 5080 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004954083154477015, + "loss": 2.5115, + "step": 5090 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004953903043633977, + "loss": 2.5126, + "step": 5100 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004953722583522658, + "loss": 2.446, + "step": 5110 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004953541774168742, + "loss": 2.6025, + "step": 5120 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004953360615597963, + "loss": 2.5208, + "step": 5130 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004953179107836105, + "loss": 2.5167, + "step": 5140 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004952997250909006, + "loss": 2.5042, + "step": 5150 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004952815044842546, + "loss": 2.4997, + "step": 5160 + }, + { + "epoch": 0.12, + "learning_rate": 0.000495263248966266, + "loss": 2.5308, + "step": 5170 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004952449585395334, + "loss": 2.5414, + "step": 5180 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004952266332066597, + "loss": 2.5362, + "step": 5190 + }, + { + "epoch": 0.12, + "learning_rate": 0.0004952082729702534, + "loss": 2.5335, + "step": 5200 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004951898778329277, + "loss": 2.6089, + "step": 5210 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004951714477973008, + "loss": 2.5703, + "step": 5220 + }, + { + "epoch": 0.13, + "learning_rate": 0.000495152982865996, + "loss": 2.5374, + "step": 5230 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004951344830416412, + "loss": 2.5378, + "step": 5240 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004951159483268698, + "loss": 2.5335, + "step": 5250 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004950973787243196, + "loss": 2.5149, + "step": 5260 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004950787742366339, + "loss": 2.5265, + "step": 5270 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004950601348664605, + "loss": 2.5606, + "step": 5280 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004950414606164525, + "loss": 2.5707, + "step": 5290 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004950227514892678, + "loss": 2.5363, + "step": 5300 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004950040074875692, + "loss": 2.5186, + "step": 5310 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004949852286140247, + "loss": 2.5089, + "step": 5320 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004949664148713071, + "loss": 2.5773, + "step": 5330 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004949475662620942, + "loss": 2.5507, + "step": 5340 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004949286827890687, + "loss": 2.4891, + "step": 5350 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004949097644549185, + "loss": 2.5419, + "step": 5360 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004948908112623361, + "loss": 2.5012, + "step": 5370 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004948718232140191, + "loss": 2.57, + "step": 5380 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004948528003126702, + "loss": 2.5826, + "step": 5390 + }, + { + "epoch": 0.13, + "learning_rate": 0.000494833742560997, + "loss": 2.5257, + "step": 5400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004948146499617119, + "loss": 2.5108, + "step": 5410 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004947955225175325, + "loss": 2.542, + "step": 5420 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004947763602311812, + "loss": 2.5763, + "step": 5430 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004947571631053854, + "loss": 2.5096, + "step": 5440 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004947379311428774, + "loss": 2.5886, + "step": 5450 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004947186643463945, + "loss": 2.4859, + "step": 5460 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004946993627186792, + "loss": 2.5634, + "step": 5470 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004946800262624786, + "loss": 2.5464, + "step": 5480 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004946606549805448, + "loss": 2.5726, + "step": 5490 + }, + { + "epoch": 0.13, + "learning_rate": 0.000494641248875635, + "loss": 2.5298, + "step": 5500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004946218079505114, + "loss": 2.5529, + "step": 5510 + }, + { + "epoch": 0.13, + "learning_rate": 0.000494602332207941, + "loss": 2.5434, + "step": 5520 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004945828216506956, + "loss": 2.5649, + "step": 5530 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004945632762815526, + "loss": 2.5616, + "step": 5540 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004945436961032937, + "loss": 2.5637, + "step": 5550 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004945240811187057, + "loss": 2.5155, + "step": 5560 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004945044313305805, + "loss": 2.4839, + "step": 5570 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004944847467417149, + "loss": 2.5336, + "step": 5580 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004944650273549108, + "loss": 2.6058, + "step": 5590 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004944452731729746, + "loss": 2.5009, + "step": 5600 + }, + { + "epoch": 0.13, + "learning_rate": 0.0004944254841987182, + "loss": 2.5141, + "step": 5610 + }, + { + "epoch": 0.13, + "learning_rate": 0.000494405660434958, + "loss": 2.588, + "step": 5620 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004943858018845157, + "loss": 2.5345, + "step": 5630 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004943659085502177, + "loss": 2.4981, + "step": 5640 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004943459804348955, + "loss": 2.4701, + "step": 5650 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004943260175413855, + "loss": 2.4917, + "step": 5660 + }, + { + "epoch": 0.14, + "learning_rate": 0.000494306019872529, + "loss": 2.5416, + "step": 5670 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004942859874311724, + "loss": 2.4962, + "step": 5680 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004942659202201669, + "loss": 2.4898, + "step": 5690 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004942458182423688, + "loss": 2.5407, + "step": 5700 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004942256815006391, + "loss": 2.523, + "step": 5710 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004942055099978438, + "loss": 2.5788, + "step": 5720 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004941853037368543, + "loss": 2.5076, + "step": 5730 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004941650627205463, + "loss": 2.6023, + "step": 5740 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004941447869518009, + "loss": 2.4776, + "step": 5750 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004941244764335038, + "loss": 2.5034, + "step": 5760 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004941041311685461, + "loss": 2.566, + "step": 5770 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004940837511598233, + "loss": 2.5236, + "step": 5780 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004940633364102364, + "loss": 2.5121, + "step": 5790 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004940428869226908, + "loss": 2.5473, + "step": 5800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004940224027000971, + "loss": 2.5438, + "step": 5810 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004940018837453711, + "loss": 2.5331, + "step": 5820 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004939813300614333, + "loss": 2.5063, + "step": 5830 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004939607416512089, + "loss": 2.4819, + "step": 5840 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004939401185176283, + "loss": 2.5368, + "step": 5850 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004939194606636271, + "loss": 2.5501, + "step": 5860 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004938987680921454, + "loss": 2.5764, + "step": 5870 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004938780408061283, + "loss": 2.5278, + "step": 5880 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004938572788085262, + "loss": 2.5153, + "step": 5890 + }, + { + "epoch": 0.14, + "learning_rate": 0.000493836482102294, + "loss": 2.5262, + "step": 5900 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004938156506903916, + "loss": 2.4982, + "step": 5910 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004937947845757843, + "loss": 2.5665, + "step": 5920 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004937738837614419, + "loss": 2.5907, + "step": 5930 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004937529482503391, + "loss": 2.5652, + "step": 5940 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004937319780454559, + "loss": 2.5768, + "step": 5950 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004937109731497769, + "loss": 2.5628, + "step": 5960 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004936899335662917, + "loss": 2.5346, + "step": 5970 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004936688592979951, + "loss": 2.6023, + "step": 5980 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004936477503478865, + "loss": 2.6156, + "step": 5990 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004936266067189705, + "loss": 2.5852, + "step": 6000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004936054284142563, + "loss": 2.6043, + "step": 6010 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004935842154367583, + "loss": 2.5359, + "step": 6020 + }, + { + "epoch": 0.14, + "learning_rate": 0.0004935629677894959, + "loss": 2.6143, + "step": 6030 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004935416854754933, + "loss": 2.5196, + "step": 6040 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004935203684977795, + "loss": 2.5343, + "step": 6050 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004934990168593887, + "loss": 2.5528, + "step": 6060 + }, + { + "epoch": 0.15, + "learning_rate": 0.00049347763056336, + "loss": 2.5252, + "step": 6070 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004934562096127371, + "loss": 2.572, + "step": 6080 + }, + { + "epoch": 0.15, + "learning_rate": 0.000493434754010569, + "loss": 2.5583, + "step": 6090 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004934132637599097, + "loss": 2.5042, + "step": 6100 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004933917388638177, + "loss": 2.5118, + "step": 6110 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004933701793253568, + "loss": 2.534, + "step": 6120 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004933485851475955, + "loss": 2.5335, + "step": 6130 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004933269563336075, + "loss": 2.5667, + "step": 6140 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004933052928864709, + "loss": 2.5293, + "step": 6150 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004932835948092696, + "loss": 2.5083, + "step": 6160 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004932618621050917, + "loss": 2.5234, + "step": 6170 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004932400947770304, + "loss": 2.5295, + "step": 6180 + }, + { + "epoch": 0.15, + "learning_rate": 0.000493218292828184, + "loss": 2.5481, + "step": 6190 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004931964562616554, + "loss": 2.5662, + "step": 6200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004931745850805528, + "loss": 2.4865, + "step": 6210 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004931526792879892, + "loss": 2.5432, + "step": 6220 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004931307388870824, + "loss": 2.4741, + "step": 6230 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004931087638809553, + "loss": 2.5365, + "step": 6240 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004930867542727354, + "loss": 2.5459, + "step": 6250 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004930647100655556, + "loss": 2.498, + "step": 6260 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004930426312625536, + "loss": 2.5288, + "step": 6270 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004930205178668716, + "loss": 2.551, + "step": 6280 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004929983698816572, + "loss": 2.5706, + "step": 6290 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004929761873100628, + "loss": 2.5562, + "step": 6300 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004929539701552456, + "loss": 2.5513, + "step": 6310 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004929317184203678, + "loss": 2.5474, + "step": 6320 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004929094321085967, + "loss": 2.5221, + "step": 6330 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004928871112231041, + "loss": 2.575, + "step": 6340 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004928647557670672, + "loss": 2.5772, + "step": 6350 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004928423657436676, + "loss": 2.523, + "step": 6360 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004928199411560924, + "loss": 2.5617, + "step": 6370 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004927974820075332, + "loss": 2.5132, + "step": 6380 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004927749883011866, + "loss": 2.5358, + "step": 6390 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004927524600402543, + "loss": 2.5094, + "step": 6400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004927298972279428, + "loss": 2.5867, + "step": 6410 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004927072998674633, + "loss": 2.4521, + "step": 6420 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004926846679620323, + "loss": 2.5832, + "step": 6430 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004926620015148711, + "loss": 2.5095, + "step": 6440 + }, + { + "epoch": 0.15, + "learning_rate": 0.0004926393005292057, + "loss": 2.606, + "step": 6450 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004926165650082671, + "loss": 2.5507, + "step": 6460 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004925937949552914, + "loss": 2.539, + "step": 6470 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004925709903735196, + "loss": 2.474, + "step": 6480 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004925481512661975, + "loss": 2.5245, + "step": 6490 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004925252776365755, + "loss": 2.5624, + "step": 6500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004925023694879097, + "loss": 2.5657, + "step": 6510 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004924794268234603, + "loss": 2.5445, + "step": 6520 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004924564496464931, + "loss": 2.4908, + "step": 6530 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004924334379602781, + "loss": 2.5565, + "step": 6540 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004924103917680909, + "loss": 2.6322, + "step": 6550 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004923873110732115, + "loss": 2.5302, + "step": 6560 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004923641958789252, + "loss": 2.5552, + "step": 6570 + }, + { + "epoch": 0.16, + "learning_rate": 0.000492341046188522, + "loss": 2.5387, + "step": 6580 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004923178620052966, + "loss": 2.5912, + "step": 6590 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004922946433325491, + "loss": 2.5186, + "step": 6600 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004922713901735842, + "loss": 2.6154, + "step": 6610 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004922481025317115, + "loss": 2.499, + "step": 6620 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004922247804102456, + "loss": 2.5363, + "step": 6630 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004922037610236175, + "loss": 2.5905, + "step": 6640 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004921803734000738, + "loss": 2.5299, + "step": 6650 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004921569513065769, + "loss": 2.5492, + "step": 6660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004921334947464604, + "loss": 2.508, + "step": 6670 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004921100037230632, + "loss": 2.4594, + "step": 6680 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004920864782397285, + "loss": 2.5114, + "step": 6690 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004920629182998049, + "loss": 2.5465, + "step": 6700 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004920393239066459, + "loss": 2.5416, + "step": 6710 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004920156950636093, + "loss": 2.5362, + "step": 6720 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004919920317740584, + "loss": 2.4794, + "step": 6730 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004919683340413616, + "loss": 2.4981, + "step": 6740 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004919446018688913, + "loss": 2.5181, + "step": 6750 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004919208352600256, + "loss": 2.5417, + "step": 6760 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004918970342181473, + "loss": 2.5154, + "step": 6770 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004918731987466439, + "loss": 2.5431, + "step": 6780 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004918493288489081, + "loss": 2.5907, + "step": 6790 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004918254245283372, + "loss": 2.5224, + "step": 6800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004918014857883335, + "loss": 2.5333, + "step": 6810 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004917775126323045, + "loss": 2.574, + "step": 6820 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004917535050636622, + "loss": 2.5394, + "step": 6830 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004917294630858235, + "loss": 2.4887, + "step": 6840 + }, + { + "epoch": 0.16, + "learning_rate": 0.0004917053867022105, + "loss": 2.5104, + "step": 6850 + }, + { + "epoch": 0.16, + "learning_rate": 0.00049168127591625, + "loss": 2.468, + "step": 6860 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004916571307313737, + "loss": 2.5628, + "step": 6870 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004916329511510183, + "loss": 2.5458, + "step": 6880 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004916087371786251, + "loss": 2.4946, + "step": 6890 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004915844888176409, + "loss": 2.5286, + "step": 6900 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004915602060715168, + "loss": 2.5253, + "step": 6910 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004915358889437088, + "loss": 2.4748, + "step": 6920 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004915115374376784, + "loss": 2.5094, + "step": 6930 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004914871515568913, + "loss": 2.5571, + "step": 6940 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004914627313048185, + "loss": 2.5289, + "step": 6950 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004914382766849357, + "loss": 2.5304, + "step": 6960 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004914137877007237, + "loss": 2.4569, + "step": 6970 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004913892643556678, + "loss": 2.537, + "step": 6980 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004913647066532589, + "loss": 2.518, + "step": 6990 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004913401145969919, + "loss": 2.5473, + "step": 7000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004913154881903673, + "loss": 2.5467, + "step": 7010 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004912908274368902, + "loss": 2.5051, + "step": 7020 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004912661323400703, + "loss": 2.5434, + "step": 7030 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004912414029034229, + "loss": 2.5079, + "step": 7040 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004912166391304676, + "loss": 2.5231, + "step": 7050 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004911918410247289, + "loss": 2.5327, + "step": 7060 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004911670085897367, + "loss": 2.4631, + "step": 7070 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004911421418290253, + "loss": 2.4736, + "step": 7080 + }, + { + "epoch": 0.17, + "learning_rate": 0.000491117240746134, + "loss": 2.5329, + "step": 7090 + }, + { + "epoch": 0.17, + "learning_rate": 0.000491092305344607, + "loss": 2.5367, + "step": 7100 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004910673356279934, + "loss": 2.4862, + "step": 7110 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004910423315998472, + "loss": 2.5973, + "step": 7120 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004910172932637274, + "loss": 2.476, + "step": 7130 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004909922206231974, + "loss": 2.5796, + "step": 7140 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004909671136818262, + "loss": 2.5209, + "step": 7150 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004909419724431872, + "loss": 2.5255, + "step": 7160 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004909167969108588, + "loss": 2.528, + "step": 7170 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004908915870884241, + "loss": 2.5318, + "step": 7180 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004908663429794715, + "loss": 2.5298, + "step": 7190 + }, + { + "epoch": 0.17, + "learning_rate": 0.000490841064587594, + "loss": 2.5611, + "step": 7200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004908157519163893, + "loss": 2.5744, + "step": 7210 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004907904049694605, + "loss": 2.4975, + "step": 7220 + }, + { + "epoch": 0.17, + "learning_rate": 0.000490765023750415, + "loss": 2.5416, + "step": 7230 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004907396082628656, + "loss": 2.5316, + "step": 7240 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004907141585104296, + "loss": 2.4884, + "step": 7250 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004906886744967293, + "loss": 2.4595, + "step": 7260 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004906631562253918, + "loss": 2.6201, + "step": 7270 + }, + { + "epoch": 0.17, + "learning_rate": 0.0004906376037000494, + "loss": 2.5497, + "step": 7280 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004906120169243389, + "loss": 2.5548, + "step": 7290 + }, + { + "epoch": 0.18, + "learning_rate": 0.000490586395901902, + "loss": 2.5192, + "step": 7300 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004905607406363856, + "loss": 2.4508, + "step": 7310 + }, + { + "epoch": 0.18, + "learning_rate": 0.000490535051131441, + "loss": 2.5351, + "step": 7320 + }, + { + "epoch": 0.18, + "learning_rate": 0.000490509327390725, + "loss": 2.5056, + "step": 7330 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004904835694178986, + "loss": 2.5102, + "step": 7340 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004904577772166281, + "loss": 2.5897, + "step": 7350 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004904319507905844, + "loss": 2.5587, + "step": 7360 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004904060901434436, + "loss": 2.5699, + "step": 7370 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004903801952788865, + "loss": 2.4632, + "step": 7380 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004903542662005986, + "loss": 2.5195, + "step": 7390 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004903309007804499, + "loss": 2.4926, + "step": 7400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004903049067062451, + "loss": 2.5151, + "step": 7410 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004902788784290256, + "loss": 2.5064, + "step": 7420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004902528159524959, + "loss": 2.5318, + "step": 7430 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004902267192803657, + "loss": 2.4508, + "step": 7440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004902005884163492, + "loss": 2.5051, + "step": 7450 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004901744233641659, + "loss": 2.574, + "step": 7460 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004901482241275396, + "loss": 2.5474, + "step": 7470 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004901219907101995, + "loss": 2.5259, + "step": 7480 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004900957231158793, + "loss": 2.5176, + "step": 7490 + }, + { + "epoch": 0.18, + "learning_rate": 0.000490069421348318, + "loss": 2.4825, + "step": 7500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004900430854112589, + "loss": 2.5205, + "step": 7510 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004900167153084504, + "loss": 2.5239, + "step": 7520 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004899903110436461, + "loss": 2.5319, + "step": 7530 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004899638726206039, + "loss": 2.548, + "step": 7540 + }, + { + "epoch": 0.18, + "learning_rate": 0.000489937400043087, + "loss": 2.5479, + "step": 7550 + }, + { + "epoch": 0.18, + "learning_rate": 0.000489910893314863, + "loss": 2.5116, + "step": 7560 + }, + { + "epoch": 0.18, + "learning_rate": 0.000489884352439705, + "loss": 2.5425, + "step": 7570 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004898577774213905, + "loss": 2.5161, + "step": 7580 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004898311682637018, + "loss": 2.4576, + "step": 7590 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004898045249704264, + "loss": 2.4789, + "step": 7600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004897778475453565, + "loss": 2.548, + "step": 7610 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004897511359922891, + "loss": 2.4826, + "step": 7620 + }, + { + "epoch": 0.18, + "learning_rate": 0.000489724390315026, + "loss": 2.5211, + "step": 7630 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004896976105173742, + "loss": 2.5603, + "step": 7640 + }, + { + "epoch": 0.18, + "learning_rate": 0.000489670796603145, + "loss": 2.5257, + "step": 7650 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004896439485761551, + "loss": 2.5226, + "step": 7660 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004896170664402257, + "loss": 2.5378, + "step": 7670 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004895901501991831, + "loss": 2.5309, + "step": 7680 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004895631998568583, + "loss": 2.5318, + "step": 7690 + }, + { + "epoch": 0.18, + "learning_rate": 0.0004895362154170871, + "loss": 2.5586, + "step": 7700 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004895091968837105, + "loss": 2.5026, + "step": 7710 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004894821442605736, + "loss": 2.5712, + "step": 7720 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004894550575515273, + "loss": 2.5526, + "step": 7730 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004894279367604268, + "loss": 2.5771, + "step": 7740 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004894007818911321, + "loss": 2.5358, + "step": 7750 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004893735929475083, + "loss": 2.5122, + "step": 7760 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004893463699334254, + "loss": 2.5698, + "step": 7770 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004893191128527577, + "loss": 2.5597, + "step": 7780 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004892918217093852, + "loss": 2.5425, + "step": 7790 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004892644965071919, + "loss": 2.596, + "step": 7800 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004892371372500674, + "loss": 2.5331, + "step": 7810 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004892097439419057, + "loss": 2.5161, + "step": 7820 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004891823165866055, + "loss": 2.5049, + "step": 7830 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004891548551880706, + "loss": 2.5163, + "step": 7840 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004891273597502101, + "loss": 2.5393, + "step": 7850 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004890998302769369, + "loss": 2.5318, + "step": 7860 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004890722667721697, + "loss": 2.4846, + "step": 7870 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004890446692398315, + "loss": 2.5587, + "step": 7880 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004890170376838503, + "loss": 2.4969, + "step": 7890 + }, + { + "epoch": 0.19, + "learning_rate": 0.000488989372108159, + "loss": 2.5095, + "step": 7900 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004889616725166951, + "loss": 2.4844, + "step": 7910 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004889339389134016, + "loss": 2.5708, + "step": 7920 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004889061713022253, + "loss": 2.5844, + "step": 7930 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004888783696871188, + "loss": 2.5585, + "step": 7940 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004888505340720391, + "loss": 2.5356, + "step": 7950 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004888226644609478, + "loss": 2.4999, + "step": 7960 + }, + { + "epoch": 0.19, + "learning_rate": 0.000488794760857812, + "loss": 2.5001, + "step": 7970 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004887668232666032, + "loss": 2.5503, + "step": 7980 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004887388516912976, + "loss": 2.5694, + "step": 7990 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004887108461358767, + "loss": 2.5327, + "step": 8000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004886828066043263, + "loss": 2.5373, + "step": 8010 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004886547331006375, + "loss": 2.5168, + "step": 8020 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004886266256288062, + "loss": 2.5754, + "step": 8030 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004885984841928326, + "loss": 2.5211, + "step": 8040 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004885703087967224, + "loss": 2.5467, + "step": 8050 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004885420994444858, + "loss": 2.5561, + "step": 8060 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004885138561401377, + "loss": 2.5086, + "step": 8070 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004884855788876984, + "loss": 2.5144, + "step": 8080 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004884572676911923, + "loss": 2.4998, + "step": 8090 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004884289225546491, + "loss": 2.4899, + "step": 8100 + }, + { + "epoch": 0.19, + "learning_rate": 0.0004884005434821032, + "loss": 2.5413, + "step": 8110 + }, + { + "epoch": 0.2, + "learning_rate": 0.000488372130477594, + "loss": 2.4236, + "step": 8120 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004883436835451654, + "loss": 2.5394, + "step": 8130 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004883152026888664, + "loss": 2.5144, + "step": 8140 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004882866879127505, + "loss": 2.5319, + "step": 8150 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048825813922087646, + "loss": 2.4748, + "step": 8160 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004882295566173077, + "loss": 2.5425, + "step": 8170 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048820094010611236, + "loss": 2.4608, + "step": 8180 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004881722896913635, + "loss": 2.5557, + "step": 8190 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004881436053771389, + "loss": 2.5616, + "step": 8200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004881148871675214, + "loss": 2.5322, + "step": 8210 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004880861350665983, + "loss": 2.5417, + "step": 8220 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004880573490784622, + "loss": 2.5991, + "step": 8230 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048802852920721, + "loss": 2.429, + "step": 8240 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004879996754569439, + "loss": 2.513, + "step": 8250 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048797078783177053, + "loss": 2.5191, + "step": 8260 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048794186633580164, + "loss": 2.5401, + "step": 8270 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048791291097315364, + "loss": 2.5169, + "step": 8280 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004878839217479478, + "loss": 2.5629, + "step": 8290 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048785489866431014, + "loss": 2.4349, + "step": 8300 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048782584172637165, + "loss": 2.5095, + "step": 8310 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048779675093826803, + "loss": 2.4796, + "step": 8320 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048776762630413985, + "loss": 2.5365, + "step": 8330 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048773846782813234, + "loss": 2.5191, + "step": 8340 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048770927551439593, + "loss": 2.5618, + "step": 8350 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048768004936708534, + "loss": 2.5195, + "step": 8360 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004876507893903605, + "loss": 2.5409, + "step": 8370 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048762149558838607, + "loss": 2.4873, + "step": 8380 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048759216796533144, + "loss": 2.5492, + "step": 8390 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004875628065253709, + "loss": 2.5033, + "step": 8400 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048753341127268334, + "loss": 2.5711, + "step": 8410 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048750398221145293, + "loss": 2.538, + "step": 8420 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004874745193458682, + "loss": 2.5147, + "step": 8430 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048744502268012257, + "loss": 2.4645, + "step": 8440 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004874154922184145, + "loss": 2.568, + "step": 8450 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004873859279649471, + "loss": 2.5086, + "step": 8460 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004873563299239281, + "loss": 2.4851, + "step": 8470 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048732669809957056, + "loss": 2.5279, + "step": 8480 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004872970324960918, + "loss": 2.5428, + "step": 8490 + }, + { + "epoch": 0.2, + "learning_rate": 0.0004872673331177142, + "loss": 2.5061, + "step": 8500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048723759996866504, + "loss": 2.5048, + "step": 8510 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048720783305317616, + "loss": 2.4591, + "step": 8520 + }, + { + "epoch": 0.2, + "learning_rate": 0.00048717803237548446, + "loss": 2.5629, + "step": 8530 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048714819793983143, + "loss": 2.4768, + "step": 8540 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048711832975046346, + "loss": 2.5034, + "step": 8550 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048708842781163176, + "loss": 2.5183, + "step": 8560 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004870584921275923, + "loss": 2.518, + "step": 8570 + }, + { + "epoch": 0.21, + "learning_rate": 0.000487028522702606, + "loss": 2.5804, + "step": 8580 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004869985195409383, + "loss": 2.5195, + "step": 8590 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004869684826468597, + "loss": 2.4545, + "step": 8600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004869384120246454, + "loss": 2.4323, + "step": 8610 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004869083076785753, + "loss": 2.4878, + "step": 8620 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048687816961293437, + "loss": 2.5676, + "step": 8630 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048684799783201205, + "loss": 2.4564, + "step": 8640 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048681779234010283, + "loss": 2.5636, + "step": 8650 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048678755314150593, + "loss": 2.5314, + "step": 8660 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048675728024052527, + "loss": 2.5547, + "step": 8670 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048672697364146974, + "loss": 2.5618, + "step": 8680 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004866966333486528, + "loss": 2.5123, + "step": 8690 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048666625936639297, + "loss": 2.5665, + "step": 8700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048663585169901336, + "loss": 2.5149, + "step": 8710 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048660541035084205, + "loss": 2.5337, + "step": 8720 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004865749353262116, + "loss": 2.4607, + "step": 8730 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048654442662945973, + "loss": 2.5306, + "step": 8740 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004865138842649288, + "loss": 2.4574, + "step": 8750 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048648330823696586, + "loss": 2.4983, + "step": 8760 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048645269854992294, + "loss": 2.5543, + "step": 8770 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048642205520815676, + "loss": 2.583, + "step": 8780 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048639137821602883, + "loss": 2.4791, + "step": 8790 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004863606675779054, + "loss": 2.542, + "step": 8800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004863299232981576, + "loss": 2.4648, + "step": 8810 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004862991453811614, + "loss": 2.4428, + "step": 8820 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048626833383129744, + "loss": 2.5686, + "step": 8830 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048623748865295104, + "loss": 2.5727, + "step": 8840 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048620660985051263, + "loss": 2.5765, + "step": 8850 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048617569742837713, + "loss": 2.4668, + "step": 8860 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048614475139094446, + "loss": 2.5309, + "step": 8870 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004861137717426191, + "loss": 2.573, + "step": 8880 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004860827584878105, + "loss": 2.513, + "step": 8890 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004860517116309329, + "loss": 2.5, + "step": 8900 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004860206311764052, + "loss": 2.5337, + "step": 8910 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048598951712865103, + "loss": 2.5854, + "step": 8920 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048595836949209896, + "loss": 2.5265, + "step": 8930 + }, + { + "epoch": 0.21, + "learning_rate": 0.00048592718827118243, + "loss": 2.5244, + "step": 8940 + }, + { + "epoch": 0.21, + "learning_rate": 0.0004858959734703393, + "loss": 2.5271, + "step": 8950 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048586472509401263, + "loss": 2.5153, + "step": 8960 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048583344314664993, + "loss": 2.4984, + "step": 8970 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004858021276327037, + "loss": 2.5511, + "step": 8980 + }, + { + "epoch": 0.22, + "learning_rate": 0.000485770778556631, + "loss": 2.5409, + "step": 8990 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004857393959228939, + "loss": 2.5153, + "step": 9000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004857079797359591, + "loss": 2.593, + "step": 9010 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004856765300002982, + "loss": 2.5274, + "step": 9020 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004856450467203875, + "loss": 2.521, + "step": 9030 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004856135299007078, + "loss": 2.5454, + "step": 9040 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004855819795457453, + "loss": 2.4956, + "step": 9050 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004855503956599904, + "loss": 2.4448, + "step": 9060 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004855187782479386, + "loss": 2.5293, + "step": 9070 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048548712731409, + "loss": 2.5492, + "step": 9080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004854554428629496, + "loss": 2.5481, + "step": 9090 + }, + { + "epoch": 0.22, + "learning_rate": 0.000485423724899027, + "loss": 2.4744, + "step": 9100 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004853919734268367, + "loss": 2.489, + "step": 9110 + }, + { + "epoch": 0.22, + "learning_rate": 0.000485360188450898, + "loss": 2.5009, + "step": 9120 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048532836997573493, + "loss": 2.5212, + "step": 9130 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048529651800587617, + "loss": 2.5175, + "step": 9140 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004852646325458553, + "loss": 2.5256, + "step": 9150 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048523271360021065, + "loss": 2.4885, + "step": 9160 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048520076117348533, + "loss": 2.5278, + "step": 9170 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048516877527022704, + "loss": 2.5603, + "step": 9180 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048513675589498864, + "loss": 2.5113, + "step": 9190 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048510470305232733, + "loss": 2.4844, + "step": 9200 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048507261674680523, + "loss": 2.4827, + "step": 9210 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004850404969829894, + "loss": 2.5388, + "step": 9220 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004850083437654512, + "loss": 2.5186, + "step": 9230 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004849761570987674, + "loss": 2.5464, + "step": 9240 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048494393698751904, + "loss": 2.57, + "step": 9250 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048491168343629197, + "loss": 2.5314, + "step": 9260 + }, + { + "epoch": 0.22, + "learning_rate": 0.000484879396449677, + "loss": 2.4585, + "step": 9270 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048484707603226957, + "loss": 2.5167, + "step": 9280 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004848147221886699, + "loss": 2.528, + "step": 9290 + }, + { + "epoch": 0.22, + "learning_rate": 0.000484782334923483, + "loss": 2.4785, + "step": 9300 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048474991424131853, + "loss": 2.5506, + "step": 9310 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004847174601467911, + "loss": 2.5288, + "step": 9320 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004846849726445198, + "loss": 2.4625, + "step": 9330 + }, + { + "epoch": 0.22, + "learning_rate": 0.00048465245173912876, + "loss": 2.5387, + "step": 9340 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004846198974352467, + "loss": 2.5236, + "step": 9350 + }, + { + "epoch": 0.22, + "learning_rate": 0.0004845873097375071, + "loss": 2.6822, + "step": 9360 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048455468865054826, + "loss": 2.5064, + "step": 9370 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048452203417901317, + "loss": 2.4583, + "step": 9380 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004844893463275496, + "loss": 2.493, + "step": 9390 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004844566251008101, + "loss": 2.5302, + "step": 9400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004844238705034519, + "loss": 2.5663, + "step": 9410 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048439108254013714, + "loss": 2.4936, + "step": 9420 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004843582612155324, + "loss": 2.4725, + "step": 9430 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004843254065343093, + "loss": 2.5412, + "step": 9440 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004842925185011441, + "loss": 2.5752, + "step": 9450 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048425959712071774, + "loss": 2.5842, + "step": 9460 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048422664239771606, + "loss": 2.5258, + "step": 9470 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004841936543368296, + "loss": 2.5472, + "step": 9480 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048416063294275347, + "loss": 2.4784, + "step": 9490 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004841275782201878, + "loss": 2.5275, + "step": 9500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048409449017383725, + "loss": 2.5766, + "step": 9510 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048406136880841126, + "loss": 2.4899, + "step": 9520 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004840282141286242, + "loss": 2.5405, + "step": 9530 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048399502613919484, + "loss": 2.5145, + "step": 9540 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048396180484484706, + "loss": 2.5267, + "step": 9550 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048392855025030914, + "loss": 2.5416, + "step": 9560 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048389526236031444, + "loss": 2.5031, + "step": 9570 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004838619411796007, + "loss": 2.486, + "step": 9580 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048382858671291073, + "loss": 2.4852, + "step": 9590 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004837951989649919, + "loss": 2.5473, + "step": 9600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004837617779405963, + "loss": 2.5644, + "step": 9610 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048372832364448084, + "loss": 2.4929, + "step": 9620 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004836948360814071, + "loss": 2.5748, + "step": 9630 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048366131525614137, + "loss": 2.4606, + "step": 9640 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048362776117345487, + "loss": 2.5153, + "step": 9650 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004835941738381233, + "loss": 2.5266, + "step": 9660 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004835605532549272, + "loss": 2.5345, + "step": 9670 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048352689942865196, + "loss": 2.5751, + "step": 9680 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004834932123640875, + "loss": 2.5734, + "step": 9690 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004834594920660286, + "loss": 2.5082, + "step": 9700 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048342573853927455, + "loss": 2.4498, + "step": 9710 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048339195178862985, + "loss": 2.5814, + "step": 9720 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004833581318189032, + "loss": 2.5311, + "step": 9730 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048332427863490837, + "loss": 2.5387, + "step": 9740 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048329039224146364, + "loss": 2.529, + "step": 9750 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004832564726433922, + "loss": 2.5404, + "step": 9760 + }, + { + "epoch": 0.23, + "learning_rate": 0.00048322251984552183, + "loss": 2.5415, + "step": 9770 + }, + { + "epoch": 0.23, + "learning_rate": 0.0004831885338526852, + "loss": 2.4912, + "step": 9780 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004831545146697195, + "loss": 2.4882, + "step": 9790 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004831204623014667, + "loss": 2.5481, + "step": 9800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048308637675277365, + "loss": 2.5279, + "step": 9810 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048305225802849173, + "loss": 2.5243, + "step": 9820 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004830181061334771, + "loss": 2.5333, + "step": 9830 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004829839210725908, + "loss": 2.5383, + "step": 9840 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048294970285069826, + "loss": 2.5704, + "step": 9850 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004829154514726699, + "loss": 2.5403, + "step": 9860 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004828811669433808, + "loss": 2.535, + "step": 9870 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004828468492677107, + "loss": 2.4692, + "step": 9880 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048281249845054413, + "loss": 2.5308, + "step": 9890 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004827781144967703, + "loss": 2.4852, + "step": 9900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004827436974112831, + "loss": 2.4735, + "step": 9910 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004827092471989811, + "loss": 2.5586, + "step": 9920 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048267476386476793, + "loss": 2.5126, + "step": 9930 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004826402474135514, + "loss": 2.5112, + "step": 9940 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004826056978502443, + "loss": 2.5092, + "step": 9950 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004825711151797643, + "loss": 2.5596, + "step": 9960 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004825364994070335, + "loss": 2.5572, + "step": 9970 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048250185053697893, + "loss": 2.5777, + "step": 9980 + }, + { + "epoch": 0.24, + "learning_rate": 0.00048246716857453203, + "loss": 2.536, + "step": 9990 + }, + { + "epoch": 0.24, + "learning_rate": 0.0004824324535246293, + "loss": 2.5377, + "step": 10000 + } + ], + "max_steps": 83272, + "num_train_epochs": 2, + "total_flos": 8.253517770868654e+18, + "trial_name": null, + "trial_params": null +}